// Copyright 2014 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'dart:async';
import 'dart:html' as html;
import 'dart:js_util' as js_util;
import 'dart:math' as math;
import 'dart:ui';

import 'package:flutter/foundation.dart';
import 'package:flutter/gestures.dart';
import 'package:flutter/rendering.dart';
import 'package:flutter/scheduler.dart';
import 'package:flutter/services.dart';
import 'package:flutter/widgets.dart';
import 'package:meta/meta.dart';

/// The number of samples from warm-up iterations.
///
/// We warm up the benchmark prior to measuring to allow JIT and caches to settle.
const int _kWarmUpSampleCount = 200;

/// The number of samples we use to collect statistics from.
const int _kMeasuredSampleCount = 100;

/// The total number of samples collected by a benchmark.
const int kTotalSampleCount = _kWarmUpSampleCount + _kMeasuredSampleCount;

/// A benchmark metric that includes frame-related computations prior to
/// submitting layer and picture operations to the underlying renderer, such as
/// HTML and CanvasKit. During this phase we compute transforms, clips, and
/// other information needed for rendering.
const String kProfilePrerollFrame = 'preroll_frame';

/// A benchmark metric that includes submitting layer and picture information
/// to the renderer.
const String kProfileApplyFrame = 'apply_frame';

/// Measures the amount of time [action] takes.
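///
/// For example:
///
/// ```
/// final Duration elapsed = timeAction(() {
///   // ...the work to measure...
/// });
/// ```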
Duration timeAction(VoidCallback action) {
  final Stopwatch stopwatch = Stopwatch()..start();
  action();
  stopwatch.stop();
  return stopwatch.elapsed;
}

/// A function that performs asynchronous work.
typedef AsyncVoidCallback = Future<void> Function();

/// An [AsyncVoidCallback] that doesn't do anything.
///
/// This is used just so we don't have to deal with null all over the place.
Future<void> _dummyAsyncVoidCallback() async {}

/// Runs the benchmark using the given [recorder].
///
/// Notifies about "set up" and "tear down" events via the [setUpAllDidRun]
/// and [tearDownAllWillRun] callbacks.
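///
/// A minimal usage sketch (using the `BenchForLoop` recorder from the
/// [RawRecorder] example below):
///
/// ```
/// final Runner runner = Runner(recorder: BenchForLoop());
/// final Profile profile = await runner.run();
/// print(profile);
/// ```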
@sealed
class Runner {
  /// Creates a runner for the [recorder].
  ///
  /// All arguments must not be null.
  Runner({
    @required this.recorder,
    this.setUpAllDidRun = _dummyAsyncVoidCallback,
    this.tearDownAllWillRun = _dummyAsyncVoidCallback,
  });

  /// The recorder that will run and record the benchmark.
  final Recorder recorder;

  /// Called immediately after [Recorder.setUpAll] future is resolved.
  ///
  /// This is useful, for example, to kick off a profiler or a tracer such that
  /// the "set up" computations are not included in the metrics.
  final AsyncVoidCallback setUpAllDidRun;

  /// Called just before calling [Recorder.tearDownAll].
  ///
  /// This is useful, for example, to stop a profiler or a tracer such that
  /// the "tear down" computations are not included in the metrics.
  final AsyncVoidCallback tearDownAllWillRun;

  /// Runs the benchmark and reports the results.
  Future<Profile> run() async {
    await recorder.setUpAll();
    await setUpAllDidRun();
    final Profile profile = await recorder.run();
    await tearDownAllWillRun();
    await recorder.tearDownAll();
    return profile;
  }
}

/// Base class for benchmark recorders.
///
/// Each benchmark recorder has a [name] and a [run] method at a minimum.
abstract class Recorder {
  Recorder._(this.name, this.isTracingEnabled);

  /// Whether this recorder requires tracing using Chrome's DevTools Protocol's
  /// "Tracing" API.
  final bool isTracingEnabled;

  /// The name of the benchmark.
  ///
  /// The results displayed in the Flutter Dashboard will use this name as a
  /// prefix.
  final String name;

  /// Returns the recorded profile.
  ///
  /// This value is only available while the benchmark is running.
  Profile get profile;

  /// Whether the benchmark should continue running.
  ///
  /// Returns `false` if the benchmark collected enough data and it's time to
  /// stop.
  bool shouldContinue() => profile.shouldContinue();

  /// Called once before all runs of this benchmark recorder.
  ///
  /// This is useful for doing one-time setup work that's needed for the
  /// benchmark.
  Future<void> setUpAll() async {}

  /// The implementation of the benchmark that will produce a [Profile].
  Future<Profile> run();

  /// Called once after all runs of this benchmark recorder.
  ///
  /// This is useful for doing one-time clean up work after the benchmark is
  /// complete.
  Future<void> tearDownAll() async {}
}

/// A recorder for benchmarking raw execution of Dart code.
///
/// This is useful for benchmarks that don't need frames or widgets.
///
/// Example:
///
/// ```
/// class BenchForLoop extends RawRecorder {
///   BenchForLoop() : super(name: benchmarkName);
///
///   static const String benchmarkName = 'for_loop';
///
///   @override
///   void body(Profile profile) {
///     profile.record('loop', () {
///       double x = 0;
///       for (int i = 0; i < 10000000; i++) {
///         x *= 1.5;
///       }
///     });
///   }
/// }
/// ```
abstract class RawRecorder extends Recorder {
  RawRecorder({@required String name}) : super._(name, false);

  /// The body of the benchmark.
  ///
  /// This is the part that records measurements of the benchmark.
  void body(Profile profile);

  @override
  Profile get profile => _profile;
  Profile _profile;

  @override
  @nonVirtual
  Future<Profile> run() async {
    _profile = Profile(name: name);
    do {
      await Future<void>.delayed(Duration.zero);
      body(_profile);
    } while (shouldContinue());
    return _profile;
  }
}

/// A recorder for benchmarking interactions with the engine without the
/// framework by directly exercising [SceneBuilder].
///
/// To implement a benchmark, extend this class and implement [onDrawFrame].
///
/// Example:
///
/// ```
/// class BenchDrawCircle extends SceneBuilderRecorder {
///   BenchDrawCircle() : super(name: benchmarkName);
///
///   static const String benchmarkName = 'draw_circle';
///
///   @override
///   void onDrawFrame(SceneBuilder sceneBuilder) {
///     final PictureRecorder pictureRecorder = PictureRecorder();
///     final Canvas canvas = Canvas(pictureRecorder);
///     final Paint paint = Paint()..color = const Color.fromARGB(255, 255, 0, 0);
///     final Size windowSize = window.physicalSize;
///     canvas.drawCircle(windowSize.center(Offset.zero), 50.0, paint);
///     final Picture picture = pictureRecorder.endRecording();
///     sceneBuilder.addPicture(picture);
///   }
/// }
/// ```
abstract class SceneBuilderRecorder extends Recorder {
  SceneBuilderRecorder({@required String name}) : super._(name, true);

  @override
  Profile get profile => _profile;
  Profile _profile;

  /// Called from [dart:ui.PlatformDispatcher.onBeginFrame].
  @mustCallSuper
  void onBeginFrame() {}

  /// Called on every frame.
  ///
  /// An implementation should exercise the [sceneBuilder] to build a frame.
  /// However, it must not call [SceneBuilder.build] or
  /// [dart:ui.FlutterView.render]. Instead, the benchmark harness will call
  /// them and time them appropriately.
  void onDrawFrame(SceneBuilder sceneBuilder);

  @override
  Future<Profile> run() {
    final Completer<Profile> profileCompleter = Completer<Profile>();
    _profile = Profile(name: name);

    window.onBeginFrame = (_) {
      try {
        startMeasureFrame(profile);
        onBeginFrame();
      } catch (error, stackTrace) {
        profileCompleter.completeError(error, stackTrace);
        rethrow;
      }
    };
    window.onDrawFrame = () {
      try {
        _profile.record('drawFrameDuration', () {
          final SceneBuilder sceneBuilder = SceneBuilder();
          onDrawFrame(sceneBuilder);
          _profile.record('sceneBuildDuration', () {
            final Scene scene = sceneBuilder.build();
            _profile.record('windowRenderDuration', () {
              window.render(scene);
            }, reported: false);
          }, reported: false);
        }, reported: true);
        endMeasureFrame();

        if (shouldContinue()) {
          window.scheduleFrame();
        } else {
          profileCompleter.complete(_profile);
        }
      } catch (error, stackTrace) {
        profileCompleter.completeError(error, stackTrace);
        rethrow;
      }
    };
    window.scheduleFrame();
    return profileCompleter.future;
  }
}

/// A recorder for benchmarking interactions with the framework by creating
/// widgets.
///
/// To implement a benchmark, extend this class and implement [createWidget].
///
/// Example:
///
/// ```
/// class BenchListView extends WidgetRecorder {
///   BenchListView() : super(name: benchmarkName);
///
///   static const String benchmarkName = 'bench_list_view';
///
///   @override
///   Widget createWidget() {
///     return Directionality(
///       textDirection: TextDirection.ltr,
///       child: _TestListViewWidget(),
///     );
///   }
/// }
///
/// class _TestListViewWidget extends StatefulWidget {
///   @override
///   State<StatefulWidget> createState() {
///     return _TestListViewWidgetState();
///   }
/// }
///
/// class _TestListViewWidgetState extends State<_TestListViewWidget> {
///   ScrollController scrollController;
///
///   @override
///   void initState() {
///     super.initState();
///     scrollController = ScrollController();
///     Timer.run(() async {
///       bool forward = true;
///       while (true) {
///         await scrollController.animateTo(
///           forward ? 300 : 0,
///           curve: Curves.linear,
///           duration: const Duration(seconds: 1),
///         );
///         forward = !forward;
///       }
///     });
///   }
///
///   @override
///   Widget build(BuildContext context) {
///     return ListView.builder(
///       controller: scrollController,
///       itemCount: 10000,
///       itemBuilder: (BuildContext context, int index) {
///         return Text('Item #$index');
///       },
///     );
///   }
/// }
/// ```
abstract class WidgetRecorder extends Recorder implements FrameRecorder {
  WidgetRecorder({
    @required String name,
    this.useCustomWarmUp = false,
  }) : super._(name, true);

  /// Creates a widget to be benchmarked.
  ///
  /// The widget must create its own animation to drive the benchmark. The
  /// animation should continue indefinitely. The benchmark harness will stop
  /// pumping frames automatically.
  Widget createWidget();

  final List<VoidCallback> _didStopCallbacks = <VoidCallback>[];
  @override
  void registerDidStop(VoidCallback fn) {
    _didStopCallbacks.add(fn);
  }

  @override
  Profile profile;
  Completer<void> _runCompleter;

  /// Whether to delimit warm-up frames in a custom way.
  final bool useCustomWarmUp;

  Stopwatch _drawFrameStopwatch;

  @override
  @mustCallSuper
  void frameWillDraw() {
    startMeasureFrame(profile);
    _drawFrameStopwatch = Stopwatch()..start();
  }

  @override
  @mustCallSuper
  void frameDidDraw() {
    endMeasureFrame();
    profile.addDataPoint('drawFrameDuration', _drawFrameStopwatch.elapsed, reported: true);

    if (shouldContinue()) {
      window.scheduleFrame();
    } else {
      for (final VoidCallback fn in _didStopCallbacks)
        fn();
      _runCompleter.complete();
    }
  }

  @override
  void _onError(dynamic error, StackTrace stackTrace) {
    _runCompleter.completeError(error, stackTrace);
  }

  @override
  Future<Profile> run() async {
    _runCompleter = Completer<void>();
    final Profile localProfile = profile = Profile(name: name, useCustomWarmUp: useCustomWarmUp);
    final _RecordingWidgetsBinding binding =
        _RecordingWidgetsBinding.ensureInitialized();
    final Widget widget = createWidget();

    registerEngineBenchmarkValueListener(kProfilePrerollFrame, (num value) {
      localProfile.addDataPoint(
        kProfilePrerollFrame,
        Duration(microseconds: value.toInt()),
        reported: false,
      );
    });
    registerEngineBenchmarkValueListener(kProfileApplyFrame, (num value) {
      localProfile.addDataPoint(
        kProfileApplyFrame,
        Duration(microseconds: value.toInt()),
        reported: false,
      );
    });

    binding._beginRecording(this, widget);

    try {
      await _runCompleter.future;
      return localProfile;
    } finally {
      stopListeningToEngineBenchmarkValues(kProfilePrerollFrame);
      stopListeningToEngineBenchmarkValues(kProfileApplyFrame);
      _runCompleter = null;
      profile = null;
    }
  }
}

/// A recorder for measuring the performance of building a widget from scratch
/// starting from an empty frame.
///
/// The recorder will call [createWidget] and render it, then it will pump
/// another frame that clears the screen. It repeats this process, measuring the
/// performance of frames that render the widget and ignoring the frames that
/// clear the screen.
abstract class WidgetBuildRecorder extends Recorder implements FrameRecorder {
  WidgetBuildRecorder({@required String name}) : super._(name, true);

  /// Creates a widget to be benchmarked.
  ///
  /// The widget is not expected to animate as we only care about construction
  /// of the widget. If you are interested in benchmarking an animation,
  /// consider using [WidgetRecorder].
  Widget createWidget();

  final List<VoidCallback> _didStopCallbacks = <VoidCallback>[];
  @override
  void registerDidStop(VoidCallback fn) {
    _didStopCallbacks.add(fn);
  }

  @override
  Profile profile;
  Completer<void> _runCompleter;

  Stopwatch _drawFrameStopwatch;

  /// Whether in this frame we should call [createWidget] and render it.
  ///
  /// If false, then this frame will clear the screen.
  bool showWidget = true;

  /// The state that hosts the widget under test.
  _WidgetBuildRecorderHostState _hostState;

  Widget _getWidgetForFrame() {
    if (showWidget) {
      return createWidget();
    } else {
      return null;
    }
  }

  @override
  @mustCallSuper
  void frameWillDraw() {
    if (showWidget) {
      startMeasureFrame(profile);
      _drawFrameStopwatch = Stopwatch()..start();
    }
  }

  @override
  @mustCallSuper
  void frameDidDraw() {
    // Only record frames that show the widget.
    if (showWidget) {
      endMeasureFrame();
      profile.addDataPoint('drawFrameDuration', _drawFrameStopwatch.elapsed, reported: true);
    }

    if (shouldContinue()) {
      showWidget = !showWidget;
      _hostState._setStateTrampoline();
    } else {
      for (final VoidCallback fn in _didStopCallbacks)
        fn();
      _runCompleter.complete();
    }
  }

  @override
  void _onError(dynamic error, StackTrace stackTrace) {
    _runCompleter.completeError(error, stackTrace);
  }

  @override
  Future<Profile> run() async {
    _runCompleter = Completer<void>();
    final Profile localProfile = profile = Profile(name: name);
    final _RecordingWidgetsBinding binding =
        _RecordingWidgetsBinding.ensureInitialized();
    binding._beginRecording(this, _WidgetBuildRecorderHost(this));

    try {
      await _runCompleter.future;
      return localProfile;
    } finally {
      _runCompleter = null;
      profile = null;
    }
  }
}

/// Hosts widgets created by [WidgetBuildRecorder].
class _WidgetBuildRecorderHost extends StatefulWidget {
  const _WidgetBuildRecorderHost(this.recorder);

  final WidgetBuildRecorder recorder;

  @override
  State<StatefulWidget> createState() => _WidgetBuildRecorderHostState();
}

class _WidgetBuildRecorderHostState extends State<_WidgetBuildRecorderHost> {
  @override
  void initState() {
    super.initState();
    widget.recorder._hostState = this;
  }

  // This is just to bypass the @protected on setState.
  void _setStateTrampoline() {
    setState(() {});
  }

  @override
  Widget build(BuildContext context) {
    return SizedBox.expand(
      child: widget.recorder._getWidgetForFrame(),
    );
  }
}

/// Series of time recordings indexed in time order.
///
/// It can calculate [average], [standardDeviation] and [noise]. If the amount
/// of data collected is higher than [_kMeasuredSampleCount], then these
/// calculations will only apply to the latest [_kMeasuredSampleCount] data
/// points.
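///
/// A usage sketch; `useCustomWarmUp: true` is used here so [computeStats] can
/// run after only a few samples (the default mode requires a full measured
/// sample count first):
///
/// ```
/// final Timeseries series = Timeseries('draw_frame', true, useCustomWarmUp: true);
/// series.add(16000.0, isWarmUpValue: false);
/// final TimeseriesStats stats = series.computeStats();
/// print(stats.average);
/// ```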
class Timeseries {
  Timeseries(this.name, this.isReported, {this.useCustomWarmUp = false})
      : _warmUpFrameCount = useCustomWarmUp ? 0 : null;

  /// The label of this timeseries used for debugging and result inspection.
  final String name;

  /// Whether this timeseries is reported to the benchmark dashboard.
  ///
  /// If `true` a new benchmark card is created for the timeseries and is
  /// visible on the dashboard.
  ///
  /// If `false` the data is stored but it does not show up on the dashboard.
  /// Use unreported metrics for metrics that are useful for manual inspection
  /// but that are too fine-grained to be useful for tracking on the dashboard.
  final bool isReported;

  /// Whether to delimit warm-up frames in a custom way.
  final bool useCustomWarmUp;

  /// The number of frames ignored as warm-up frames, used only
  /// when [useCustomWarmUp] is true.
  int _warmUpFrameCount;

  /// The number of frames ignored as warm-up frames.
  int get warmUpFrameCount => useCustomWarmUp
      ? _warmUpFrameCount
      : count - _kMeasuredSampleCount;

  /// List of all the values that have been recorded.
  ///
  /// This list has no limit.
  final List<double> _allValues = <double>[];

  /// The total amount of data collected, including ones that were dropped
  /// because of the sample size limit.
  int get count => _allValues.length;

  /// Extracts useful statistics out of this timeseries.
  ///
  /// See [TimeseriesStats] for more details.
  TimeseriesStats computeStats() {
    final int finalWarmUpFrameCount = warmUpFrameCount;

    assert(finalWarmUpFrameCount >= 0 && finalWarmUpFrameCount < count);

    // The first few values we simply discard and never look at. They're from the warm-up phase.
    final List<double> warmUpValues = _allValues.sublist(0, finalWarmUpFrameCount);

    // Values we analyze.
    final List<double> candidateValues = _allValues.sublist(finalWarmUpFrameCount);

    // The average that includes outliers.
    final double dirtyAverage = _computeAverage(name, candidateValues);

    // The standard deviation that includes outliers.
    final double dirtyStandardDeviation = _computeStandardDeviationForPopulation(name, candidateValues);

    // Any value that's higher than this is considered an outlier.
    final double outlierCutOff = dirtyAverage + dirtyStandardDeviation;

    // Candidates with outliers removed.
    final Iterable<double> cleanValues = candidateValues.where((double value) => value <= outlierCutOff);

    // Outlier candidates.
    final Iterable<double> outliers = candidateValues.where((double value) => value > outlierCutOff);

    // Final statistics.
    final double cleanAverage = _computeAverage(name, cleanValues);
    final double standardDeviation = _computeStandardDeviationForPopulation(name, cleanValues);
    final double noise = cleanAverage > 0.0 ? standardDeviation / cleanAverage : 0.0;

    // Compute outlier average. If there are no outliers the outlier average is
    // the same as clean value average. In other words, in a perfect benchmark
    // with no noise the difference between average and outlier average is zero,
    // which is the best possible outcome. Noise produces a positive difference
    // between the two.
    final double outlierAverage = outliers.isNotEmpty ? _computeAverage(name, outliers) : cleanAverage;

    final List<AnnotatedSample> annotatedValues = <AnnotatedSample>[
      for (final double warmUpValue in warmUpValues)
        AnnotatedSample(
          magnitude: warmUpValue,
          isOutlier: warmUpValue > outlierCutOff,
          isWarmUpValue: true,
        ),
      for (final double candidate in candidateValues)
        AnnotatedSample(
          magnitude: candidate,
          isOutlier: candidate > outlierCutOff,
          isWarmUpValue: false,
        ),
    ];

    return TimeseriesStats(
      name: name,
      average: cleanAverage,
      outlierCutOff: outlierCutOff,
      outlierAverage: outlierAverage,
      standardDeviation: standardDeviation,
      noise: noise,
      cleanSampleCount: cleanValues.length,
      outlierSampleCount: outliers.length,
      samples: annotatedValues,
    );
  }

  /// Adds a value to this timeseries.
  void add(double value, {@required bool isWarmUpValue}) {
    if (value < 0.0) {
      throw StateError(
        'Timeseries $name: negative metric values are not supported. Got: $value',
      );
    }
    _allValues.add(value);
    if (useCustomWarmUp && isWarmUpValue) {
      _warmUpFrameCount += 1;
    }
  }
}

/// Various statistics about a [Timeseries].
///
/// See the docs on the individual fields for more details.
@sealed
class TimeseriesStats {
  const TimeseriesStats({
    @required this.name,
    @required this.average,
    @required this.outlierCutOff,
    @required this.outlierAverage,
    @required this.standardDeviation,
    @required this.noise,
    @required this.cleanSampleCount,
    @required this.outlierSampleCount,
    @required this.samples,
  });

  /// The label used to refer to the corresponding timeseries.
  final String name;

  /// The average value of the measured samples without outliers.
  final double average;

  /// The standard deviation in the measured samples without outliers.
  final double standardDeviation;

  /// The noise as a multiple of the [average] value taken from clean samples.
  ///
  /// This value can be multiplied by 100.0 to get noise as a percentage of
  /// the average.
  ///
  /// If [average] is zero, the result is treated as a perfect score and this
  /// value is zero.
  final double noise;

  /// The maximum value a sample can have without being considered an outlier.
  ///
  /// See [Timeseries.computeStats] for details on how this value is computed.
  final double outlierCutOff;

  /// The average of outlier samples.
  ///
  /// This value can be used to judge how badly we jank, when we jank.
  ///
  /// Another useful metric is the difference between [outlierAverage] and
  /// [average]. The smaller the value, the more predictable the performance
  /// of the corresponding benchmark.
  final double outlierAverage;

  /// The number of measured samples after outliers are removed.
  final int cleanSampleCount;

  /// The number of outliers.
  final int outlierSampleCount;

  /// All collected samples, annotated with statistical information.
  ///
  /// See [AnnotatedSample] for more details.
  final List<AnnotatedSample> samples;

  /// Outlier average divided by clean average.
  ///
  /// This is a measure of performance consistency. The higher this number, the
  /// worse the jank when it happens. Smaller is better, with 1.0 being the
  /// perfect score. If [average] is zero, this value defaults to 1.0.
  double get outlierRatio => average > 0.0
    ? outlierAverage / average
    : 1.0; // this can only happen in a perfect benchmark that reports only zeros

  @override
  String toString() {
    final StringBuffer buffer = StringBuffer();
    buffer.writeln(
      '$name: (samples: $cleanSampleCount clean/$outlierSampleCount '
      'outliers/${cleanSampleCount + outlierSampleCount} '
      'measured/${samples.length} total)');
    buffer.writeln(' | average: $average μs');
    buffer.writeln(' | outlier average: $outlierAverage μs');
    buffer.writeln(' | outlier/clean ratio: ${outlierRatio}x');
    buffer.writeln(' | noise: ${_ratioToPercent(noise)}');
    return buffer.toString();
  }
}

/// Annotates a single measurement with statistical information.
@sealed
class AnnotatedSample {
  const AnnotatedSample({
    @required this.magnitude,
    @required this.isOutlier,
    @required this.isWarmUpValue,
  });

  /// The non-negative raw result of the measurement.
  final double magnitude;

  /// Whether this sample was considered an outlier.
  final bool isOutlier;

  /// Whether this sample was taken during the warm-up phase.
  ///
  /// If this value is `true`, this sample does not participate in
  /// statistical computations. However, the sample would still be
  /// shown in the visualization of results so that the benchmark
  /// can be inspected manually to make sure there's a predictable
  /// warm-up regression slope.
  final bool isWarmUpValue;
}

/// Base class for a profile collected from running a benchmark.
class Profile {
  Profile({@required this.name, this.useCustomWarmUp = false})
      : assert(name != null),
        _isWarmingUp = useCustomWarmUp;

  /// The name of the benchmark that produced this profile.
  final String name;

  /// Whether to delimit warm-up frames in a custom way.
  final bool useCustomWarmUp;

  /// Whether we are measuring warm-up frames currently.
  bool get isWarmingUp => _isWarmingUp;

  bool _isWarmingUp;

  /// Stop the warm-up phase.
  ///
  /// May only be called when [useCustomWarmUp] and [isWarmingUp] are both
  /// true, and at most once for each profile.
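  ///
  /// A sketch of the intended flow (benchmark code paraphrased, not taken
  /// from this file):
  ///
  /// ```
  /// final Profile profile = Profile(name: 'my_benchmark', useCustomWarmUp: true);
  /// // ...record warm-up frames; their samples are marked as warm-up values...
  /// profile.stopWarmingUp();
  /// // ...samples recorded from here on count as measured data...
  /// ```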
  void stopWarmingUp() {
    if (!useCustomWarmUp) {
      throw Exception('`stopWarmingUp` should be used only when `useCustomWarmUp` is true.');
    } else if (!_isWarmingUp) {
      throw Exception('Warm-up already stopped.');
    } else {
      _isWarmingUp = false;
    }
  }

  /// This data will be used to display cards in the Flutter Dashboard.
  final Map<String, Timeseries> scoreData = <String, Timeseries>{};

  /// This data isn't displayed anywhere. It's stored for completeness purposes.
  final Map<String, dynamic> extraData = <String, dynamic>{};

  /// Invokes [callback] and records the duration of its execution under [key].
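  ///
  /// For example (mirroring the [RawRecorder] sample above):
  ///
  /// ```
  /// profile.record('loop', () {
  ///   // ...work being measured...
  /// }, reported: true);
  /// ```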
  Duration record(String key, VoidCallback callback, { @required bool reported }) {
    final Duration duration = timeAction(callback);
    addDataPoint(key, duration, reported: reported);
    return duration;
  }

  /// Adds a timed sample to the timeseries corresponding to [key].
  ///
  /// Set [reported] to `true` to report the timeseries to the dashboard UI.
  ///
  /// Set [reported] to `false` to store the data, but not show it on the
  /// dashboard UI.
  void addDataPoint(String key, Duration duration, { @required bool reported }) {
    scoreData.putIfAbsent(
        key,
        () => Timeseries(key, reported, useCustomWarmUp: useCustomWarmUp),
    ).add(duration.inMicroseconds.toDouble(), isWarmUpValue: isWarmingUp);
  }

  /// Decides whether the data collected so far is sufficient to stop, or
  /// whether the benchmark should continue collecting more data.
  ///
  /// The signal used is sample size: the benchmark continues until every
  /// timeseries has collected at least [kTotalSampleCount] samples.
  ///
  /// If any of the timeseries hasn't collected enough samples yet, this
  /// method will return true (asking the benchmark to continue collecting
  /// data).
  bool shouldContinue() {
    // If there are no `Timeseries` in the `scoreData`, then we haven't
    // recorded anything yet. Don't stop.
    if (scoreData.isEmpty) {
      return true;
    }

    // We have recorded something, but do we have enough samples? If every
    // timeseries has collected enough samples, stop the benchmark.
    return !scoreData.keys.every((String key) => scoreData[key].count >= kTotalSampleCount);
  }

  /// Returns a JSON representation of the profile that will be sent to the
  /// server.
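  ///
  /// A hypothetical example of the resulting shape (values are illustrative):
  ///
  /// ```
  /// {
  ///   "name": "draw_circle",
  ///   "scoreKeys": ["drawFrameDuration.average", "drawFrameDuration.outlierRatio"],
  ///   "drawFrameDuration.average": 16000.0,
  ///   "drawFrameDuration.outlierAverage": 17000.0,
  ///   "drawFrameDuration.outlierRatio": 1.0625,
  ///   "drawFrameDuration.noise": 0.04
  /// }
  /// ```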
  Map<String, dynamic> toJson() {
    final List<String> scoreKeys = <String>[];
    final Map<String, dynamic> json = <String, dynamic>{
      'name': name,
      'scoreKeys': scoreKeys,
    };

    for (final String key in scoreData.keys) {
      final Timeseries timeseries = scoreData[key];

      if (timeseries.isReported) {
        scoreKeys.add('$key.average');
        // Report `outlierRatio` rather than `outlierAverage`, because
        // the absolute value of outliers is less interesting than the
        // ratio.
        scoreKeys.add('$key.outlierRatio');
      }

      final TimeseriesStats stats = timeseries.computeStats();
      json['$key.average'] = stats.average;
      json['$key.outlierAverage'] = stats.outlierAverage;
      json['$key.outlierRatio'] = stats.outlierRatio;
      json['$key.noise'] = stats.noise;
    }

    json.addAll(extraData);

    return json;
  }

  @override
  String toString() {
    final StringBuffer buffer = StringBuffer();
    buffer.writeln('name: $name');
    for (final String key in scoreData.keys) {
      final Timeseries timeseries = scoreData[key];
      final TimeseriesStats stats = timeseries.computeStats();
      buffer.writeln(stats.toString());
    }
    for (final String key in extraData.keys) {
      final dynamic value = extraData[key];
      if (value is List) {
        buffer.writeln('$key:');
        for (final dynamic item in value) {
          buffer.writeln(' - $item');
        }
      } else {
        buffer.writeln('$key: $value');
      }
    }
    return buffer.toString();
  }
}

/// Computes the arithmetic mean (or average) of given [values].
double _computeAverage(String label, Iterable<double> values) {
  if (values.isEmpty) {
    throw StateError('$label: attempted to compute an average of an empty value list.');
  }

  final double sum = values.reduce((double a, double b) => a + b);
  return sum / values.length;
}

/// Computes population standard deviation.
///
/// Unlike sample standard deviation, which divides by N - 1, this divides by N.
///
/// See also:
///
/// * https://en.wikipedia.org/wiki/Standard_deviation
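///
/// Concretely, for a population `x_1..x_N` with mean `m`:
///
/// ```
/// sigma = sqrt(((x_1 - m)^2 + ... + (x_N - m)^2) / N)
/// ```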
double _computeStandardDeviationForPopulation(String label, Iterable<double> population) {
  if (population.isEmpty) {
    throw StateError('$label: attempted to compute the standard deviation of an empty population.');
  }
  final double mean = _computeAverage(label, population);
  final double sumOfSquaredDeltas = population.fold<double>(
    0.0,
    (double previous, double value) => previous + math.pow(value - mean, 2),
  );
  return math.sqrt(sumOfSquaredDeltas / population.length);
}

String _ratioToPercent(double value) {
  return '${(value * 100).toStringAsFixed(2)}%';
}

/// Implemented by recorders that use [_RecordingWidgetsBinding] to receive
/// frame life-cycle calls.
abstract class FrameRecorder {
  /// Add a callback that will be called by the recorder when it stops recording.
  void registerDidStop(VoidCallback cb);

  /// Called just before calling [SchedulerBinding.handleDrawFrame].
  void frameWillDraw();

  /// Called immediately after calling [SchedulerBinding.handleDrawFrame].
  void frameDidDraw();

  /// Reports an error.
  ///
  /// The implementation is expected to halt benchmark execution as soon as possible.
  void _onError(dynamic error, StackTrace stackTrace);
}

/// A variant of [WidgetsBinding] that collaborates with a [Recorder] to decide
/// when to stop pumping frames.
///
/// A normal [WidgetsBinding] typically always pumps frames whenever a widget
/// instructs it to do so by calling [scheduleFrame] (transitively via
/// `setState`). This binding will stop pumping new frames as soon as benchmark
/// parameters are satisfactory (e.g. when the metric noise levels become low
/// enough).
class _RecordingWidgetsBinding extends BindingBase
    with
        GestureBinding,
        SchedulerBinding,
        ServicesBinding,
        PaintingBinding,
        SemanticsBinding,
        RendererBinding,
        WidgetsBinding {
  /// Makes an instance of [_RecordingWidgetsBinding] the current binding.
  static _RecordingWidgetsBinding ensureInitialized() {
    if (WidgetsBinding.instance == null) {
      _RecordingWidgetsBinding();
    }
    return WidgetsBinding.instance as _RecordingWidgetsBinding;
  }

  FrameRecorder _recorder;
  bool _hasErrored = false;

  /// Set to `true` to short-circuit all frame lifecycle methods once the
  /// benchmark has stopped collecting data.
  bool _benchmarkStopped = false;

  void _beginRecording(FrameRecorder recorder, Widget widget) {
    if (_recorder != null) {
      throw Exception(
        'Cannot call _RecordingWidgetsBinding._beginRecording more than once',
      );
    }
    final FlutterExceptionHandler originalOnError = FlutterError.onError;

    recorder.registerDidStop(() {
      _benchmarkStopped = true;
    });

    // Fail hard and fast on errors. Benchmarks should not have any errors.
    FlutterError.onError = (FlutterErrorDetails details) {
      _haltBenchmarkWithError(details.exception, details.stack);
      originalOnError(details);
    };
    _recorder = recorder;
    runApp(widget);
  }

  void _haltBenchmarkWithError(dynamic error, StackTrace stackTrace) {
    if (_hasErrored) {
      return;
    }
    _recorder._onError(error, stackTrace);
    _hasErrored = true;
  }

  @override
  void handleBeginFrame(Duration rawTimeStamp) {
    // Don't keep on truckin' if there's an error or the benchmark has stopped.
    if (_hasErrored || _benchmarkStopped) {
      return;
    }
    try {
      super.handleBeginFrame(rawTimeStamp);
    } catch (error, stackTrace) {
      _haltBenchmarkWithError(error, stackTrace);
      rethrow;
    }
  }

  @override
  void scheduleFrame() {
    // Don't keep on truckin' if there's an error or the benchmark has stopped.
    if (_hasErrored || _benchmarkStopped) {
      return;
    }
    super.scheduleFrame();
  }

  @override
  void handleDrawFrame() {
    // Don't keep on truckin' if there's an error or the benchmark has stopped.
    if (_hasErrored || _benchmarkStopped) {
      return;
    }
    try {
      _recorder.frameWillDraw();
      super.handleDrawFrame();
      _recorder.frameDidDraw();
    } catch (error, stackTrace) {
      _haltBenchmarkWithError(error, stackTrace);
      rethrow;
    }
  }
}

int _currentFrameNumber = 1;

/// If [_calledStartMeasureFrame] is true, we have called [startMeasureFrame]
/// but have not yet called its pairing [endMeasureFrame].
///
/// This flag ensures that [startMeasureFrame] and [endMeasureFrame] are always
/// called in pairs, with [startMeasureFrame] followed by [endMeasureFrame].
bool _calledStartMeasureFrame = false;

/// Whether we are recording a measured frame.
///
/// This flag ensures that we always stop measuring a frame once we have
/// started one. It is needed because warm-up frames are skipped: for them
/// [startMeasureFrame] does not begin a measurement, so [endMeasureFrame]
/// must know whether one is in progress.
bool _isMeasuringFrame = false;

/// Adds a marker indicating the beginning of frame rendering.
///
/// This adds an event to the performance trace used to find measured frames in
/// Chrome tracing data. The tracing data contains all frames, but some
/// benchmarks are only interested in a subset of frames. For example,
/// [WidgetBuildRecorder] only measures frames that build widgets, and ignores
/// frames that clear the screen.
///
/// Warm-up frames are not measured. If [profile.isWarmingUp] is true,
/// this function does nothing.
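///
/// Callers pair this function with [endMeasureFrame], as
/// [SceneBuilderRecorder.run] does:
///
/// ```
/// startMeasureFrame(profile);
/// // ...build and render the frame...
/// endMeasureFrame();
/// ```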
void startMeasureFrame(Profile profile) {
  if (_calledStartMeasureFrame) {
    throw Exception('`startMeasureFrame` called twice in a row.');
  }

  _calledStartMeasureFrame = true;

  if (!profile.isWarmingUp) {
    // Tell the browser to mark the beginning of the frame.
    html.window.performance.mark('measured_frame_start#$_currentFrameNumber');

    _isMeasuringFrame = true;
  }
}

/// Signals the end of a measured frame.
///
/// See [startMeasureFrame] for details on what this instrumentation is used
/// for.
1119 1120 1121 1122
///
/// Warm-up frames are not measured. If [profile.isWarmingUp] was true
/// when the corresponding [startMeasureFrame] was called,
/// this function does nothing.
void endMeasureFrame() {
  if (!_calledStartMeasureFrame) {
    throw Exception('`startMeasureFrame` has not been called before calling `endMeasureFrame`');
  }

  _calledStartMeasureFrame = false;

  if (_isMeasuringFrame) {
    // Tell the browser to mark the end of the frame, and measure the duration.
    html.window.performance.mark('measured_frame_end#$_currentFrameNumber');
    html.window.performance.measure(
      'measured_frame',
      'measured_frame_start#$_currentFrameNumber',
      'measured_frame_end#$_currentFrameNumber',
    );

    // Increment the current frame number.
    _currentFrameNumber += 1;

    _isMeasuringFrame = false;
  }
}

/// A function that receives a benchmark value from the framework.
typedef EngineBenchmarkValueListener = void Function(num value);

// Maps from a value label name to a listener.
final Map<String, EngineBenchmarkValueListener> _engineBenchmarkListeners = <String, EngineBenchmarkValueListener>{};

/// Registers a [listener] for engine benchmark values labeled by [name].
///
/// If another listener is already registered for [name], throws a [StateError];
/// call [stopListeningToEngineBenchmarkValues] first to replace it.
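///
/// For example (mirroring how [WidgetRecorder.run] consumes engine metrics):
///
/// ```
/// registerEngineBenchmarkValueListener(kProfilePrerollFrame, (num value) {
///   print('preroll: $value');
/// });
/// // ...run the benchmark...
/// stopListeningToEngineBenchmarkValues(kProfilePrerollFrame);
/// ```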
void registerEngineBenchmarkValueListener(String name, EngineBenchmarkValueListener listener) {
  if (listener == null) {
    throw ArgumentError(
      'Listener must not be null. To stop listening to engine benchmark values '
      'under label "$name", call stopListeningToEngineBenchmarkValues(\'$name\').',
    );
  }

  if (_engineBenchmarkListeners.containsKey(name)) {
    throw StateError(
      'A listener for "$name" is already registered.\n'
      'Call `stopListeningToEngineBenchmarkValues` to unregister the previous '
      'listener before registering a new one.'
    );
  }

  if (_engineBenchmarkListeners.isEmpty) {
    // The first listener is being registered. Register the global listener.
    js_util.setProperty(html.window, '_flutter_internal_on_benchmark', _dispatchEngineBenchmarkValue);
  }

  _engineBenchmarkListeners[name] = listener;
}

/// Stops listening to engine benchmark values under the label [name].
void stopListeningToEngineBenchmarkValues(String name) {
  _engineBenchmarkListeners.remove(name);
  if (_engineBenchmarkListeners.isEmpty) {
    // The last listener unregistered. Remove the global listener.
    js_util.setProperty(html.window, '_flutter_internal_on_benchmark', null);
  }
}

// Dispatches a benchmark value reported by the engine to the relevant listener.
//
// If there are no listeners registered for [name], ignores the value.
void _dispatchEngineBenchmarkValue(String name, double value) {
  final EngineBenchmarkValueListener listener = _engineBenchmarkListeners[name];
  if (listener != null) {
    listener(value);
  }
}