[web] add image decoder benchmark (#93174)

4dd56df4 · Yegor · GitHub · 0125b032 · 4dd56df4 · 4dd56df4
Unverified Commit 4dd56df4 authored Nov 08, 2021 by Yegor Committed by GitHub Nov 08, 2021
4 changed files
--- a/dev/benchmarks/macrobenchmarks/lib/src/web/bench_dynamic_clip_on_static_picture.dart
+++ b/dev/benchmarks/macrobenchmarks/lib/src/web/bench_dynamic_clip_on_static_picture.dart
@@ -36,7 +36,7 @@ class BenchDynamicClipOnStaticPicture extends SceneBuilderRecorder {
    // If the scrollable extent is too small, the benchmark may end up
    // scrolling the picture out of the clip area entirely, resulting in
    // bogus metric values.
-    const double maxScrollExtent = kTotalSampleCount * kScrollDelta;
+    const double maxScrollExtent = kDefaultTotalSampleCount * kScrollDelta;
    const double pictureHeight = kRows * kRowHeight;
    if (maxScrollExtent > pictureHeight) {
      throw Exception(

--- a/dev/benchmarks/macrobenchmarks/lib/src/web/bench_image_decoding.dart
+++ b/dev/benchmarks/macrobenchmarks/lib/src/web/bench_image_decoding.dart
+// Copyright 2014 The Flutter Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import 'dart:html' as html;
+import 'dart:typed_data';
+import 'dart:ui' as ui;
+
+import 'recorder.dart';
+
+/// Benchmarks how long it takes to decode image frames.
+///
+/// Only the total decoding latency is measured; the effect on jank is not.
+/// Blocking and non-blocking decoders are therefore indistinguishable here.
+/// For example, the WASM codecs run on the main thread and block the UI,
+/// causing jank, while the browser's WebCodecs API is asynchronous, runs on a
+/// separate thread, and does not jank. Both may still report the same result.
+///
+/// The HTML renderer is not supported by this benchmark because it cannot
+/// decode image frames (it always returns 1 dummy frame, even for animated
+/// images).
+class BenchImageDecoding extends RawRecorder {
+  BenchImageDecoding()
+    : super(name: benchmarkName, useCustomWarmUp: true);
+
+  static const String benchmarkName = 'bench_image_decoding';
+
+  // These test images are taken from https://github.com/flutter/flutter_gallery_assets/tree/master/lib/splash_effects
+  static const List<String> _imageUrls = <String>[
+    'assets/packages/flutter_gallery_assets/splash_effects/splash_effect_1.gif',
+    'assets/packages/flutter_gallery_assets/splash_effects/splash_effect_2.gif',
+    'assets/packages/flutter_gallery_assets/splash_effects/splash_effect_3.gif',
+  ];
+
+  // The number of samples used for warm-up.
+  static const int _warmUpSampleCount = 5;
+
+  // The number of samples used to measure performance after the warm-up.
+  static const int _measuredSampleCount = 20;
+
+  // The encoded bytes of each test image, filled in by [setUpAll].
+  final List<Uint8List> _imageData = <Uint8List>[];
+
+  // The number of samples recorded so far.
+  int _sampleCount = 0;
+
+  @override
+  Future<void> setUpAll() async {
+    // Fetch the images only when they have not been loaded already.
+    if (_imageData.isEmpty) {
+      for (final String url in _imageUrls) {
+        final html.Body response = await html.window.fetch(url) as html.Body;
+        final ByteBuffer buffer = await response.arrayBuffer() as ByteBuffer;
+        _imageData.add(buffer.asUint8List());
+      }
+    }
+  }
+
+  @override
+  Future<void> body(Profile profile) async {
+    // A single sample covers decoding every test image; all decodes are
+    // started together and awaited as a group.
+    await profile.recordAsync('recordImageDecode', () async {
+      await Future.wait(_imageData.map<Future<void>>(_decodeImage));
+    }, reported: true);
+
+    _sampleCount++;
+
+    final bool warmUpJustFinished = _sampleCount == _warmUpSampleCount;
+    if (warmUpJustFinished) {
+      profile.stopWarmingUp();
+    }
+
+    final bool collectedAllSamples = _sampleCount >= _warmUpSampleCount + _measuredSampleCount;
+    if (collectedAllSamples) {
+      profile.stopBenchmark();
+    }
+  }
+}
+
+/// Decodes the first few frames of the encoded image in [data].
+///
+/// Instantiates a codec for [data], decodes a fixed number of frames, and
+/// disposes each decoded image as soon as it is produced.
+///
+/// Throws an [Exception] if the image has fewer frames than the benchmark
+/// decodes.
+Future<void> _decodeImage(Uint8List data) async {
+  const int decodeFrameCount = 5;
+  final ui.Codec codec = await ui.instantiateImageCodec(data);
+  try {
+    if (codec.frameCount < decodeFrameCount) {
+      throw Exception(
+        'Test image contains too few frames for this benchmark (${codec.frameCount}). '
+        'Choose a test image with at least $decodeFrameCount frames.'
+      );
+    }
+    for (int i = 0; i < decodeFrameCount; i++) {
+      (await codec.getNextFrame()).image.dispose();
+    }
+  } finally {
+    // Release the codec on every path, including when the frame-count check
+    // or a frame decode throws, so a failing run does not leak it.
+    codec.dispose();
+  }
+}
--- a/dev/benchmarks/macrobenchmarks/lib/src/web/recorder.dart
+++ b/dev/benchmarks/macrobenchmarks/lib/src/web/recorder.dart
@@ -16,16 +16,22 @@ import 'package:flutter/services.dart';
 import 'package:flutter/widgets.dart';
 import 'package:meta/meta.dart';

-/// The number of samples from warm-up iterations.
+/// The default number of samples from warm-up iterations.
 ///
-/// We warm-up the benchmark prior to measuring to allow JIT and caches to settle.
-const int _kWarmUpSampleCount = 200;
+/// This value is used when [Profile.useCustomWarmUp] is set to false.
+///
+/// The benchmark is warmed up prior to measuring to allow JIT and caches to settle.
+const int _kDefaultWarmUpSampleCount = 200;

-/// The number of samples we use to collect statistics from.
-const int _kMeasuredSampleCount = 100;
+/// The default number of samples collected to compute benchmark statistics.
+///
+/// This value is used when [Profile.useCustomWarmUp] is set to false.
+const int _kDefaultMeasuredSampleCount = 100;

-/// The total number of samples collected by a benchmark.
-const int kTotalSampleCount = _kWarmUpSampleCount + _kMeasuredSampleCount;
+/// The default total number of samples collected by a benchmark.
+///
+/// This value is used when [Profile.useCustomWarmUp] is set to false.
+const int kDefaultTotalSampleCount = _kDefaultWarmUpSampleCount + _kDefaultMeasuredSampleCount;

 /// A benchmark metric that includes frame-related computations prior to
 /// submitting layer and picture operations to the underlying renderer, such as
@@ -38,6 +44,10 @@ const String kProfilePrerollFrame = 'preroll_frame';
 const String kProfileApplyFrame = 'apply_frame';

 /// Measures the amount of time [action] takes.
+///
+/// See also:
+///
+///  * [timeAsyncAction], which measures the time of asynchronous work.
 Duration timeAction(VoidCallback action) {
  final Stopwatch stopwatch = Stopwatch()..start();
  action();
@@ -45,6 +55,18 @@ Duration timeAction(VoidCallback action) {
  return stopwatch.elapsed;
 }

+/// Times how long the future returned by [action] takes to complete.
+///
+/// See also:
+///
+///  * [timeAction], the counterpart for synchronous work.
+Future<Duration> timeAsyncAction(AsyncCallback action) async {
+  final Stopwatch timer = Stopwatch();
+  timer.start();
+  await action();
+  return (timer..stop()).elapsed;
+}
+
 /// A function that performs asynchronous work.
 typedef AsyncVoidCallback = Future<void> Function();

@@ -161,12 +183,16 @@ abstract class Recorder {
 /// }
 /// ```
 abstract class RawRecorder extends Recorder {
-  RawRecorder({required String name}) : super._(name, false);
+  RawRecorder({required String name, bool useCustomWarmUp = false})
+    : _useCustomWarmUp = useCustomWarmUp, super._(name, false);
+
+  /// Whether to delimit warm-up frames in a custom way.
+  final bool _useCustomWarmUp;

  /// The body of the benchmark.
  ///
  /// This is the part that records measurements of the benchmark.
-  void body(Profile profile);
+  FutureOr<void> body(Profile profile);

  @override
  Profile? get profile => _profile;
@@ -175,10 +201,13 @@ abstract class RawRecorder extends Recorder {
  @override
  @nonVirtual
  Future<Profile> run() async {
-    _profile = Profile(name: name);
+    _profile = Profile(name: name, useCustomWarmUp: _useCustomWarmUp);
    do {
      await Future<void>.delayed(Duration.zero);
-      body(_profile!);
+      final FutureOr<void> result = body(_profile!);
+      if (result is Future) {
+        await result;
+      }
    } while (shouldContinue());
    return _profile!;
  }
@@ -552,13 +581,15 @@ class _WidgetBuildRecorderHostState extends State<_WidgetBuildRecorderHost> {

 /// Series of time recordings indexed in time order.
 ///
-/// It can calculate [average], [standardDeviation] and [noise]. If the amount
-/// of data collected is higher than [_kMeasuredSampleCount], then these
-/// calculations will only apply to the latest [_kMeasuredSampleCount] data
-/// points.
+/// A timeseries is expected to contain at least one warm-up frame added by
+/// calling [add] with `isWarmUpValue` set to true, followed by at least one
+/// measured value added by calling [add] with `isWarmUpValue` set to false.
 class Timeseries {
-  Timeseries(this.name, this.isReported, {this.useCustomWarmUp = false})
-      : _warmUpFrameCount = useCustomWarmUp ? 0 : null;
+  /// Creates an empty timeseries.
+  ///
+  /// The [name] is a unique name of this timeseries. If [isReported] is true
+  /// this timeseries is reported to the benchmark dashboard.
+  Timeseries(this.name, this.isReported);

  /// The label of this timeseries used for debugging and result inspection.
  final String name;
@@ -573,17 +604,8 @@ class Timeseries {
  /// but that are too fine-grained to be useful for tracking on the dashboard.
  final bool isReported;

-  /// Whether to delimit warm-up frames in a custom way.
-  final bool useCustomWarmUp;
-
-  /// The number of frames ignored as warm-up frames, used only
-  /// when [useCustomWarmUp] is true.
-  int? _warmUpFrameCount;
-
-  /// The number of frames ignored as warm-up frames.
-  int get warmUpFrameCount => useCustomWarmUp
-      ? _warmUpFrameCount!
-      : count - _kMeasuredSampleCount;
+  /// The number of samples ignored as warm-up frames.
+  int _warmUpSampleCount = 0;

  /// List of all the values that have been recorded.
  ///
@@ -598,15 +620,26 @@ class Timeseries {
  ///
  /// See [TimeseriesStats] for more details.
  TimeseriesStats computeStats() {
-    final int finalWarmUpFrameCount = warmUpFrameCount;
-
-    assert(finalWarmUpFrameCount >= 0 && finalWarmUpFrameCount < count);
+    // Assertions do not use the `assert` keyword because benchmarks run in
+    // profile mode, where asserts are tree-shaken out.
+    if (_warmUpSampleCount == 0) {
+      throw StateError(
+        'The benchmark did not warm-up. Use at least one sample to warm-up '
+        'the benchmark to reduce noise.');
+    }
+    if (_warmUpSampleCount >= count) {
+      throw StateError(
+        'The benchmark did not report any measured samples. Add at least one '
+        'sample after warm-up is done. There were $_warmUpSampleCount warm-up '
+        'samples, and no measured samples in this timeseries.'
+      );
+    }

    // The first few values we simply discard and never look at. They're from the warm-up phase.
-    final List<double> warmUpValues = _allValues.sublist(0, finalWarmUpFrameCount);
+    final List<double> warmUpValues = _allValues.sublist(0, _warmUpSampleCount);

    // Values we analyze.
-    final List<double> candidateValues = _allValues.sublist(finalWarmUpFrameCount);
+    final List<double> candidateValues = _allValues.sublist(_warmUpSampleCount);

    // The average that includes outliers.
    final double dirtyAverage = _computeAverage(name, candidateValues);
@@ -663,6 +696,9 @@ class Timeseries {
    );
  }

+  // Whether the timeseries is in the warm-up phase.
+  bool _isWarmingUp = true;
+
  /// Adds a value to this timeseries.
  void add(double value, {required bool isWarmUpValue}) {
    if (value < 0.0) {
@@ -670,10 +706,17 @@ class Timeseries {
        'Timeseries $name: negative metric values are not supported. Got: $value',
      );
    }
-    _allValues.add(value);
-    if (useCustomWarmUp && isWarmUpValue) {
-      _warmUpFrameCount = (_warmUpFrameCount ?? 0) + 1;
+    if (isWarmUpValue) {
+      if (!_isWarmingUp) {
+        throw StateError(
+          'A warm-up value was added to the timeseries after the warm-up phase finished.'
+        );
+      }
+      _warmUpSampleCount += 1;
+    } else if (_isWarmingUp) {
+      _isWarmingUp = false;
    }
+    _allValues.add(value);
  }
 }

@@ -787,9 +830,17 @@ class AnnotatedSample {

 /// Base class for a profile collected from running a benchmark.
 class Profile {
+  /// Creates an empty profile that can be populated with benchmark samples
+  /// using [record], [recordAsync], and [addDataPoint] methods.
+  ///
+  /// The [name] is the unique name of this profile that distinguishes it from
+  /// other profiles. Typically, the name will describe the benchmark.
+  ///
+  /// If [useCustomWarmUp] is true the benchmark will continue running until
+  /// [stopBenchmark] is called. Otherwise, the benchmark collects the
+  /// [kDefaultTotalSampleCount] samples and stops automatically.
  Profile({required this.name, this.useCustomWarmUp = false})
-      : assert(name != null),
-        _isWarmingUp = useCustomWarmUp;
+      : assert(name != null);

  /// The name of the benchmark that produced this profile.
  final String name;
@@ -797,26 +848,48 @@ class Profile {
  /// Whether to delimit warm-up frames in a custom way.
  final bool useCustomWarmUp;

-  /// Whether we are measuring warm-up frames currently.
+  /// True if the benchmark is currently measuring warm-up frames.
  bool get isWarmingUp => _isWarmingUp;
+  bool _isWarmingUp = true;

-  bool _isWarmingUp;
+  /// True if the benchmark is currently running.
+  bool get isRunning => _isRunning;
+  bool _isRunning = true;

-  /// Stop the warm-up phase.
+  /// Stops the warm-up phase.
  ///
-  /// Call this method only when [useCustomWarmUp] and [isWarmingUp] are both
-  /// true.
-  /// Call this method only once for each profile.
+  /// After calling this method, subsequent calls to [record], [recordAsync],
+  /// and [addDataPoint] will record measured data samples.
+  ///
+  /// Call this method only once for each profile and only when [isWarmingUp]
+  /// is true.
  void stopWarmingUp() {
-    if (!useCustomWarmUp) {
-      throw Exception('`stopWarmingUp` should be used only when `useCustomWarmUp` is true.');
-    } else if (!_isWarmingUp) {
-      throw Exception('Warm-up already stopped.');
+    if (!_isWarmingUp) {
+      throw StateError('Warm-up already stopped.');
    } else {
      _isWarmingUp = false;
    }
  }

+  /// Marks the benchmark as finished by setting [isRunning] to false.
+  ///
+  /// Throws a [StateError] if called while [isWarmingUp] is still true (i.e.
+  /// before [stopWarmingUp]) or before any data point has been recorded.
+  void stopBenchmark() {
+    if (_isWarmingUp) {
+      throw StateError(
+        'Warm-up has not finished yet. Benchmark should only be stopped after '
+        'it recorded at least one sample after the warm-up.'
+      );
+    }
+    if (scoreData.isEmpty) {
+      throw StateError(
+        'The benchmark did not collect any data.'
+      );
+    }
+    _isRunning = false;
+  }
+
  /// This data will be used to display cards in the Flutter Dashboard.
  final Map<String, Timeseries> scoreData = <String, Timeseries>{};

@@ -824,12 +897,27 @@ class Profile {
  final Map<String, dynamic> extraData = <String, dynamic>{};

  /// Invokes [callback] and records the duration of its execution under [key].
+  ///
+  /// See also:
+  ///
+  ///  * [recordAsync], which records asynchronous work.
  Duration record(String key, VoidCallback callback, { required bool reported }) {
    final Duration duration = timeAction(callback);
    addDataPoint(key, duration, reported: reported);
    return duration;
  }

+  /// Awaits the future returned by [callback] and records its duration under [key].
+  ///
+  /// See also:
+  ///
+  ///  * [record], the counterpart for synchronous work.
+  Future<Duration> recordAsync(String key, AsyncCallback callback, { required bool reported }) async {
+    final Duration elapsed = await timeAsyncAction(callback);
+    addDataPoint(key, elapsed, reported: reported);
+    return elapsed;
+  }
+
  /// Adds a timed sample to the timeseries corresponding to [key].
  ///
  /// Set [reported] to `true` to report the timeseries to the dashboard UI.
@@ -839,8 +927,43 @@ class Profile {
  void addDataPoint(String key, Duration duration, { required bool reported }) {
    scoreData.putIfAbsent(
        key,
-        () => Timeseries(key, reported, useCustomWarmUp: useCustomWarmUp),
+        () => Timeseries(key, reported),
    ).add(duration.inMicroseconds.toDouble(), isWarmUpValue: isWarmingUp);
+
+    if (!useCustomWarmUp) {
+      // The stopWarmingUp and stopBenchmark will not be called. Use the
+      // auto-stopping logic.
+      _autoUpdateBenchmarkPhase();
+    }
+  }
+
+  /// Advances the benchmark phase based on the samples collected so far.
+  ///
+  /// Stops the warm-up once every timeseries has [_kDefaultWarmUpSampleCount]
+  /// samples, and stops the benchmark once every timeseries has
+  /// [kDefaultTotalSampleCount] samples. Throws a [StateError] if
+  /// [useCustomWarmUp] is true, because phases are then controlled manually.
+  void _autoUpdateBenchmarkPhase() {
+    if (useCustomWarmUp) {
+      throw StateError(
+        'Must not call _autoUpdateBenchmarkPhase if custom warm-up is used. '
+        'Call `stopWarmingUp` and `stopBenchmark` instead.'
+      );
+    }
+
+    if (_isWarmingUp) {
+      final bool doesHaveEnoughWarmUpSamples = scoreData.values
+        .every((Timeseries series) => series.count >= _kDefaultWarmUpSampleCount);
+      if (doesHaveEnoughWarmUpSamples) {
+        stopWarmingUp();
+      }
+    } else if (_isRunning) {
+      final bool doesHaveEnoughTotalSamples = scoreData.values
+        .every((Timeseries series) => series.count >= kDefaultTotalSampleCount);
+      if (doesHaveEnoughTotalSamples) {
+        stopBenchmark();
+      }
+    }
+  }

  /// Decides whether the data collected so far is sufficient to stop, or
@@ -858,9 +981,7 @@ class Profile {
      return true;
    }

-    // We have recorded something, but do we have enough samples? If every
-    // timeseries has collected enough samples, stop the benchmark.
-    return !scoreData.keys.every((String key) => scoreData[key]!.count >= kTotalSampleCount);
+    return isRunning;
  }

  /// Returns a JSON representation of the profile that will be sent to the

--- a/dev/benchmarks/macrobenchmarks/lib/web_benchmarks.dart
+++ b/dev/benchmarks/macrobenchmarks/lib/web_benchmarks.dart
@@ -15,6 +15,7 @@ import 'src/web/bench_clipped_out_pictures.dart';
 import 'src/web/bench_default_target_platform.dart';
 import 'src/web/bench_draw_rect.dart';
 import 'src/web/bench_dynamic_clip_on_static_picture.dart';
+import 'src/web/bench_image_decoding.dart';
 import 'src/web/bench_mouse_region_grid_hover.dart';
 import 'src/web/bench_mouse_region_grid_scroll.dart';
 import 'src/web/bench_mouse_region_mixed_grid_hover.dart';
@@ -62,6 +63,11 @@ final Map<String, RecorderFactory> benchmarks = <String, RecorderFactory>{
    BenchTextLayout.canvasKitBenchmarkName: () => BenchTextLayout.canvasKit(),
    BenchBuildColorsGrid.canvasKitBenchmarkName: () => BenchBuildColorsGrid.canvasKit(),
    BenchTextCachedLayout.canvasKitBenchmarkName: () => BenchTextCachedLayout.canvasKit(),
+
+    // The HTML renderer does not decode frame-by-frame. It just drops an <img>
+    // element and lets it animate automatically with no feedback to the
+    // framework. So this benchmark only makes sense in CanvasKit.
+    BenchImageDecoding.benchmarkName: () => BenchImageDecoding(),
  },

  // HTML-only benchmarks