Unverified commit 056e455e authored by Ming Lyu (CareF), committed by GitHub

Add an E2E-based performance test case (#61509)

parent 8e58c51d
@@ -33,7 +33,7 @@ More detailed logs should be in `build/cubic_bezier_perf.timeline.json`.
To run the backdrop filter benchmark on a device:
To run a mobile benchmark on a device:
```
```bash
flutter drive --profile -t test_driver/run_app.dart --driver test_driver/[test_name]_test.dart
```
@@ -55,6 +55,21 @@ The key `[test_name]` can be:
- `textfield_perf`
- `cubic_bezier_perf`
### E2E benchmarks
(Ongoing work)

[E2E](https://pub.dev/packages/e2e)-based tests are driven independently of the
host machine. The following tests are E2E:

- `cull_opacity_perf`

These tests should be run with:
```bash
flutter drive --profile -t test/[test_name]_e2e.dart --driver test_driver/e2e_test.dart
```
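For example, to run the `cull_opacity_perf` E2E benchmark:

```bash
flutter drive --profile -t test/cull_opacity_perf_e2e.dart --driver test_driver/e2e_test.dart
```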
## Web benchmarks
Web benchmarks are compiled from the same entry point in `lib/web_benchmarks.dart`.
@@ -121,3 +136,10 @@ cd dev/devicelab
# Runs using the CanvasKit renderer
../../bin/cache/dart-sdk/bin/dart bin/run.dart -t bin/tasks/web_benchmarks_canvaskit.dart
```
## Frame policy test
File `test/frame_policy.dart` and its driving script `test_driver/frame_policy_test.dart`
are used for testing the [`fullyLive`](https://api.flutter.dev/flutter/flutter_test/LiveTestWidgetsFlutterBindingFramePolicy-class.html)
and [`benchmarkLive`](https://api.flutter.dev/flutter/flutter_test/LiveTestWidgetsFlutterBindingFramePolicy-class.html)
policies in terms of their effects on [`WidgetTester.handlePointerEventRecord`](https://master-api.flutter.dev/flutter/flutter_test/WidgetTester/handlePointerEventRecord.html).
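The policy is selected on the live test binding before the tests run. A minimal
sketch (an assumption for illustration, not part of this change; it relies on
`flutter drive` installing a `LiveTestWidgetsFlutterBinding`):

```dart
import 'package:flutter_test/flutter_test.dart';

void main() {
  // Under `flutter drive` (not `flutter test`), ensureInitialized() returns
  // a live binding, so this cast is expected to hold.
  final LiveTestWidgetsFlutterBinding binding =
      TestWidgetsFlutterBinding.ensureInitialized() as LiveTestWidgetsFlutterBinding;
  binding.framePolicy = LiveTestWidgetsFlutterBindingFramePolicy.benchmarkLive;
  // testWidgets(...) cases registered after this point run under the policy.
}
```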
// Copyright 2014 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// The test should be run as:
// flutter drive -t test/cull_opacity_perf_e2e.dart --driver test_driver/e2e_test.dart --trace-startup --profile
import 'package:macrobenchmarks/common.dart';
import 'util.dart';
Future<void> main() async {
macroPerfTestE2E(
'cull_opacity_perf',
kCullOpacityRouteName,
pageDelay: const Duration(seconds: 1),
duration: const Duration(seconds: 10),
timeout: const Duration(seconds: 45),
);
}
// Copyright 2014 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import 'dart:async';
import 'dart:ui';
import 'package:flutter/scheduler.dart';
import 'package:flutter_test/flutter_test.dart';
import 'package:flutter/widgets.dart';
import 'package:macrobenchmarks/common.dart';
import 'package:e2e/e2e.dart';
import 'package:macrobenchmarks/main.dart' as app;
/// The maximum amount of time considered safe to spend in a frame's build
/// phase. Anything past that risks missing the frame at 60 FPS.
///
/// Changing this does not re-evaluate existing summaries.
Duration kBuildBudget = const Duration(milliseconds: 16);
// TODO(CareF): Automatically calculate the refresh budget (#61958)
typedef ControlCallback = Future<void> Function(WidgetController controller);
void macroPerfTestE2E(
String testName,
String routeName, {
Duration pageDelay,
Duration duration = const Duration(seconds: 3),
Duration timeout = const Duration(seconds: 30),
ControlCallback body,
ControlCallback setup,
}) {
assert(() {
debugPrint(kDebugWarning);
return true;
}());
final WidgetsBinding _binding = E2EWidgetsFlutterBinding.ensureInitialized();
assert(_binding is E2EWidgetsFlutterBinding);
final E2EWidgetsFlutterBinding binding = _binding as E2EWidgetsFlutterBinding;
binding.framePolicy = LiveTestWidgetsFlutterBindingFramePolicy.benchmarkLive;
testWidgets(testName, (WidgetTester tester) async {
assert((tester.binding as LiveTestWidgetsFlutterBinding).framePolicy ==
    LiveTestWidgetsFlutterBindingFramePolicy.benchmarkLive);
app.main();
await tester.pumpAndSettle();
// The slight initial delay avoids starting the timing during a
// period of increased load on the device. Without this delay, the
// benchmark has greater noise.
// See: https://github.com/flutter/flutter/issues/19434
await tester.binding.delayed(const Duration(microseconds: 250));
final Finder scrollable =
find.byKey(const ValueKey<String>(kScrollableName));
expect(scrollable, findsOneWidget);
final Finder button =
find.byKey(ValueKey<String>(routeName), skipOffstage: false);
await tester.ensureVisible(button);
expect(button, findsOneWidget);
await tester.pumpAndSettle();
await tester.tap(button);
if (pageDelay != null) {
// Wait for the page to load
await tester.binding.delayed(pageDelay);
}
if (setup != null) {
await setup(tester);
}
await watchPerformance(binding, () async {
final Future<void> durationFuture = tester.binding.delayed(duration);
if (body != null) {
await body(tester);
}
await durationFuture;
});
}, semanticsEnabled: false, timeout: Timeout(timeout));
}
Future<void> watchPerformance(
E2EWidgetsFlutterBinding binding,
Future<void> action(),
) async {
final List<FrameTiming> frameTimings = <FrameTiming>[];
final TimingsCallback watcher = frameTimings.addAll;
binding.addTimingsCallback(watcher);
await action();
binding.removeTimingsCallback(watcher);
// TODO(CareF): determine if it's running on firebase and report metric online
final FrameTimingSummarizer frameTimes = FrameTimingSummarizer(frameTimings);
binding.reportData = <String, dynamic>{'performance': frameTimes.summary};
}
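// A minimal usage sketch of [watchPerformance] (hypothetical, not part of
// this change), assuming `binding` was obtained as in [macroPerfTestE2E]:
//
//   await watchPerformance(binding, () async {
//     // Drive the UI for a fixed duration while frame timings are recorded.
//     await binding.delayed(const Duration(seconds: 5));
//   });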
/// Summarizes a list of [FrameTiming] into the performance metrics reported
/// by these tests.
class FrameTimingSummarizer {
factory FrameTimingSummarizer(List<FrameTiming> data) {
assert(data != null);
assert(data.isNotEmpty);
final List<Duration> frameBuildTime = List<Duration>.unmodifiable(
data.map<Duration>((FrameTiming datum) => datum.buildDuration),
);
final List<Duration> frameBuildTimeSorted = List<Duration>.from(frameBuildTime)..sort();
final List<Duration> frameRasterizerTime = List<Duration>.unmodifiable(
data.map<Duration>((FrameTiming datum) => datum.rasterDuration),
);
final List<Duration> frameRasterizerTimeSorted = List<Duration>.from(frameRasterizerTime)..sort();
final Duration Function(Duration, Duration) add = (Duration a, Duration b) => a + b;
return FrameTimingSummarizer._(
frameBuildTime: frameBuildTime,
frameRasterizerTime: frameRasterizerTime,
// This average calculation is microsecond precision, which is fine
// because typical values of these times are milliseconds.
averageFrameBuildTime: frameBuildTime.reduce(add) ~/ data.length,
p90FrameBuildTime: _findPercentile(frameBuildTimeSorted, 0.90),
p99FrameBuildTime: _findPercentile(frameBuildTimeSorted, 0.99),
worstFrameBuildTime: frameBuildTimeSorted.last,
missedFrameBuildBudget: _countExceed(frameBuildTimeSorted, kBuildBudget),
averageFrameRasterizerTime: frameRasterizerTime.reduce(add) ~/ data.length,
p90FrameRasterizerTime: _findPercentile(frameRasterizerTimeSorted, 0.90),
p99FrameRasterizerTime: _findPercentile(frameRasterizerTimeSorted, 0.99),
worstFrameRasterizerTime: frameRasterizerTimeSorted.last,
missedFrameRasterizerBudget: _countExceed(frameRasterizerTimeSorted, kBuildBudget),
);
}
const FrameTimingSummarizer._({
@required this.frameBuildTime,
@required this.frameRasterizerTime,
@required this.averageFrameBuildTime,
@required this.p90FrameBuildTime,
@required this.p99FrameBuildTime,
@required this.worstFrameBuildTime,
@required this.missedFrameBuildBudget,
@required this.averageFrameRasterizerTime,
@required this.p90FrameRasterizerTime,
@required this.p99FrameRasterizerTime,
@required this.worstFrameRasterizerTime,
@required this.missedFrameRasterizerBudget,
});
/// List of each frame's build time.
final List<Duration> frameBuildTime;

/// List of each frame's rasterizer time.
final List<Duration> frameRasterizerTime;

/// The average value of [frameBuildTime].
final Duration averageFrameBuildTime;

/// The 90th percentile value of [frameBuildTime].
final Duration p90FrameBuildTime;

/// The 99th percentile value of [frameBuildTime].
final Duration p99FrameBuildTime;

/// The largest value of [frameBuildTime].
final Duration worstFrameBuildTime;

/// Number of items in [frameBuildTime] that are greater than [kBuildBudget].
final int missedFrameBuildBudget;

/// The average value of [frameRasterizerTime].
final Duration averageFrameRasterizerTime;

/// The 90th percentile value of [frameRasterizerTime].
final Duration p90FrameRasterizerTime;

/// The 99th percentile value of [frameRasterizerTime].
final Duration p99FrameRasterizerTime;

/// The largest value of [frameRasterizerTime].
final Duration worstFrameRasterizerTime;

/// Number of items in [frameRasterizerTime] that are greater than [kBuildBudget].
final int missedFrameRasterizerBudget;
Map<String, dynamic> get summary => <String, dynamic>{
'average_frame_build_time_millis':
averageFrameBuildTime.inMicroseconds / 1E3,
'90th_percentile_frame_build_time_millis':
p90FrameBuildTime.inMicroseconds / 1E3,
'99th_percentile_frame_build_time_millis':
p99FrameBuildTime.inMicroseconds / 1E3,
'worst_frame_build_time_millis':
worstFrameBuildTime.inMicroseconds / 1E3,
'missed_frame_build_budget_count': missedFrameBuildBudget,
'average_frame_rasterizer_time_millis':
averageFrameRasterizerTime.inMicroseconds / 1E3,
'90th_percentile_frame_rasterizer_time_millis':
p90FrameRasterizerTime.inMicroseconds / 1E3,
'99th_percentile_frame_rasterizer_time_millis':
p99FrameRasterizerTime.inMicroseconds / 1E3,
'worst_frame_rasterizer_time_millis':
worstFrameRasterizerTime.inMicroseconds / 1E3,
'missed_frame_rasterizer_budget_count': missedFrameRasterizerBudget,
'frame_count': frameBuildTime.length,
'frame_build_times': frameBuildTime
.map<int>((Duration datum) => datum.inMicroseconds).toList(),
'frame_rasterizer_times': frameRasterizerTime
.map<int>((Duration datum) => datum.inMicroseconds).toList(),
};
}
// The following helper functions require sorted data.

// Returns the 100*p-th percentile of [data].
T _findPercentile<T>(List<T> data, double p) {
  assert(p >= 0 && p <= 1);
  return data[((data.length - 1) * p).round()];
}

// Returns the number of items in [data] that are greater than [threshold].
int _countExceed<T extends Comparable<T>>(List<T> data, T threshold) {
  final int index = data.indexWhere((T datum) => datum.compareTo(threshold) > 0);
  // indexWhere returns -1 when nothing exceeds the threshold.
  return index == -1 ? 0 : data.length - index;
}
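// A quick sanity check of the two helpers above (hypothetical, not part of
// this change), on a small pre-sorted sample:
//
//   final List<Duration> sorted = List<Duration>.generate(
//       10, (int i) => Duration(milliseconds: i + 1)); // 1ms..10ms, sorted.
//   // (10 - 1) * 0.9 = 8.1 rounds to index 8, i.e. the 9ms sample.
//   assert(_findPercentile(sorted, 0.90) == const Duration(milliseconds: 9));
//   // Only the 9ms and 10ms samples exceed an 8ms threshold.
//   assert(_countExceed<Duration>(sorted, const Duration(milliseconds: 8)) == 2);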
// Copyright 2014 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import 'dart:async';
import 'dart:convert';
import 'dart:io';
import 'package:e2e/common.dart' as e2e;
import 'package:flutter_driver/flutter_driver.dart';
import 'package:path/path.dart' as path;
const JsonEncoder _prettyEncoder = JsonEncoder.withIndent(' ');
/// Flutter Driver test output directory.
///
/// Tests should write any output files to this directory. Defaults to the path
/// set in the FLUTTER_TEST_OUTPUTS_DIR environment variable, or `build` if
/// unset.
String testOutputsDirectory = Platform.environment['FLUTTER_TEST_OUTPUTS_DIR'] ?? 'build';
String testOutputFilename = 'e2e_perf_summary';
Future<void> main() async {
final FlutterDriver driver = await FlutterDriver.connect();
final String jsonResult =
await driver.requestData(null, timeout: const Duration(minutes: 1));
final e2e.Response response = e2e.Response.fromJson(jsonResult);
await driver.close();
if (response.allTestsPassed) {
print('All tests passed.');
// Write the summary where the devicelab expects it, using dart:io directly.
await Directory(testOutputsDirectory).create(recursive: true);
final File file = File(path.join(
  testOutputsDirectory,
  '$testOutputFilename.json',
));
final String resultString = _encodeJson(
response.data['performance'] as Map<String, dynamic>,
true,
);
await file.writeAsString(resultString);
exit(0);
} else {
print('Failure Details:\n${response.formattedFailureDetails}');
exit(1);
}
}
String _encodeJson(Map<String, dynamic> jsonObject, bool pretty) {
return pretty
? _prettyEncoder.convert(jsonObject)
: json.encode(jsonObject);
}
// Copyright 2014 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
import 'dart:async';
import 'package:flutter_devicelab/tasks/perf_tests.dart';
import 'package:flutter_devicelab/framework/adb.dart';
import 'package:flutter_devicelab/framework/framework.dart';
Future<void> main() async {
deviceOperatingSystem = DeviceOperatingSystem.android;
await task(createCullOpacityPerfE2ETest());
}
@@ -76,6 +76,13 @@ TaskFunction createCullOpacityPerfTest() {
).run;
}
TaskFunction createCullOpacityPerfE2ETest() {
return E2EPerfTest(
'${flutterDirectory.path}/dev/benchmarks/macrobenchmarks',
'test/cull_opacity_perf_e2e.dart',
).run;
}
TaskFunction createCubicBezierPerfTest() {
return PerfTest(
'${flutterDirectory.path}/dev/benchmarks/macrobenchmarks',
@@ -348,6 +355,8 @@ class PerfTest {
this.timelineFileName, {
this.needsMeasureCpuGpu = false,
this.testDriver,
this.needsFullTimeline = true,
this.benchmarkScoreKeys,
});
/// The directory where the app under test is defined.
@@ -356,10 +365,35 @@
final String testTarget;
/// The prefix of the result filename, such as `<timelineFileName>.timeline_summary.json`.
final String timelineFileName;
String get resultFilename => '$timelineFileName.timeline_summary';
/// The test file to run on the host.
final String testDriver;
/// Whether to collect CPU and GPU metrics.
final bool needsMeasureCpuGpu;
/// Whether to collect the full timeline, i.e. whether the `--trace-startup`
/// flag is needed.
final bool needsFullTimeline;
/// The keys of the values that need to be reported.
///
/// If it's `null`, then report:
/// ```dart
/// <String>[
/// 'average_frame_build_time_millis',
/// 'worst_frame_build_time_millis',
/// '90th_percentile_frame_build_time_millis',
/// '99th_percentile_frame_build_time_millis',
/// 'average_frame_rasterizer_time_millis',
/// 'worst_frame_rasterizer_time_millis',
/// '90th_percentile_frame_rasterizer_time_millis',
/// '99th_percentile_frame_rasterizer_time_millis',
/// 'average_vsync_transitions_missed',
/// '90th_percentile_vsync_transitions_missed',
/// '99th_percentile_vsync_transitions_missed',
/// if (needsMeasureCpuGpu) 'cpu_percentage',
/// if (needsMeasureCpuGpu) 'gpu_percentage',
/// ]
/// ```
final List<String> benchmarkScoreKeys;
Future<TaskResult> run() {
return internalRun();
@@ -382,7 +416,8 @@ class PerfTest {
'-v',
'--verbose-system-logs',
'--profile',
'--trace-startup', // Enables "endless" timeline event buffering.
if (needsFullTimeline)
'--trace-startup', // Enables "endless" timeline event buffering.
'-t', testTarget,
if (noBuild) '--no-build',
if (testDriver != null)
@@ -396,7 +431,7 @@ class PerfTest {
deviceId,
]);
final Map<String, dynamic> data = json.decode(
file('$testDirectory/build/$timelineFileName.timeline_summary.json').readAsStringSync(),
file('$testDirectory/build/$resultFilename.json').readAsStringSync(),
) as Map<String, dynamic>;
if (data['frame_count'] as int < 5) {
@@ -412,25 +447,57 @@
});
}
return TaskResult.success(data, benchmarkScoreKeys: <String>[
'average_frame_build_time_millis',
'worst_frame_build_time_millis',
'90th_percentile_frame_build_time_millis',
'99th_percentile_frame_build_time_millis',
'average_frame_rasterizer_time_millis',
'worst_frame_rasterizer_time_millis',
'90th_percentile_frame_rasterizer_time_millis',
'99th_percentile_frame_rasterizer_time_millis',
'average_vsync_transitions_missed',
'90th_percentile_vsync_transitions_missed',
'99th_percentile_vsync_transitions_missed',
if (needsMeasureCpuGpu) 'cpu_percentage',
if (needsMeasureCpuGpu) 'gpu_percentage',
]);
return TaskResult.success(
data,
benchmarkScoreKeys: benchmarkScoreKeys ?? <String>[
'average_frame_build_time_millis',
'worst_frame_build_time_millis',
'90th_percentile_frame_build_time_millis',
'99th_percentile_frame_build_time_millis',
'average_frame_rasterizer_time_millis',
'worst_frame_rasterizer_time_millis',
'90th_percentile_frame_rasterizer_time_millis',
'99th_percentile_frame_rasterizer_time_millis',
'average_vsync_transitions_missed',
'90th_percentile_vsync_transitions_missed',
'99th_percentile_vsync_transitions_missed',
if (needsMeasureCpuGpu) 'cpu_percentage',
if (needsMeasureCpuGpu) 'gpu_percentage',
],
);
});
}
}
class E2EPerfTest extends PerfTest {
const E2EPerfTest(
String testDirectory,
String testTarget, {
String summaryFilename,
List<String> benchmarkScoreKeys,
}) : super(
testDirectory,
testTarget,
summaryFilename,
testDriver: 'test_driver/e2e_test.dart',
needsFullTimeline: false,
benchmarkScoreKeys: benchmarkScoreKeys ?? const <String>[
'average_frame_build_time_millis',
'worst_frame_build_time_millis',
'90th_percentile_frame_build_time_millis',
'99th_percentile_frame_build_time_millis',
'average_frame_rasterizer_time_millis',
'worst_frame_rasterizer_time_millis',
'90th_percentile_frame_rasterizer_time_millis',
'99th_percentile_frame_rasterizer_time_millis',
],
);
@override
String get resultFilename => timelineFileName ?? 'e2e_perf_summary';
}
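// For illustration (hypothetical, not part of this change): a devicelab task
// for another E2E benchmark would follow the same pattern, e.g.
//
//   TaskFunction createPictureCachePerfE2ETest() {
//     return E2EPerfTest(
//       '${flutterDirectory.path}/dev/benchmarks/macrobenchmarks',
//       'test/picture_cache_perf_e2e.dart',
//     ).run;
//   }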
class PerfTestWithSkSL extends PerfTest {
PerfTestWithSkSL(
String testDirectory,
@@ -161,6 +161,13 @@ tasks:
stage: devicelab
required_agent_capabilities: ["mac/android"]
cull_opacity_perf__e2e_summary:
description: >
Measures the runtime performance of culling opacity widgets on Android
using an E2E self-driving test app.
stage: devicelab
required_agent_capabilities: ["linux/android"]
multi_widget_construction_perf__timeline_summary:
description: >
Measures the runtime performance of constructing and destructing widgets on Android.
@@ -270,6 +270,22 @@ class TargetPlatformVariant extends TestVariant<TargetPlatform> {
}
}
/// The warning message to show when a benchmark is performed with asserts enabled.
const String kDebugWarning = '''
┏╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┓
┇ ⚠ THIS BENCHMARK IS BEING RUN IN DEBUG MODE ⚠ ┇
┡╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┦
│ │
│ Numbers obtained from a benchmark while asserts are │
│ enabled will not accurately reflect the performance │
│ that will be experienced by end users using release ╎
│ builds. Benchmarks should be run using this command ╎
│ line: "flutter run --profile test.dart" or ┊
│ or "flutter drive --profile -t test.dart". ┊
│ ┊
└─────────────────────────────────────────────────╌┄┈ 🐢
''';
/// Runs the [callback] inside the Flutter benchmark environment.
///
/// Use this function for benchmarking custom [StatelessWidget]s and
@@ -320,18 +336,7 @@ Future<void> benchmarkWidgets(
assert(() {
if (mayRunWithAsserts)
return true;
print('┏╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┓');
print('┇ ⚠ THIS BENCHMARK IS BEING RUN WITH ASSERTS ENABLED ⚠ ┇');
print('┡╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┦');
print('│ │');
print('│ Numbers obtained from a benchmark while asserts are │');
print('│ enabled will not accurately reflect the performance │');
print('│ that will be experienced by end users using release ╎');
print('│ builds. Benchmarks should be run using this command ┆');
print('│ line: flutter run --release benchmark.dart ┊');
print('│ ');
print('└─────────────────────────────────────────────────╌┄┈ 🐢');
print(kDebugWarning);
return true;
}());
final TestWidgetsFlutterBinding binding = TestWidgetsFlutterBinding.ensureInitialized() as TestWidgetsFlutterBinding;