feat: implement flag to fail flaky tests (#30618)

Implements feature requested in #30457 The test runner treats flaky tests as failures when the flag is enabled, but still reports flaky tests as flaky in the reporting interface. It feels like something worth discussing as this behaviour makes sense to me, but looked a bit odd to @BJSS-russell-pollock when I ran this past him. Closes #30457.
microsoft · May 15, 2024 · 6ae5cd3 · 6ae5cd3
1 parent 7a588e6
commit 6ae5cd3
Show file tree

Hide file tree

Showing 5 changed files with 26 additions and 1 deletion.
diff --git a/docs/src/test-cli-js.md b/docs/src/test-cli-js.md
@@ -81,6 +81,7 @@ Complete set of Playwright Test options is available in the [configuration file]
 | Non-option arguments | Each argument is treated as a regular expression matched against the full test file path. Only tests from the files matching the pattern will be executed. Special symbols like `$` or `*` should be escaped with `\`. In many shells/terminals you may need to quote the arguments. |
 | `-c <file>` or `--config <file>`| Configuration file. If not passed, defaults to `playwright.config.ts` or `playwright.config.js` in the current directory. |
 | `--debug`| Run tests with Playwright Inspector. Shortcut for `PWDEBUG=1` environment variable and `--timeout=0 --max-failures=1 --headed --workers=1` options.|
+| `--fail-on-flaky-tests` | Fails test runs that contain flaky tests. By default flaky tests count as successes. |
 | `--forbid-only` | Whether to disallow `test.only`. Useful on CI.|
 | `--global-timeout <number>` | Total timeout for the whole test run in milliseconds. By default, there is no global timeout. Learn more about [various timeouts](./test-timeouts.md).|
 | `-g <grep>` or `--grep <grep>` | Only run tests matching this regular expression. For example, this will run `'should add to cart'` when passed `-g "add to cart"`.  The regular expression will be tested against the string that consists of the test file name, `test.describe` titles if any, test title and all test tags, separated by spaces, e.g. `my-test.spec.ts my-suite my-test @smoke`. The filter does not apply to the tests from dependency projects, i.e. Playwright will still run all tests from [project dependencies](./test-projects.md#dependencies). |

diff --git a/packages/playwright/src/common/config.ts b/packages/playwright/src/common/config.ts
@@ -52,6 +52,7 @@ export class FullConfigInternal {
   cliProjectFilter?: string[];
   cliListOnly = false;
   cliPassWithNoTests?: boolean;
+  cliFailOnFlakyTests?: boolean;
   testIdMatcher?: Matcher;
   defineConfigWasUsed = false;
 

diff --git a/packages/playwright/src/program.ts b/packages/playwright/src/program.ts
@@ -194,6 +194,7 @@ async function runTests(args: string[], opts: { [key: string]: any }) {
   config.cliListOnly = !!opts.list;
   config.cliProjectFilter = opts.project || undefined;
   config.cliPassWithNoTests = !!opts.passWithNoTests;
+  config.cliFailOnFlakyTests = !!opts.failOnFlakyTests;
 
   const runner = new Runner(config);
   let status: FullResult['status'];
@@ -336,6 +337,7 @@ const testOptions: [string, string][] = [
   ['--browser <browser>', `Browser to use for tests, one of "all", "chromium", "firefox" or "webkit" (default: "chromium")`],
   ['-c, --config <file>', `Configuration file, or a test directory with optional "playwright.config.{m,c}?{js,ts}"`],
   ['--debug', `Run tests with Playwright Inspector. Shortcut for "PWDEBUG=1" environment variable and "--timeout=0 --max-failures=1 --headed --workers=1" options`],
+  ['--fail-on-flaky-tests', `Fail if any test is flagged as flaky (default: false)`],
   ['--forbid-only', `Fail if test.only is called (default: false)`],
   ['--fully-parallel', `Run all tests in parallel (default: false)`],
   ['--global-timeout <timeout>', `Maximum time this test suite can run in milliseconds (default: unlimited)`],

diff --git a/packages/playwright/src/runner/failureTracker.ts b/packages/playwright/src/runner/failureTracker.ts
@@ -48,7 +48,15 @@ export class FailureTracker {
   }
 
   result(): 'failed' | 'passed' {
-    return this._hasWorkerErrors || this.hasReachedMaxFailures() || this._rootSuite?.allTests().some(test => !test.ok()) ? 'failed' : 'passed';
+    return this._hasWorkerErrors || this.hasReachedMaxFailures() || this.hasFailedTests() || (this._config.cliFailOnFlakyTests && this.hasFlakyTests()) ? 'failed' : 'passed';
+  }
+
+  hasFailedTests() {
+    return this._rootSuite?.allTests().some(test => !test.ok());
+  }
+
+  hasFlakyTests() {
+    return this._rootSuite?.allTests().some(test => (test.outcome() === 'flaky'));
   }
 
   maxFailures() {

diff --git a/tests/playwright-test/exit-code.spec.ts b/tests/playwright-test/exit-code.spec.ts
@@ -128,6 +128,19 @@ test('should allow flaky', async ({ runInlineTest }) => {
   expect(result.flaky).toBe(1);
 });
 
+test('failOnFlakyTests flag disallows flaky', async ({ runInlineTest }) => {
+  const result = await runInlineTest({
+    'a.test.js': `
+      import { test, expect } from '@playwright/test';
+      test('flake', async ({}, testInfo) => {
+        expect(testInfo.retry).toBe(1);
+      });
+    `,
+  }, { 'retries': 1, 'fail-on-flaky-tests': true });
+  expect(result.exitCode).not.toBe(0);
+  expect(result.flaky).toBe(1);
+});
+
 test('should fail on unexpected pass', async ({ runInlineTest }) => {
   const { exitCode, failed, output } = await runInlineTest({
     'unexpected-pass.spec.js': `