microsoft · rbuckton · May 24, 2024 · Apr 22, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -21,6 +21,7 @@ import {
     KeywordSyntaxKind,
     LanguageVariant,
     last,
+    lastOrUndefined,
     LineAndCharacter,
     MapLike,
     parsePseudoBigInt,
@@ -2389,7 +2390,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
     function reScanSlashToken(): SyntaxKind {
         if (token === SyntaxKind.SlashToken || token === SyntaxKind.SlashEqualsToken) {
             // Quickly get to the end of regex such that we know the flags
-            let p = tokenStart + 1;
+            const startOfRegExpBody = tokenStart + 1;
+            pos = startOfRegExpBody;
             let inEscape = false;
             // Although nested character classes are allowed in Unicode Sets mode,
             // an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
@@ -2401,16 +2403,14 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
             while (true) {
                 // If we reach the end of a file, or hit a newline, then this is an unterminated
                 // regex.  Report error and return what we have so far.
-                if (p >= end) {
+                if (pos >= end) {
                     tokenFlags |= TokenFlags.Unterminated;
-                    error(Diagnostics.Unterminated_regular_expression_literal);
                     break;
                 }
 
-                const ch = text.charCodeAt(p);
+                const ch = text.charCodeAt(pos);
                 if (isLineBreak(ch)) {
                     tokenFlags |= TokenFlags.Unterminated;
-                    error(Diagnostics.Unterminated_regular_expression_literal);
                     break;
                 }
 
@@ -2422,7 +2422,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                 else if (ch === CharacterCodes.slash && !inCharacterClass) {
                     // A slash within a character class is permissible,
                     // but in general it signals the end of the regexp literal.
-                    p++;
                     break;
                 }
                 else if (ch === CharacterCodes.openBracket) {
@@ -2434,50 +2433,94 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                 else if (ch === CharacterCodes.closeBracket) {
                     inCharacterClass = false;
                 }
-                p++;
+                pos++;
             }
-            const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated);
-            const endOfBody = p - (isUnterminated ? 0 : 1);
-            let regExpFlags = RegularExpressionFlags.None;
-            while (p < end) {
-                const ch = text.charCodeAt(p);
-                if (!isIdentifierPart(ch, languageVersion)) {
-                    break;
-                }
-                const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
-                if (flag === undefined) {
-                    error(Diagnostics.Unknown_regular_expression_flag, p, 1);
-                }
-                else if (regExpFlags & flag) {
-                    error(Diagnostics.Duplicate_regular_expression_flag, p, 1);
-                }
-                else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
-                    error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, p, 1);
-                }
-                else {
-                    regExpFlags |= flag;
-                    const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
-                    if (languageVersion < availableFrom) {
-                        error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, p, 1, getNameOfScriptTarget(availableFrom));
+            const endOfRegExpBody = pos;
+            if (tokenFlags & TokenFlags.Unterminated) {
+                // Search for the nearest unbalanced bracket for better recovery. Since the expression is
+                // invalid anyways, we take nested square brackets into consideration for the best guess.
+                pos = startOfRegExpBody;
+                inEscape = false;
+                let characterClassDepth = 0;
+                const bracketStack: CharacterCodes[] = [];
+                while (pos < endOfRegExpBody) {
+                    const ch = text.charCodeAt(pos);
+                    if (inEscape) {
+                        inEscape = false;
+                    }
+                    else if (ch === CharacterCodes.backslash) {
+                        inEscape = true;
+                    }
+                    else if (ch === CharacterCodes.openBracket) {
+                        characterClassDepth++;
+                    }
+                    else if (ch === CharacterCodes.closeBracket && characterClassDepth) {
+                        characterClassDepth--;
                     }
+                    else if (!characterClassDepth) {
+                        if (ch === CharacterCodes.openParen) {
+                            bracketStack.push(CharacterCodes.closeParen);
+                        }
+                        else if (ch === CharacterCodes.openBrace) {
+                            bracketStack.push(CharacterCodes.closeBrace);
+                        }
+                        else if (ch === lastOrUndefined(bracketStack)) {
+                            bracketStack.pop();
+                        }
+                        else if (ch === CharacterCodes.closeParen || ch === CharacterCodes.closeBracket || ch === CharacterCodes.closeBrace) {
+                            // We encountered an unbalanced bracket outside a character class. Treat this position as the end of regex.
+                            break;
+                        }
+                    }
+                    pos++;
                 }
-                p++;
+                // Whitespaces and semicolons at the end are not likely to be part of the regex
+                while (isWhiteSpaceLike(text.charCodeAt(pos - 1)) || text.charCodeAt(pos - 1) === CharacterCodes.semicolon) pos--;
+                error(Diagnostics.Unterminated_regular_expression_literal, tokenStart, pos - tokenStart);
             }
-            pos = tokenStart + 1;
-            const saveTokenPos = tokenStart;
-            const saveTokenFlags = tokenFlags;
-            scanRegularExpressionWorker(text, endOfBody, regExpFlags, isUnterminated);
-            if (!isUnterminated) {
-                pos = p;
+            else {
+                // Consume the slash character
+                pos++;
+                let regExpFlags = RegularExpressionFlags.None;
+                while (pos < end) {
+                    const ch = text.charCodeAt(pos);
+                    if (!isIdentifierPart(ch, languageVersion)) {
+                        break;
+                    }
+                    const flag = characterToRegularExpressionFlag(String.fromCharCode(ch));
+                    if (flag === undefined) {
+                        error(Diagnostics.Unknown_regular_expression_flag, pos, 1);
+                    }
+                    else if (regExpFlags & flag) {
+                        error(Diagnostics.Duplicate_regular_expression_flag, pos, 1);
+                    }
+                    else if (((regExpFlags | flag) & RegularExpressionFlags.UnicodeMode) === RegularExpressionFlags.UnicodeMode) {
+                        error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1);
+                    }
+                    else {
+                        regExpFlags |= flag;
+                        const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag)!;
+                        if (languageVersion < availableFrom) {
+                            error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom));
+                        }
+                    }
+                    pos++;
+                }
+                const endOfRegExpFlags = pos;
+                pos = startOfRegExpBody;
+                const saveTokenPos = tokenStart;
+                const saveTokenFlags = tokenFlags;
+                scanRegularExpressionWorker(text, endOfRegExpBody, regExpFlags);
+                pos = endOfRegExpFlags;
+                tokenStart = saveTokenPos;
+                tokenFlags = saveTokenFlags;
             }
-            tokenStart = saveTokenPos;
-            tokenFlags = saveTokenFlags;
             tokenValue = text.substring(tokenStart, pos);
             token = SyntaxKind.RegularExpressionLiteral;
         }
         return token;
 
-        function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags, isUnterminated: boolean) {
+        function scanRegularExpressionWorker(text: string, end: number, regExpFlags: RegularExpressionFlags) {
             /** Grammar parameter */
             const unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
             /** Grammar parameter */
@@ -2685,10 +2728,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
                         // falls through
                         case CharacterCodes.closeBracket:
                         case CharacterCodes.closeBrace:
-                            if (isUnterminated && !isInGroup) {
-                                // Assume what starting from the character to be outside of the regex
-                                return;
-                            }
                             if (unicodeMode || ch === CharacterCodes.closeParen) {
                                 error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
                             }

diff --git a/src/testRunner/tests.ts b/src/testRunner/tests.ts
@@ -16,6 +16,7 @@ import "./unittests/paths";
 import "./unittests/printer";
 import "./unittests/programApi";
 import "./unittests/publicApi";
+import "./unittests/regExpParserRecovery";
 import "./unittests/reuseProgramStructure";
 import "./unittests/semver";
 import "./unittests/transform";

diff --git a/src/testRunner/unittests/regExpParserRecovery.ts b/src/testRunner/unittests/regExpParserRecovery.ts
@@ -0,0 +1,81 @@
+import * as ts from "../_namespaces/ts";
+
+describe("unittests:: regExpParserRecovery", () => {
+    const testCases = [
+        "/",
+        "/[]",
+        "/{}",
+        "/()",
+        "/foo",
+        "/foo[]",
+        "/foo{}",
+        "/foo()",
+        "/[]foo",
+        "/{}foo",
+        "/()foo",
+        "/{[]}",
+        "/([])",
+        "/[)}({]",
+        "/({[)}]})",
+        "/\\[",
+        "/\\{",
+        "/\\(",
+        "/[\\[]",
+        "/(\\[)",
+        "/{\\[}",
+        "/[\\(]",
+        "/(\\()",
+        "/{\\(}",
+        "/[\\{]",
+        "/(\\{)",
+        "/{\\{}",
+        "/\\{(\\[\\([{])",
+        "/\\]",
+        "/\\}",
+        "/\\)",
+        "/[\\]]",
+        "/(\\])",
+        "/{\\]}",
+        "/[\\)]",
+        "/(\\))",
+        "/{\\)}",
+        "/[\\}]",
+        "/(\\})",
+        "/{\\}}",
+        "/({[\\]})]})",
+    ];
+    const whiteSpaceSequences = [
+        "",
+        "  ",
+        "\t\v\r\n",
+        "\u3000\u2028",
+    ];
+    it("stops parsing unterminated regexes at correct position", () => {
+        ts.forEach(testCases, testCase => {
+            ts.forEach(whiteSpaceSequences, whiteSpaces => {
+                const testCaseWithWhiteSpaces = testCase + whiteSpaces;
+                const sources = [
+                    `const regex = ${testCaseWithWhiteSpaces};`,
+                    `(${testCaseWithWhiteSpaces});`,
+                    `([${testCaseWithWhiteSpaces}]);`,
+                    `({prop: ${testCaseWithWhiteSpaces}});`,
+                    `({prop: ([(${testCaseWithWhiteSpaces})])});`,
+                    `({[(${testCaseWithWhiteSpaces}).source]: 42});`,
+                ];
+                ts.forEach(sources, source => {
+                    const { parseDiagnostics } = ts.createLanguageServiceSourceFile(
+                        /*fileName*/ "",
+                        ts.ScriptSnapshot.fromString(source),
+                        ts.ScriptTarget.Latest,
+                        /*version*/ "0",
+                        /*setNodeParents*/ false,
+                    );
+                    const diagnostic = ts.find(parseDiagnostics, ({ code }) => code === ts.Diagnostics.Unterminated_regular_expression_literal.code);
+                    assert(diagnostic, "There should be an 'Unterminated regular expression literal.' error");
+                    assert.equal(diagnostic.start, source.indexOf("/"), "Diagnostic should start at where the regex starts");
+                    assert.equal(diagnostic.length, testCase.length, "Diagnostic should end at where the regex ends");
+                });
+            });
+        });
+    });
+});
diff --git a/tests/baselines/reference/parser645086_1.errors.txt b/tests/baselines/reference/parser645086_1.errors.txt
@@ -1,13 +1,10 @@
 parser645086_1.ts(1,13): error TS1005: ',' expected.
 parser645086_1.ts(1,14): error TS1134: Variable declaration expected.
-parser645086_1.ts(1,15): error TS1161: Unterminated regular expression literal.
 
 
-==== parser645086_1.ts (3 errors) ====
+==== parser645086_1.ts (2 errors) ====
     var v = /[]/]/
                 ~
 !!! error TS1005: ',' expected.
                  ~
-!!! error TS1134: Variable declaration expected.
-
-!!! error TS1161: Unterminated regular expression literal.
+!!! error TS1134: Variable declaration expected.
diff --git a/tests/baselines/reference/parser645086_2.errors.txt b/tests/baselines/reference/parser645086_2.errors.txt
@@ -1,13 +1,10 @@
 parser645086_2.ts(1,14): error TS1005: ',' expected.
 parser645086_2.ts(1,15): error TS1134: Variable declaration expected.
-parser645086_2.ts(1,16): error TS1161: Unterminated regular expression literal.
 
 
-==== parser645086_2.ts (3 errors) ====
+==== parser645086_2.ts (2 errors) ====
     var v = /[^]/]/
                  ~
 !!! error TS1005: ',' expected.
                   ~
-!!! error TS1134: Variable declaration expected.
-
-!!! error TS1161: Unterminated regular expression literal.
+!!! error TS1134: Variable declaration expected.
diff --git a/tests/baselines/reference/parserMissingToken2.errors.txt b/tests/baselines/reference/parserMissingToken2.errors.txt
@@ -1,7 +1,7 @@
-parserMissingToken2.ts(1,2): error TS1161: Unterminated regular expression literal.
+parserMissingToken2.ts(1,1): error TS1161: Unterminated regular expression literal.
 
 
 ==== parserMissingToken2.ts (1 errors) ====
     / b;
-     
+    ~~~
 !!! error TS1161: Unterminated regular expression literal.
diff --git a/tests/baselines/reference/parserMissingToken2.js b/tests/baselines/reference/parserMissingToken2.js
@@ -4,4 +4,4 @@
 / b;
 
 //// [parserMissingToken2.js]
-/ b;;
+/ b;
diff --git a/tests/baselines/reference/parserMissingToken2.types b/tests/baselines/reference/parserMissingToken2.types
@@ -2,6 +2,6 @@
 
 === parserMissingToken2.ts ===
 / b;
->/ b; : RegExp
->     : ^^^^^^
+>/ b : RegExp
+>    : ^^^^^^
 
diff --git a/tests/baselines/reference/parserRegularExpressionDivideAmbiguity4.errors.txt b/tests/baselines/reference/parserRegularExpressionDivideAmbiguity4.errors.txt
@@ -1,10 +1,10 @@
 parserRegularExpressionDivideAmbiguity4.ts(1,1): error TS2304: Cannot find name 'foo'.
-parserRegularExpressionDivideAmbiguity4.ts(1,6): error TS1161: Unterminated regular expression literal.
+parserRegularExpressionDivideAmbiguity4.ts(1,5): error TS1161: Unterminated regular expression literal.
 
 
 ==== parserRegularExpressionDivideAmbiguity4.ts (2 errors) ====
     foo(/notregexp);
     ~~~
 !!! error TS2304: Cannot find name 'foo'.
-         
+        ~~~~~~~~~~
 !!! error TS1161: Unterminated regular expression literal.
diff --git a/tests/baselines/reference/tsxAttributeInvalidNames.errors.txt b/tests/baselines/reference/tsxAttributeInvalidNames.errors.txt
@@ -10,7 +10,7 @@ file.tsx(11,1): error TS2362: The left-hand side of an arithmetic operation must
 file.tsx(11,8): error TS1003: Identifier expected.
 file.tsx(11,9): error TS2304: Cannot find name 'data'.
 file.tsx(11,13): error TS1005: ';' expected.
-file.tsx(11,20): error TS1161: Unterminated regular expression literal.
+file.tsx(11,19): error TS1161: Unterminated regular expression literal.
 
 
 ==== file.tsx (13 errors) ====
@@ -49,5 +49,5 @@ file.tsx(11,20): error TS1161: Unterminated regular expression literal.
 !!! error TS2304: Cannot find name 'data'.
                 ~
 !!! error TS1005: ';' expected.
-                       
+                      ~~
 !!! error TS1161: Unterminated regular expression literal.
diff --git a/tests/baselines/reference/tsxAttributeInvalidNames.js b/tests/baselines/reference/tsxAttributeInvalidNames.js
@@ -22,4 +22,4 @@ data = { 32:  } /  > ;
 {
     32;
 }
-/>;;
+/>;
diff --git a/tests/baselines/reference/tsxAttributeInvalidNames.types b/tests/baselines/reference/tsxAttributeInvalidNames.types
@@ -56,6 +56,6 @@ declare module JSX {
 >     : ^^^
 >32 : 32
 >   : ^^
->/>; : RegExp
->    : ^^^^^^
+>/> : RegExp
+>   : ^^^^^^
 
diff --git a/tests/baselines/reference/unterminatedRegexAtEndOfSource1.errors.txt b/tests/baselines/reference/unterminatedRegexAtEndOfSource1.errors.txt
@@ -1,7 +1,7 @@
-unterminatedRegexAtEndOfSource1.ts(1,10): error TS1161: Unterminated regular expression literal.
+unterminatedRegexAtEndOfSource1.ts(1,9): error TS1161: Unterminated regular expression literal.
 
 
 ==== unterminatedRegexAtEndOfSource1.ts (1 errors) ====
     var a = /
-             
+            ~
 !!! error TS1161: Unterminated regular expression literal.
diff --git a/tests/cases/fourslash/whiteSpaceTrimming4.ts b/tests/cases/fourslash/whiteSpaceTrimming4.ts
@@ -5,4 +5,4 @@
 goTo.marker('1');
 edit.insert("\n");
 
-verify.currentFileContentIs("var re = /\\w+   \n    /;");
+verify.currentFileContentIs("var re = /\\w+\n    /;");