New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Correct Regular Expressions Behavior Related to Annex B #58320
base: main
Are you sure you want to change the base?
Changes from 1 commit
e049438
358eb30
8facb0a
f5c0b60
cff993f
603c3cf
2e62d25
ed08ef7
8b67d77
b48f0d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2638,6 +2638,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
const digitsStart = pos; | ||
scanDigits(); | ||
const min = tokenValue; | ||
if (annexB && !min) { | ||
isPreviousTermQuantifiable = true; | ||
break; | ||
} | ||
if (text.charCodeAt(pos) === CharacterCodes.comma) { | ||
pos++; | ||
scanDigits(); | ||
|
@@ -2647,25 +2651,28 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
error(Diagnostics.Incomplete_quantifier_Digit_expected, digitsStart, 0); | ||
} | ||
else { | ||
if (unicodeMode) { | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); | ||
} | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); | ||
isPreviousTermQuantifiable = true; | ||
break; | ||
} | ||
} | ||
if (max && Number.parseInt(min) > Number.parseInt(max)) { | ||
else if (max && Number.parseInt(min) > Number.parseInt(max) && (!annexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) { | ||
error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart); | ||
} | ||
} | ||
else if (!min) { | ||
if (unicodeMode) { | ||
if (!annexB) { | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch)); | ||
} | ||
isPreviousTermQuantifiable = true; | ||
break; | ||
} | ||
scanExpectedChar(CharacterCodes.closeBrace); | ||
if (!annexB) { | ||
scanExpectedChar(CharacterCodes.closeBrace); | ||
} | ||
else if (text.charCodeAt(pos) === CharacterCodes.closeBrace) { | ||
pos++; | ||
} | ||
pos--; | ||
// falls through | ||
case CharacterCodes.asterisk: | ||
|
@@ -2707,7 +2714,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
// Assume what starting from the character to be outside of the regex | ||
return; | ||
} | ||
if (unicodeMode || ch === CharacterCodes.closeParen) { | ||
if (!annexB || ch === CharacterCodes.closeParen) { | ||
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch)); | ||
} | ||
pos++; | ||
|
@@ -2767,7 +2774,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
scanGroupName(/*isReference*/ true); | ||
scanExpectedChar(CharacterCodes.greaterThan); | ||
} | ||
else if (unicodeMode) { | ||
else { | ||
// This actually is allowed in Annex B if there are no named capturing groups in the regex, | ||
// but if we were going to silence these errors, we would have to record the positions of all '\k's | ||
// and defer the errors until after the scanning to know if the regex has any named capturing groups. | ||
error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2); | ||
} | ||
break; | ||
|
@@ -3390,7 +3400,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean | |
error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start); | ||
} | ||
} | ||
else if (unicodeMode) { | ||
else if (!annexB) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In Annex B, braces after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really outdated There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The outdated label is just because the comment is on an old revision of the PR and GitHub can't figure out where the comment goes after. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed. I could’ve removed the review and re-comment but finally chose not to. |
||
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch)); | ||
} | ||
return true; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Though it may be redundant, I think it might be better to still indicate `unicodeMode` here so that someone editing this code in the future doesn't mistakenly think this only applies to non-Annex B code. It may be better to use `unicodeMode || !annexB` and remove the `if (unicodeMode) { annexB = false; }` at the top of `scanRegularExpressionWorker`. The same would go for other uses of `annexB` as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are you sure you are really fine with a dozen occurrences of `unicodeMode || !annexB`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No, but
would work.