Skip to content

Commit

Permalink
Fix handling of case-sensitive set loops in RegexPrefixAnalyzer.FindP…
Browse files Browse the repository at this point in the history
…refixes (dotnet#101608)

* Fix handling of case-sensitive set loops in RegexPrefixAnalyzer.FindPrefixes

For an expression like `[Aa]{2}`, we were generating the strings "AA" and "aa" but not "Aa" or "aA".

This code isn't exercised yet, as we're currently only using FindPrefixes for case-insensitive, but I'm trying to enable it for case-sensitive as well, and hit this. I'm not adding new tests here as plenty of existing tests catch it once it's enabled.

* Also exit early as soon as we can detect too many possible prefixes
  • Loading branch information
stephentoub authored and matouskozak committed Apr 30, 2024
1 parent 1a16869 commit eacaff5
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 19 deletions.
Expand Up @@ -59,9 +59,11 @@ static bool FindPrefixesCore(RegexNode node, List<StringBuilder> results, bool i
// If we're too deep to analyze further, we can't trust what we've already computed, so stop iterating.
// Also bail if any of our results is already hitting the threshold, or if this node is RTL, which is
// not worth the complexity of handling.
// Or if we've already discovered more than the allowed number of prefixes.
if (!StackHelper.TryEnsureSufficientExecutionStack() ||
!results.TrueForAll(sb => sb.Length < MaxPrefixLength) ||
(node.Options & RegexOptions.RightToLeft) != 0)
(node.Options & RegexOptions.RightToLeft) != 0 ||
results.Count > MaxPrefixes)
{
return false;
}
Expand Down Expand Up @@ -162,23 +164,30 @@ static bool FindPrefixesCore(RegexNode node, List<StringBuilder> results, bool i
int reps = node.Kind is RegexNodeKind.Set ? 1 : Math.Min(node.M, MaxPrefixLength);
if (!ignoreCase)
{
int existingCount = results.Count;

// Duplicate all of the existing strings for all of the new suffixes, other than the first.
foreach (char suffix in setChars.Slice(1, charCount - 1))
for (int rep = 0; rep < reps; rep++)
{
for (int existing = 0; existing < existingCount; existing++)
int existingCount = results.Count;
if (existingCount * charCount > MaxPrefixes)
{
StringBuilder newSb = new StringBuilder().Append(results[existing]);
newSb.Append(suffix, reps);
results.Add(newSb);
return false;
}
}

// Then append the first suffix to all of the existing strings.
for (int existing = 0; existing < existingCount; existing++)
{
results[existing].Append(setChars[0], reps);
// Duplicate all of the existing strings for all of the new suffixes, other than the first.
foreach (char suffix in setChars.Slice(1, charCount - 1))
{
for (int existing = 0; existing < existingCount; existing++)
{
StringBuilder newSb = new StringBuilder().Append(results[existing]);
newSb.Append(suffix);
results.Add(newSb);
}
}

// Then append the first suffix to all of the existing strings.
for (int existing = 0; existing < existingCount; existing++)
{
results[existing].Append(setChars[0]);
}
}
}
else
Expand Down Expand Up @@ -248,6 +257,12 @@ static bool FindPrefixesCore(RegexNode node, List<StringBuilder> results, bool i
{
_ = FindPrefixesCore(node.Child(i), alternateBranchResults, ignoreCase);

// If we now have too many results, bail.
if ((allBranchResults?.Count ?? 0) + alternateBranchResults.Count > MaxPrefixes)
{
return false;
}

Debug.Assert(alternateBranchResults.Count > 0);
foreach (StringBuilder sb in alternateBranchResults)
{
Expand Down
Expand Up @@ -1172,6 +1172,7 @@ public async Task Match_VaryingLengthStrings_Huge(RegexEngine engine)

public static IEnumerable<object[]> Match_DeepNesting_MemberData()
{
foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.IgnoreCase })
foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
{
if (RegexHelpers.IsNonBacktracking(engine))
Expand All @@ -1180,23 +1181,23 @@ public static IEnumerable<object[]> Match_DeepNesting_MemberData()
continue;
}

yield return new object[] { engine, 1 };
yield return new object[] { engine, 10 };
yield return new object[] { engine, 100 };
yield return new object[] { engine, options, 1 };
yield return new object[] { engine, options, 10 };
yield return new object[] { engine, options, 100 };
}
}

[Theory]
[MemberData(nameof(Match_DeepNesting_MemberData))]
public async Task Match_DeepNesting(RegexEngine engine, int count)
public async Task Match_DeepNesting(RegexEngine engine, RegexOptions options, int count)
{
const string Start = @"((?>abc|(?:def[ghi]", End = @")))";
const string Match = "defg";

string pattern = string.Concat(Enumerable.Repeat(Start, count)) + string.Concat(Enumerable.Repeat(End, count));
string input = string.Concat(Enumerable.Repeat(Match, count));

Regex r = await RegexHelpers.GetRegexAsync(engine, pattern);
Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
Match m = r.Match(input);

Assert.True(m.Success);
Expand Down

0 comments on commit eacaff5

Please sign in to comment.