Skip to content

Commit

Permalink
Ranges (pegjs/pegjs#30): Add support for delimiters in ranges
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Feb 21, 2023
1 parent 8546641 commit 22feab4
Show file tree
Hide file tree
Showing 9 changed files with 121 additions and 11 deletions.
11 changes: 10 additions & 1 deletion lib/compiler/asts.js
Expand Up @@ -53,8 +53,17 @@ const asts = {
if (min.type !== "constant" || min.value === 0) {
return false;
}
if (consumes(node.expression)) {
return true;
}
// |node.delimiter| is used only when |node.expression| matches at least twice.
// The first `if` filtered out all non-constant minimums, so at this point
// |min.value| is always a constant
if (min.value > 1 && node.delimiter && consumes(node.delimiter)) {
return true;
}

return consumes(node.expression);
return false;
},
semantic_and: consumesFalse,
semantic_not: consumesFalse,
Expand Down
55 changes: 53 additions & 2 deletions lib/compiler/passes/generate-bytecode.js
Expand Up @@ -509,6 +509,50 @@ function generateBytecode(ast, options) {
);
}

/**
 * Builds the bytecode for one repetition of a range body.
 *
 * Without a delimiter the expression bytecode is returned unchanged. With a
 * delimiter, the returned sequence saves the current position, runs the
 * delimiter and, only if the delimiter did not fail, runs the expression.
 * When the expression fails after the delimiter, the saved position is
 * restored and `peg$FAILED` is left as the result.
 *
 * @param delimiterNode AST node of the delimiter; falsy when the range has
 *   no delimiter.
 * @param expressionMatch Match result of the expression; it is negated and
 *   passed to `buildCondition` for the `IF_ERROR` check.
 * @param expressionCode Bytecode of the repeated expression.
 *   NOTE(review): assumed to be already generated by the caller - confirm.
 * @param context Generation context; its `sp` and `env` are used to build
 *   the context passed to `generate` for the delimiter.
 * @param offset Added to `context.sp` (plus one slot for the saved position)
 *   to obtain the `sp` used for the delimiter.
 * @returns The result of `buildSequence(...)` when a delimiter is present,
 *   otherwise `expressionCode` itself.
 */
function buildRangeBody(
delimiterNode,
expressionMatch,
expressionCode,
context,
offset
) {
if (delimiterNode) {
return buildSequence( // stack:[ ]
[op.PUSH_CURR_POS], // pos = peg$currPos; stack:[ pos ]
generate(delimiterNode, { // item = delim(); stack:[ pos, delim ]
// +1 for the saved offset
sp: context.sp + offset + 1,
env: cloneEnv(context.env),
action: null,
}),
buildCondition(
delimiterNode.match | 0,
[op.IF_NOT_ERROR], // if (item !== peg$FAILED) {
buildSequence(
[op.POP], // stack:[ pos ]
expressionCode, // item = expr(); stack:[ pos, item ]
buildCondition(
-expressionMatch,
[op.IF_ERROR], // if (item === peg$FAILED) {
// If element FAILED, rollback currPos to saved value.
/* eslint-disable indent */
[op.POP, // stack:[ pos ]
op.POP_CURR_POS, // peg$currPos = pos; stack:[ ]
op.PUSH_FAILED], // item = peg$FAILED; stack:[ peg$FAILED ]
/* eslint-enable indent */
// Else, just drop saved currPos.
[op.NIP] // } stack:[ item ]
)
), // }
// If delimiter FAILED, currPos not changed, so just drop it.
[op.NIP] // stack:[ peg$FAILED ]
) // stack:[ <?> ]
);
}

// No delimiter: the body of the range is just the expression.
return expressionCode;
}

function wrapGenerators(generators) {
if (options && options.output === "source-and-map") {
Object.entries(generators).forEach(([name, generator]) => {
Expand Down Expand Up @@ -858,12 +902,19 @@ function generateBytecode(ast, options) {
env: cloneEnv(context.env),
action: null,
});
const bodyCode = buildRangeBody(
node.delimiter,
node.expression.match | 0,
expressionCode,
context,
offset
);
// Check the high boundary, if it is defined.
const checkMaxCode = buildCheckMax(expressionCode, node.max);
const checkMaxCode = buildCheckMax(bodyCode, node.max);
// For a dynamic high boundary we need to check the first iteration, because the result can be
// empty. Constant boundaries do not require that check, because they are always >=1
const firstElemCode = hasBoundedMax
? checkMaxCode
? buildCheckMax(expressionCode, node.max)
: expressionCode;
const mainLoopCode = buildSequence(
// If the low boundary present, then backtracking is possible, so save the current pos
Expand Down
23 changes: 20 additions & 3 deletions lib/compiler/passes/inference-match-result.js
Expand Up @@ -98,6 +98,7 @@ function inferenceMatchResult(ast) {
one_or_more: inferenceExpression,
repeated(node) {
const match = inference(node.expression);
const dMatch = node.delimiter ? inference(node.delimiter) : NEVER_MATCH;
// If minimum is `null` it is equals to maximum (parsed from `|exact|` syntax)
const min = node.min ? node.min : node.max;

Expand All @@ -122,12 +123,28 @@ function inferenceMatchResult(ast) {
return (node.match = min.value === 0 ? ALWAYS_MATCH : NEVER_MATCH);
}
if (match === ALWAYS_MATCH) {
if (node.delimiter && min.value >= 2) {
// If the expression always matches, the final result is determined only
// by the delimiter, but the delimiter is used only when two or more
// elements are required
return (node.match = dMatch);
}

return (node.match = ALWAYS_MATCH);
}

// Here an expression sometimes match. If it should be repeated at least once
// the whole range sometimes match, otherwise it will always succeeds (at least
// an empty array guaranteed)
// Here `match === SOMETIMES_MATCH`
if (node.delimiter && min.value >= 2) {
// The expression only sometimes matches. Because two or more elements
// are required, at least one delimiter has to match as well, so the
// result also depends on the delimiter
return (
// If the delimiter never matches then the range also never matches (because
// there is at least one delimiter)
node.match = dMatch === NEVER_MATCH ? NEVER_MATCH : SOMETIMES_MATCH
);
}

return (node.match = min.value === 0 ? ALWAYS_MATCH : SOMETIMES_MATCH);
},
group: inferenceExpression,
Expand Down
8 changes: 7 additions & 1 deletion lib/compiler/passes/report-duplicate-labels.js
Expand Up @@ -55,7 +55,13 @@ function reportDuplicateLabels(ast, options, session) {
optional: checkExpressionWithClonedEnv,
zero_or_more: checkExpressionWithClonedEnv,
one_or_more: checkExpressionWithClonedEnv,
repeated: checkExpressionWithClonedEnv,
repeated(node, env) {
if (node.delimiter) {
check(node.delimiter, cloneEnv(env));
}

check(node.expression, cloneEnv(env));
},
group: checkExpressionWithClonedEnv,
});

Expand Down
12 changes: 12 additions & 0 deletions lib/compiler/passes/report-infinite-recursion.js
Expand Up @@ -34,6 +34,18 @@ function reportInfiniteRecursion(ast, options, session) {
});
},

repeated(node) {
check(node.expression);

// If an expression does not consume input then recursion
// over delimiter is possible
if (node.delimiter
&& !asts.alwaysConsumesOnSuccess(ast, node.expression)
) {
check(node.delimiter);
}
},

rule_ref(node) {
backtraceRefs.push(node);

Expand Down
4 changes: 3 additions & 1 deletion lib/compiler/passes/report-infinite-repetition.js
Expand Up @@ -26,7 +26,9 @@ function reportInfiniteRepetition(ast, options, session) {
},

repeated(node) {
if (asts.alwaysConsumesOnSuccess(ast, node.expression)) {
if (asts.alwaysConsumesOnSuccess(ast, node.expression)
|| node.delimiter && asts.alwaysConsumesOnSuccess(ast, node.delimiter)
) {
return;
}
if (node.max.value === null) {
Expand Down
8 changes: 7 additions & 1 deletion lib/compiler/visitor.js
Expand Up @@ -53,7 +53,13 @@ const visitor = {
optional: visitExpression,
zero_or_more: visitExpression,
one_or_more: visitExpression,
repeated: visitExpression,
repeated(node, ...args) {
if (node.delimiter) {
visit(node.delimiter, ...args);
}

return visit(node.expression, ...args);
},
group: visitExpression,
semantic_and: visitNop,
semantic_not: visitNop,
Expand Down
8 changes: 7 additions & 1 deletion lib/peg.d.ts
Expand Up @@ -249,6 +249,11 @@ declare namespace ast {
min: RepeatedBoundary | null;
/** Maximum count of repetitions. */
max: RepeatedBoundary;
/**
* An expression that should appear between occurrences of the `expression`.
* Matched parts of the input are skipped and are not included in the result array.
*/
delimiter: Expression | null;
expression: Primary;
}

Expand Down Expand Up @@ -721,7 +726,8 @@ export namespace compiler {
*/
one_or_more?(node: ast.Suffixed, ...args: any[]): any;
/**
* Default behavior: run visitor on `expression` and return it result
* Default behavior: run visitor on `delimiter` if it is defined, then
* run visitor on `expression` and return its result
*
* @param node Node, representing repetition of the `expression` specified number of times
* @param args Any arguments passed to the `Visitor`
Expand Down
3 changes: 2 additions & 1 deletion src/parser.pegjs
Expand Up @@ -206,7 +206,7 @@ SuffixedOperator
/ "+"

RepeatedExpression
= expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ "|" {
= expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ delimiter:("," __ @Expression __)? "|" {
let min = boundaries[0];
let max = boundaries[1];
if (max.type === "constant" && max.value === 0) {
Expand All @@ -218,6 +218,7 @@ RepeatedExpression
min,
max,
expression,
delimiter,
location: location(),
};
}
Expand Down

0 comments on commit 22feab4

Please sign in to comment.