Skip to content

Commit

Permalink
Ranges (pegjs/pegjs#30): Implement ranges support. Range syntax:
Browse files Browse the repository at this point in the history
```
expression|  exact |
expression|   ..   |
expression|min..   |
expression|   ..max|
expression|min..max|
```

Introduce two new opcodes:
* IF_LT <min>, <then part length>, <else part length>
* IF_GE <max>, <then part length>, <else part length>

Introduce a new AST node, `repeated`, that contains the expression and the minimum and maximum number of its repetitions.
If `node.min.value` is `null` or is not positive, the minimum length is not checked.
If `node.max.value` is `null`, the maximum length is not checked.
If `node.min` is `null`, the minimum is equal to `node.max` (the exact repetition case).
  • Loading branch information
Mingun committed Jun 11, 2022
1 parent 48585a5 commit 4bf607f
Show file tree
Hide file tree
Showing 11 changed files with 216 additions and 7 deletions.
6 changes: 6 additions & 0 deletions lib/compiler/asts.js
Expand Up @@ -43,6 +43,12 @@ const asts = {
simple_not: consumesFalse,
optional: consumesFalse,
zero_or_more: consumesFalse,
repeated(node) {
// Handle exact case
const min = node.min ? node.min : node.max;

return min.value > 0 ? consumes(node.expression) : false;
},
semantic_and: consumesFalse,
semantic_not: consumesFalse,

Expand Down
6 changes: 5 additions & 1 deletion lib/compiler/opcodes.js
Expand Up @@ -26,6 +26,8 @@ const opcodes = {
IF: 13, // IF t, f
IF_ERROR: 14, // IF_ERROR t, f
IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f
IF_LT: 30, // IF_LT min, t, f
IF_GE: 31, // IF_GE max, t, f
WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b

// Matching
Expand Down Expand Up @@ -60,7 +62,9 @@ const opcodes = {
// sections above are repeated here in order to ensure we don't
// reuse them.
//
// 30-34 reserved for @mingun
// IF_LT: 30
// IF_GE: 31
// 32-34 reserved for @mingun
// PUSH_EMPTY_STRING: 35
// PLUCK: 36
};
Expand Down
85 changes: 85 additions & 0 deletions lib/compiler/passes/generate-bytecode.js
Expand Up @@ -106,6 +106,22 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc
// interpret(ip + 3 + t, ip + 3 + t + f);
// }
//
// [30] IF_LT min, t, f
//
// if (stack.top().length < min) {
// interpret(ip + 3, ip + 3 + t);
// } else {
// interpret(ip + 3 + t, ip + 3 + t + f);
// }
//
// [31] IF_GE max, t, f
//
// if (stack.top().length >= max) {
// interpret(ip + 3, ip + 3 + t);
// } else {
// interpret(ip + 3 + t, ip + 3 + t + f);
// }
//
// [16] WHILE_NOT_ERROR b
//
// while(stack.top() !== FAILED) {
Expand Down Expand Up @@ -355,6 +371,51 @@ function generateBytecode(ast) {
);
}

/* eslint capitalized-comments: "off" */
/**
 * Guards the bytecode of the repeated expression with a check of the upper
 * boundary of repetitions.
 *
 * @param {number[]} expressionCode Bytecode for parsing repetitions
 * @param {import("../../peg").ast.RepeatedBoundary} max Maximum boundary of repetitions.
 *        If `max.value` is `null`, the maximum boundary is unlimited
 *
 * @returns {number[]} Bytecode that performs check of the maximum boundary
 */
function buildCheckMax(expressionCode, max) {
  const limit = max.value;

  // Unlimited maximum: the expression code is used as is.
  if (limit === null) {
    return expressionCode;
  }

  // When the limit is reached, `peg$FAILED` is pushed instead of running the
  // expression. That breaks the loop on the next iteration, so |result|
  // contains no more than |max| elements.
  const whenLimitReached = [op.PUSH_FAILED]; // elem = peg$FAILED; stack:[ [elem...], peg$FAILED ]
  const otherwise = expressionCode;          // elem = expr();     stack:[ [elem...], elem ]

  return buildCondition(
    SOMETIMES_MATCH,
    [op.IF_GE, limit],                       // if (r.length >= max)  stack:[ [elem...] ]
    whenLimitReached,
    otherwise
  );
}

/* eslint capitalized-comments: "off" */
/**
 * Appends to the repetition bytecode a check that enough elements were
 * collected; if not, the saved position is restored and the result becomes
 * `peg$FAILED`.
 *
 * @param {number[]} expressionCode Bytecode for parsing repeated elements
 * @param {import("../../peg").ast.RepeatedBoundary} min Minimum boundary of repetitions;
 *        `min.value` is the required number of elements
 *
 * @returns {number[]} Bytecode that performs check of the minimum boundary
 */
function buildCheckMin(expressionCode, min) {
  // Too few elements: drop the array, rewind and fail.
  const rollback = [
    op.POP,          //                               stack:[ pos ]
    op.POP_CURR_POS, // currPos = savedPos;           stack:[ ]
    op.PUSH_FAILED,  // result = peg$FAILED;          stack:[ peg$FAILED ]
  ];
  // Enough elements: discard the saved position and keep the array.
  const keepResult = [op.NIP]; //                     stack:[ [elem...] ]

  const boundaryCheck = buildCondition(
    SOMETIMES_MATCH,
    [op.IF_LT, min.value], // if (result.length < min)
    rollback,
    keepResult
  );

  // result = [elem...];                              stack:[ pos, [elem...] ]
  return buildSequence(expressionCode, boundaryCheck);
}

const generate = visitor.build({
grammar(node) {
node.rules.forEach(generate);
Expand Down Expand Up @@ -633,6 +694,30 @@ function generateBytecode(ast) {
);
},

// Generates bytecode for a `repeated` node: collects matches of
// `node.expression` into an array, optionally limited by `node.max` and
// optionally required to reach the minimum boundary.
repeated(node, context) {
// Exact repetition (`|n|`): `node.min` is `null`, so the maximum also serves
// as the minimum.
const min = node.min ? node.min : node.max;
// The minimum needs a check only when it is greater than zero.
const hasMin = min.value > 0;
// The expression runs with values already on the stack: the result array,
// plus the saved position when the minimum is checked. Hence the `sp` offset.
const expressionCode = generate(node.expression, {
sp: context.sp + (hasMin ? 2 : 1),
env: cloneEnv(context.env),
action: null,
});
// Check the high boundary, if it is defined.
const checkMaxCode = buildCheckMax(expressionCode, node.max);
// The first evaluation of the expression is emitted without the maximum
// check; only the evaluations inside the append loop use `checkMaxCode`.
const mainLoopCode = buildSequence(
// If the low boundary present, then backtracking is possible, so save the current pos
hasMin ? [op.PUSH_CURR_POS] : [], // var savedPos = curPos; stack:[ pos ]
[op.PUSH_EMPTY_ARRAY], // var result = []; stack:[ pos, [] ]
expressionCode, // var elem = expr(); stack:[ pos, [], elem ]
buildAppendLoop(checkMaxCode), // while(...)r.push(elem); stack:[ pos, [...], elem|peg$FAILED ]
[op.POP] // stack:[ pos, [elem...] ] (pop elem===`peg$FAILED`)
);

// Check the low boundary, if it is defined and not |0|.
return hasMin ? buildCheckMin(mainLoopCode, min) : mainLoopCode;
},

group(node, context) {
return generate(node.expression, {
sp: context.sp,
Expand Down
8 changes: 8 additions & 0 deletions lib/compiler/passes/generate-js.js
Expand Up @@ -429,6 +429,14 @@ function generateJS(ast, options) {
compileCondition(stack.top() + " !== peg$FAILED", 0);
break;

case op.IF_LT: // IF_LT min, t, f
compileCondition(stack.top() + ".length < " + bc[ip + 1], 1);
break;

case op.IF_GE: // IF_GE max, t, f
compileCondition(stack.top() + ".length >= " + bc[ip + 1], 1);
break;

case op.WHILE_NOT_ERROR: // WHILE_NOT_ERROR b
compileLoop(stack.top() + " !== peg$FAILED");
break;
Expand Down
7 changes: 7 additions & 0 deletions lib/compiler/passes/inference-match-result.js
Expand Up @@ -96,6 +96,13 @@ function inferenceMatchResult(ast) {
optional: alwaysMatch,
zero_or_more: alwaysMatch,
one_or_more: inferenceExpression,
repeated(node) {
const match = inference(node.expression);
// Handle exact case
const min = node.min ? node.min : node.max;

return (node.match = min.value > 0 ? match : ALWAYS_MATCH);
},
group: inferenceExpression,
semantic_and: sometimesMatch,
semantic_not: sometimesMatch,
Expand Down
1 change: 1 addition & 0 deletions lib/compiler/passes/report-duplicate-labels.js
Expand Up @@ -55,6 +55,7 @@ function reportDuplicateLabels(ast, options, session) {
optional: checkExpressionWithClonedEnv,
zero_or_more: checkExpressionWithClonedEnv,
one_or_more: checkExpressionWithClonedEnv,
repeated: checkExpressionWithClonedEnv,
group: checkExpressionWithClonedEnv,
});

Expand Down
19 changes: 18 additions & 1 deletion lib/compiler/passes/report-infinite-repetition.js
Expand Up @@ -3,7 +3,7 @@
const asts = require("../asts");
const visitor = require("../visitor");

// Reports expressions that don't consume any input inside |*| or |+| in the
// Reports expressions that don't consume any input inside |*|, |+| or repeated in the
// grammar, which prevents infinite loops in the generated parser.
function reportInfiniteRepetition(ast, options, session) {
const check = visitor.build({
Expand All @@ -24,6 +24,23 @@ function reportInfiniteRepetition(ast, options, session) {
);
}
},

repeated(node) {
if (asts.alwaysConsumesOnSuccess(ast, node.expression)) {
return;
}
if (node.max.value === null) {
session.error(
"Possible infinite loop when parsing (unbounded range repetition used with an expression that may not consume any input)",
node.location
);
} else {
session.warning(
`An expression always match ${node.max.value} times, because it does not consume any input`,
node.location
);
}
},
});

check(ast);
Expand Down
1 change: 1 addition & 0 deletions lib/compiler/visitor.js
Expand Up @@ -53,6 +53,7 @@ const visitor = {
optional: visitExpression,
zero_or_more: visitExpression,
one_or_more: visitExpression,
repeated: visitExpression,
group: visitExpression,
semantic_and: visitNop,
semantic_not: visitNop,
Expand Down
40 changes: 38 additions & 2 deletions lib/peg.d.ts
Expand Up @@ -122,6 +122,7 @@ declare namespace ast {
| Labeled
| Prefixed
| Suffixed
| Repeated
| Primary;

/** One element of the choice node. */
Expand All @@ -131,6 +132,7 @@ declare namespace ast {
| Labeled
| Prefixed
| Suffixed
| Repeated
| Primary;

interface Choice extends Expr<"choice"> {
Expand All @@ -147,6 +149,7 @@ declare namespace ast {
| Labeled
| Prefixed
| Suffixed
| Repeated
| Primary
);
}
Expand All @@ -156,6 +159,7 @@ declare namespace ast {
= Labeled
| Prefixed
| Suffixed
| Repeated
| Primary;

interface Sequence extends Expr<"sequence"> {
Expand All @@ -178,19 +182,44 @@ declare namespace ast {
*/
labelLocation: LocationRange;
/** Expression which result will be available in the user code under name `label`. */
expression: Prefixed | Suffixed | Primary;
expression: Prefixed | Suffixed | Repeated | Primary;
}

/** Expression with a preceding operator. */
interface Prefixed extends Expr<"text" | "simple_and" | "simple_not"> {
expression: Suffixed | Primary;
expression: Suffixed | Repeated | Primary;
}

/** Expression with a following operator. */
interface Suffixed extends Expr<"optional" | "zero_or_more" | "one_or_more"> {
expression: Primary;
}

/**
 * Common shape of a repetition boundary node.
 *
 * @template T Discriminator stored in the `type` property
 */
interface Boundary<T> {
/** Kind of the boundary. */
type: T;
/**
 * Location of the boundary in the grammar source.
 * NOTE(review): boundaries implied by an omitted `min` or `max` appear to be
 * created by the parser without a location -- confirm.
 */
location: LocationRange;
}

/** Boundary given in the grammar as an integer literal, or implied when omitted. */
interface ConstantBoundary extends Boundary<"constant"> {
  /**
   * Repetition count: a non-negative integer (`0` is used for an omitted
   * minimum), or `null` for an omitted maximum, which means that the number
   * of repetitions is unlimited.
   */
  value: number | null;
}

/** Any kind of boundary usable in a repetition; currently only constant boundaries. */
type RepeatedBoundary
= ConstantBoundary;

/** Expression repeated from `min` to `max` times. */
interface Repeated extends Expr<"repeated"> {
/**
* Minimum count of repetitions. If `null`, the exact repetition form
* was used and the minimum is the same as the maximum.
*/
min: RepeatedBoundary | null;
/**
* Maximum count of repetitions. A boundary whose `value` is `null`
* means that the maximum is not limited.
*/
max: RepeatedBoundary;
/** Expression being repeated. */
expression: Primary;
}

type Primary
= RuleReference
| SemanticPredicate
Expand Down Expand Up @@ -607,6 +636,13 @@ export namespace compiler {
* @param args Any arguments passed to the `Visitor`
*/
one_or_more?(node: ast.Suffixed, ...args: any[]): any;
/**
* Default behavior: run visitor on `expression` and return it result
*
* @param node Node, representing repetition of the `expression` specified number of times
* @param args Any arguments passed to the `Visitor`
*/
repeated?(node: ast.Repeated, ...args: any[]): any;
/**
* Default behavior: run visitor on `expression` and return it result
*
Expand Down
33 changes: 33 additions & 0 deletions src/parser.pegjs
Expand Up @@ -197,13 +197,43 @@ SuffixedExpression
location: location()
};
}
/ RepeatedExpression
/ PrimaryExpression

SuffixedOperator
= "?"
/ "*"
/ "+"

// Repetition of an expression a bounded number of times: `expression|...|`.
RepeatedExpression
  = expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ "|" {
      // `boundaries` is a `[min, max]` pair; `min` is `null` for the exact form.
      const [min, max] = boundaries;

      // Zero is rejected as a maximum count.
      if (max.value === 0) {
        error("The maximum count of repetitions of the rule must be > 0", max.location);
      }

      return {
        type: "repeated",
        min: min,
        max: max,
        expression: expression,
        location: location(),
      };
    }

// Boundaries of a repetition, returned as a `[min, max]` pair.
// The first alternative is the range form (`min..max`, either side optional);
// the second is the exact form, where `min` is `null` and `max` holds the count.
Boundaries
= min:Boundary? __ ".." __ max:Boundary? {
return [
// An omitted minimum defaults to a constant boundary of zero.
min !== null ? min : { type: "constant", value: 0 },
// An omitted maximum is recorded as a constant boundary with a `null` value.
max !== null ? max : { type: "constant", value: null },
];
}
/ exact:Boundary { return [null, exact]; }

// A single explicit boundary, written as an integer literal.
Boundary
  = value:Integer {
      return {
        type: "constant",
        value: value,
        location: location(),
      };
    }

PrimaryExpression
= LiteralMatcher
/ CharacterClassMatcher
Expand Down Expand Up @@ -430,6 +460,9 @@ BareCodeBlock
Code
= $((![{}] SourceCharacter)+ / "{" Code "}")*

// Non-negative decimal integer literal, converted to a number.
Integer
  = digits:$DecimalDigit+ {
      // The matched text consists of decimal digits only, so the radix is
      // stated explicitly instead of being left to `parseInt` to infer.
      return parseInt(digits, 10);
    }

// Unicode Character Categories
//
// Extracted from the following Unicode Character Database file:
Expand Down

0 comments on commit 4bf607f

Please sign in to comment.