Skip to content

Commit

Permalink
Ranges (pegjs/pegjs#30): Add the ability to use labels as range boundaries
Browse files Browse the repository at this point in the history
Added two new opcodes:
- IF_LT_DYNAMIC: same as IF_LT, but the argument is a reference to a stack variable instead of a constant
- IF_GE_DYNAMIC: same as IF_GE, but the argument is a reference to a stack variable instead of a constant
  • Loading branch information
Mingun committed Jun 11, 2022
1 parent e303fea commit 2826da9
Show file tree
Hide file tree
Showing 8 changed files with 116 additions and 18 deletions.
11 changes: 9 additions & 2 deletions lib/compiler/asts.js
Expand Up @@ -44,10 +44,17 @@ const asts = {
optional: consumesFalse,
zero_or_more: consumesFalse,
repeated(node) {
// Handle exact case
// If the minimum is `null`, it is equal to the maximum (parsed from the `|exact|` syntax)
const min = node.min ? node.min : node.max;

return min.value > 0 ? consumes(node.expression) : false;
// If the low boundary is variable, it can be zero.
// An expression repeated zero times does not consume any input
// but always matches, so it does not always consume input on success
if (min.type !== "constant" || min.value === 0) {
return false;
}

return consumes(node.expression);
},
semantic_and: consumesFalse,
semantic_not: consumesFalse,
Expand Down
6 changes: 5 additions & 1 deletion lib/compiler/opcodes.js
Expand Up @@ -28,6 +28,8 @@ const opcodes = {
IF_NOT_ERROR: 15, // IF_NOT_ERROR t, f
IF_LT: 30, // IF_LT min, t, f
IF_GE: 31, // IF_GE max, t, f
IF_LT_DYNAMIC: 32, // IF_LT_DYNAMIC min, t, f
IF_GE_DYNAMIC: 33, // IF_GE_DYNAMIC max, t, f
WHILE_NOT_ERROR: 16, // WHILE_NOT_ERROR b

// Matching
Expand Down Expand Up @@ -64,7 +66,9 @@ const opcodes = {
//
// IF_LT: 30
// IF_GE: 31
// 32-34 reserved for @mingun
// IF_LT_DYNAMIC: 32
// IF_GE_DYNAMIC: 33
// 34 reserved for @mingun
// PUSH_EMPTY_STRING: 35
// PLUCK: 36
};
Expand Down
58 changes: 48 additions & 10 deletions lib/compiler/passes/generate-bytecode.js
Expand Up @@ -122,6 +122,22 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc
// interpret(ip + 3 + t, ip + 3 + t + f);
// }
//
// [32] IF_LT_DYNAMIC min, t, f
//
// if (stack.top().length < stack[min]) {
// interpret(ip + 3, ip + 3 + t);
// } else {
// interpret(ip + 3 + t, ip + 3 + t + f);
// }
//
// [33] IF_GE_DYNAMIC max, t, f
//
// if (stack.top().length >= stack[max]) {
// interpret(ip + 3, ip + 3 + t);
// } else {
// interpret(ip + 3 + t, ip + 3 + t + f);
// }
//
// [16] WHILE_NOT_ERROR b
//
// while(stack.top() !== FAILED) {
Expand Down Expand Up @@ -376,16 +392,22 @@ function generateBytecode(ast) {
* @param {number[]} expressionCode Bytecode for parsing repetitions
* @param {import("../../peg").ast.RepeatedBoundary} max Maximum boundary of repetitions.
* If `null`, the maximum boundary is unlimited
* @param {object} context
* @param {number} sp Pointer to the top of the variable stack
*
* @returns {number[]} Bytecode that performs check of the maximum boundary
*/
function buildCheckMax(expressionCode, max) {
function buildCheckMax(expressionCode, max, context, sp) {
if (max.value !== null) {
const checkCode = max.type === "constant"
? [op.IF_GE, max.value]
: [op.IF_GE_DYNAMIC, sp - context.env[max.value]];

// Push `peg$FAILED` - this breaks the loop on the next iteration, so |result|
// will contain no more than |max| elements.
return buildCondition(
SOMETIMES_MATCH,
[op.IF_GE, max.value], // if (r.length >= max) stack:[ [elem...] ]
checkCode, // if (r.length >= max) stack:[ [elem...] ]
[op.PUSH_FAILED], // elem = peg$FAILED; stack:[ [elem...], peg$FAILED ]
expressionCode // else
); // elem = expr(); stack:[ [elem...], elem ]
Expand All @@ -402,12 +424,16 @@ function generateBytecode(ast) {
*
* @returns {number[]} Bytecode that performs check of the minimum boundary
*/
function buildCheckMin(expressionCode, min) {
function buildCheckMin(expressionCode, min, context) {
const checkCode = min.type === "constant"
? [op.IF_LT, min.value]
: [op.IF_LT_DYNAMIC, context.sp + 2 - context.env[min.value]];

return buildSequence(
expressionCode, // result = [elem...]; stack:[ pos, [elem...] ]
buildCondition(
SOMETIMES_MATCH,
[op.IF_LT, min.value], // if (result.length < min) {
checkCode, // if (result.length < min) {
[op.POP, op.POP_CURR_POS, // currPos = savedPos; stack:[ ]
// eslint-disable-next-line indent
op.PUSH_FAILED], // result = peg$FAILED; stack:[ peg$FAILED ]
Expand Down Expand Up @@ -697,25 +723,37 @@ function generateBytecode(ast) {
repeated(node, context) {
// Handle case when minimum was literally equals to maximum
const min = node.min ? node.min : node.max;
const hasMin = min.value > 0;
const hasMin = min.type !== "constant" || min.value > 0;
const hasBoundedMax = node.max.type !== "constant" && node.max.value !== null;
const sp = context.sp + (hasMin ? 2 : 1);

const expressionCode = generate(node.expression, {
sp: context.sp + (hasMin ? 2 : 1),
sp,
env: cloneEnv(context.env),
action: null,
});
// Check the high boundary, if it is defined.
const checkMaxCode = buildCheckMax(expressionCode, node.max);
const checkMaxCode = buildCheckMax(
expressionCode, node.max, context, sp
);
// For a dynamic high boundary we need to check the first iteration as well, because the result can be
// empty. Constant boundaries do not require that check, because they are always >= 1
const firstElemCode = hasBoundedMax
? checkMaxCode
: expressionCode;
const mainLoopCode = buildSequence(
// If the low boundary is present, then backtracking is possible, so save the current pos
hasMin ? [op.PUSH_CURR_POS] : [], // var savedPos = curPos; stack:[ pos ]
[op.PUSH_EMPTY_ARRAY], // var result = []; stack:[ pos, [] ]
expressionCode, // var elem = expr(); stack:[ pos, [], elem ]
firstElemCode, // var elem = expr(); stack:[ pos, [], elem ]
buildAppendLoop(checkMaxCode), // while(...)r.push(elem); stack:[ pos, [...], elem|peg$FAILED ]
[op.POP] // stack:[ pos, [elem...] ] (pop elem===`peg$FAILED`)
[op.POP] // stack:[ pos, [...] ] (pop elem===`peg$FAILED`)
);

// Check the low boundary, if it is defined and not |0|.
return hasMin ? buildCheckMin(mainLoopCode, min) : mainLoopCode;
return hasMin
? buildCheckMin(mainLoopCode, min, context)
: mainLoopCode;
},

group(node, context) {
Expand Down
8 changes: 8 additions & 0 deletions lib/compiler/passes/generate-js.js
Expand Up @@ -437,6 +437,14 @@ function generateJS(ast, options) {
compileCondition(stack.top() + ".length >= " + bc[ip + 1], 1);
break;

case op.IF_LT_DYNAMIC: // IF_LT_DYNAMIC min, t, f
compileCondition(stack.top() + ".length < " + stack.index(bc[ip + 1]) + "|0", 1);
break;

case op.IF_GE_DYNAMIC: // IF_GE_DYNAMIC max, t, f
compileCondition(stack.top() + ".length >= " + stack.index(bc[ip + 1]) + "|0", 1);
break;

case op.WHILE_NOT_ERROR: // WHILE_NOT_ERROR b
compileLoop(stack.top() + " !== peg$FAILED");
break;
Expand Down
31 changes: 29 additions & 2 deletions lib/compiler/passes/inference-match-result.js
Expand Up @@ -98,10 +98,37 @@ function inferenceMatchResult(ast) {
one_or_more: inferenceExpression,
repeated(node) {
const match = inference(node.expression);
// Handle exact case
// If the minimum is `null`, it is equal to the maximum (parsed from the `|exact|` syntax)
const min = node.min ? node.min : node.max;

return (node.match = min.value > 0 ? match : ALWAYS_MATCH);
// If any boundary is variable, it can be negative, and in that case the
// node does not match, but it may match with some other values
if (min.type !== "constant" || node.max.type !== "constant") {
return (node.match = SOMETIMES_MATCH);
}
// Now both boundaries are constants.
// If the upper boundary is zero or the minimum exceeds the maximum,
// matching is impossible
if (node.max.value === 0
|| node.max.value !== null && min.value > node.max.value
) {
return (node.match = NEVER_MATCH);
}

if (match === NEVER_MATCH) {
// If an expression always fails, a range will also always fail
// (with one exception - a never-matching expression repeated
// zero times always matches and returns an empty array).
return (node.match = min.value === 0 ? ALWAYS_MATCH : NEVER_MATCH);
}
if (match === ALWAYS_MATCH) {
return (node.match = ALWAYS_MATCH);
}

// Here the expression sometimes matches. If it must be repeated at least once,
// the whole range sometimes matches; otherwise it always succeeds (at least
// an empty array is guaranteed)
return (node.match = min.value === 0 ? ALWAYS_MATCH : SOMETIMES_MATCH);
},
group: inferenceExpression,
semantic_and: sometimesMatch,
Expand Down
9 changes: 8 additions & 1 deletion lib/compiler/passes/report-infinite-repetition.js
Expand Up @@ -35,8 +35,15 @@ function reportInfiniteRepetition(ast, options, session) {
node.location
);
} else {
// If the minimum is `null`, it is equal to the maximum (parsed from the `|exact|` syntax)
const min = node.min ? node.min : node.max;

// Because the high boundary is defined, infinite repetition is not possible,
// but the grammar will waste CPU time
session.warning(
`An expression always match ${node.max.value} times, because it does not consume any input`,
min.type === "constant" && node.max.type === "constant"
? `An expression may not consume any input and may always match ${node.max.value} times`
: "An expression may not consume any input and may always match with a maximum repetition count",
node.location
);
}
Expand Down
8 changes: 7 additions & 1 deletion lib/peg.d.ts
Expand Up @@ -205,8 +205,14 @@ declare namespace ast {
value: number;
}

/** Repetition boundary whose count is given by a label instead of a numeric literal. */
interface VariableBoundary extends Boundary<"variable"> {
/** Repetition count - the name of the label of one of the preceding expressions. */
value: string;
}

type RepeatedBoundary
= ConstantBoundary;
= ConstantBoundary
| VariableBoundary;

/** Expression repeated from `min` to `max` times. */
interface Repeated extends Expr<"repeated"> {
Expand Down
3 changes: 2 additions & 1 deletion src/parser.pegjs
Expand Up @@ -209,7 +209,7 @@ RepeatedExpression
= expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ "|" {
let min = boundaries[0];
let max = boundaries[1];
if (max.value === 0) {
if (max.type === "constant" && max.value === 0) {
error("The maximum count of repetitions of the rule must be > 0", max.location);
}
Expand All @@ -233,6 +233,7 @@ Boundaries

Boundary
= value:Integer { return { type: "constant", value, location: location() }; }
/ value:IdentifierName { return { type: "variable", value: value[0], location: location() }; }

PrimaryExpression
= LiteralMatcher
Expand Down

0 comments on commit 2826da9

Please sign in to comment.