Skip to content

Commit

Permalink
Ranges (pegjs/pegjs#30): Add ability to use code blocks as range boun…
Browse files Browse the repository at this point in the history
…daries
  • Loading branch information
Mingun committed Jun 11, 2022
1 parent de65bb7 commit 81ea1d4
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 23 deletions.
90 changes: 75 additions & 15 deletions lib/compiler/passes/generate-bytecode.js
Expand Up @@ -387,21 +387,59 @@ function generateBytecode(ast) {
);
}

/**
 * Prepares a single boundary of a "repeated" node for code generation.
 *
 * Depending on the boundary kind this:
 * - "constant": emits nothing and leaves the stack pointer untouched;
 * - "variable": records in `boundary.sp` the value
 *   `offset + sp - env[boundary.value]` and emits nothing;
 * - "function": records `offset` in `boundary.sp`, registers the code block
 *   through `addFunctionConst` (parameter names are the keys of `env`) and
 *   returns the bytecode produced by `buildCall` as the prologue, with a
 *   single `op.NIP` as the epilogue. The function result takes one extra
 *   stack slot, so the returned `sp` is `sp + 1`.
 *
 * Note that the `boundary` object is mutated (its `sp` property is assigned)
 * for the "variable" and "function" kinds.
 *
 * @param {import("../../peg").ast.RepeatedBoundary} boundary Boundary to process
 * @param {{ [label: string]: number}} env Mapping of label names to stack positions
 * @param {number} sp Number of the first free slot in the stack
 * @param {number} offset Value added when computing `boundary.sp`; supplied by
 *        the caller (presumably the number of slots the repetition itself
 *        puts on the stack -- confirm against the "repeated" visitor)
 *
 * @returns {{ pre: number[], post: number[], sp: number}}
 *          Bytecode that should be added before and after parsing, and the
 *          new first free slot in the stack
 * @throws {Error} If `boundary.type` is not one of the known kinds
 */
function buildRangeCall(boundary, env, sp, offset) {
  const kind = boundary.type;

  if (kind === "constant") {
    // Nothing to evaluate at runtime.
    return { pre: [], post: [], sp };
  }

  if (kind === "variable") {
    boundary.sp = offset + sp - env[boundary.value];
    return { pre: [], post: [], sp };
  }

  if (kind === "function") {
    boundary.sp = offset;

    const paramNames = Object.keys(env);
    const fnIndex = addFunctionConst(true, paramNames, {
      code: boundary.value,
      codeLocation: boundary.codeLocation,
    });
    const pre = buildCall(fnIndex, 0, env, sp);

    // +1 for the function result
    return { pre, post: [op.NIP], sp: sp + 1 };
  }

  // istanbul ignore next Because we never generate invalid boundary type we cannot reach this branch
  throw new Error(`Unknown boundary type "${boundary.type}" for the "repeated" node`);
}

/* eslint capitalized-comments: "off" */
/**
* @param {number[]} expressionCode Bytecode for parsing repetitions
* @param {import("../../peg").ast.RepeatedBoundary} max Maximum boundary of repetitions.
* If `null`, the maximum boundary is unlimited
* @param {object} context
* @param {number} sp Pointer to the top of the variable stack
*
* @returns {number[]} Bytecode that performs check of the maximum boundary
*/
function buildCheckMax(expressionCode, max, context, sp) {
function buildCheckMax(expressionCode, max) {
if (max.value !== null) {
const checkCode = max.type === "constant"
? [op.IF_GE, max.value]
: [op.IF_GE_DYNAMIC, sp - context.env[max.value]];
: [op.IF_GE_DYNAMIC, max.sp];

// Push `peg$FAILED` - this breaks the loop on the next iteration, so |result|
// will contain no more than |max| elements.
Expand All @@ -424,10 +462,10 @@ function generateBytecode(ast) {
*
* @returns {number[]} Bytecode that performs check of the minimum boundary
*/
function buildCheckMin(expressionCode, min, context) {
function buildCheckMin(expressionCode, min) {
const checkCode = min.type === "constant"
? [op.IF_LT, min.value]
: [op.IF_LT_DYNAMIC, context.sp + 2 - context.env[min.value]];
: [op.IF_LT_DYNAMIC, min.sp];

return buildSequence(
expressionCode, // result = [elem...]; stack:[ pos, [elem...] ]
Expand Down Expand Up @@ -725,17 +763,33 @@ function generateBytecode(ast) {
const min = node.min ? node.min : node.max;
const hasMin = min.type !== "constant" || min.value > 0;
const hasBoundedMax = node.max.type !== "constant" && node.max.value !== null;
const sp = context.sp + (hasMin ? 2 : 1);

// +1 for the result slot with an array
// +1 if we have non-constant (i.e. potentially non-zero) or non-zero minimum
// for the position before match for backtracking
const offset = hasMin ? 2 : 1;

// Do not generate function for "minimum" if grammar used `exact` syntax
const minCode = node.min
? buildRangeCall(
node.min,
context.env,
context.sp,
// +1 for the result slot with an array
// +1 for the saved position
// +1 if we have a "function" maximum, because it occupies an additional slot in the stack
2 + (node.max.type === "function" ? 1 : 0)
)
: { pre: [], post: [], sp: context.sp };
const maxCode = buildRangeCall(node.max, context.env, minCode.sp, offset);

const expressionCode = generate(node.expression, {
sp,
sp: maxCode.sp + offset,
env: cloneEnv(context.env),
action: null,
});
// Check the high boundary, if it is defined.
const checkMaxCode = buildCheckMax(
expressionCode, node.max, context, sp
);
const checkMaxCode = buildCheckMax(expressionCode, node.max);
// For a dynamic high boundary we need to check the first iteration, because the result can be
// empty. Constant boundaries do not require that check, because they are always >=1
const firstElemCode = hasBoundedMax
Expand All @@ -750,10 +804,16 @@ function generateBytecode(ast) {
[op.POP] // stack:[ pos, [...] ] (pop elem===`peg$FAILED`)
);

// Check the low boundary, if it is defined and not |0|.
return hasMin
? buildCheckMin(mainLoopCode, min, context)
: mainLoopCode;
return buildSequence(
minCode.pre,
maxCode.pre,
// Check the low boundary, if it is defined and not |0|.
hasMin
? buildCheckMin(mainLoopCode, min)
: mainLoopCode,
maxCode.post,
minCode.post
);
},

group(node, context) {
Expand Down
13 changes: 6 additions & 7 deletions lib/compiler/passes/generate-js.js
Expand Up @@ -312,18 +312,14 @@ function generateJS(ast, options) {
parts.push("}");
}

function compileCall() {
const baseLength = 4;
function compileCall(baseLength) {
const paramsLength = bc[ip + baseLength - 1];

const value = f(bc[ip + 1]) + "("
return f(bc[ip + 1]) + "("
+ bc.slice(ip + baseLength, ip + baseLength + paramsLength).map(
p => stack.index(p)
).join(", ")
+ ")";
stack.pop(bc[ip + 2]);
parts.push(stack.push(value));
ip += baseLength + paramsLength;
}

while (ip < end) {
Expand Down Expand Up @@ -524,7 +520,10 @@ function generateJS(ast, options) {
break;

case op.CALL: // CALL f, n, pc, p1, p2, ..., pN
compileCall();
value = compileCall(4);
stack.pop(bc[ip + 2]);
parts.push(stack.push(value));
ip += 4 + bc[ip + 3];
break;

case op.RULE: // RULE r
Expand Down
10 changes: 9 additions & 1 deletion lib/peg.d.ts
Expand Up @@ -210,9 +210,17 @@ declare namespace ast {
value: string;
}

/**
 * Repetition boundary of type `"function"`: the boundary is given in the
 * grammar as a code block rather than as a constant or a label name.
 */
interface FunctionBoundary extends Boundary<"function"> {
/** The code from the grammar. */
value: string;
/** Span that covers all code between `{` and `}`. */
codeLocation: LocationRange;
}

type RepeatedBoundary
= ConstantBoundary
| VariableBoundary;
| VariableBoundary
| FunctionBoundary;

/** Expression repeated from `min` to `max` times. */
interface Repeated extends Expr<"repeated"> {
Expand Down
8 changes: 8 additions & 0 deletions src/parser.pegjs
Expand Up @@ -234,6 +234,14 @@ Boundaries
// A single repetition boundary. Three forms are accepted, tried in order:
//   - an integer, producing a "constant" boundary;
//   - an identifier name, producing a "variable" boundary;
//   - a code block, producing a "function" boundary that keeps both the code
//     (first element of the CodeBlock result) and its location (second element).
Boundary
= value:Integer { return { type: "constant", value, location: location() }; }
/ value:IdentifierName { return { type: "variable", value: value[0], location: location() }; }
/ value:CodeBlock {
return {
type: "function",
value: value[0],
codeLocation: value[1],
location: location(),
};
}

PrimaryExpression
= LiteralMatcher
Expand Down

0 comments on commit 81ea1d4

Please sign in to comment.