From f5c1d75f6ba1a5c01589c61b4b13bffcb7b7f20b Mon Sep 17 00:00:00 2001 From: Curtis Man Date: Fri, 27 Feb 2026 16:27:32 -0800 Subject: [PATCH] AGR: Add compiler tests and expand grammar compiler functionality Co-Authored-By: Claude Sonnet 4.6 --- .../actionGrammar/src/grammarCompiler.ts | 128 +++++- .../test/grammarCompileError.spec.ts | 388 ++++++++++++++++++ .../actionGrammar/test/grammarMatcher.spec.ts | 28 ++ .../actionGrammar/test/grammarStore.spec.ts | 26 +- 4 files changed, 541 insertions(+), 29 deletions(-) diff --git a/ts/packages/actionGrammar/src/grammarCompiler.ts b/ts/packages/actionGrammar/src/grammarCompiler.ts index fb2d78989..a76e300ce 100644 --- a/ts/packages/actionGrammar/src/grammarCompiler.ts +++ b/ts/packages/actionGrammar/src/grammarCompiler.ts @@ -5,6 +5,7 @@ import { Grammar, GrammarPart, GrammarRule, + RulesPart, StringPart, } from "./grammarTypes.js"; import { @@ -29,6 +30,8 @@ type DefinitionRecord = { pos: number | undefined; grammarRules?: GrammarRule[]; hasValue: boolean; + compiling: boolean; // true while grammarRules is being populated + nullable?: boolean; // set after compilation; true if any alternative matches ε }; type CompletedDefinitionRecord = DefinitionRecord & { @@ -136,6 +139,7 @@ function createCompileContext( pos: def.pos, // Set this to true to allow recursion to assume that it has value. hasValue: true, + compiling: false, }); } else { existing.rules.push(...def.rules); @@ -300,6 +304,8 @@ const emptyRecord = { pos: undefined, grammarRules: [], hasValue: true, // Pretend to have value to avoid cascading errors + compiling: false, + nullable: false, }; /** @@ -360,11 +366,32 @@ function validateVariableReferences( break; } } +// ε-reachable cycle detection +// +// A grammar rule causes an infinite loop at match time when a named rule can +// recurse back to itself without ever consuming a mandatory input token — i.e. +// when the cycle is reachable via ε-transitions (optional parts, rule expansions +// that themselves match ε). +// +// Detection: `epsilonReachable` carries the set of rule names entered since the +// last mandatory input was consumed. When a back-reference is found +// (record.compiling === true) and the rule name is still in that set, the full +// path back to the entry point was traversed without consuming any input, so an +// error is reported. +// +// Nullability note — two asymmetric checks appear at each rule-reference site: +// record.nullable === false → clear currentEpr (only when *definitely* +// non-nullable; undefined/back-ref leaves it +// intact to avoid masking a cycle further along) +// record.nullable ?? false → propagate ruleNullable (treat back-refs +// conservatively as non-nullable) + function createNamedGrammarRules( context: CompileContext, name: string, referencePosition?: number, referenceVariable?: string, + epsilonReachable: Set = new Set(), referenceContext: CompileContext = context, ): CompletedDefinitionRecord { const record = context.ruleDefMap.get(name); @@ -385,18 +412,43 @@ function createNamedGrammarRules( name, referencePosition, referenceVariable, + epsilonReachable, referenceContext, ); } - if (record.grammarRules === undefined) { + if (record.compiling) { + if (epsilonReachable.has(name)) { + // Back-reference reachable without consuming any input: infinite loop at match time + referenceContext.errors.push({ + message: `Rule '<${name}>' creates an epsilon-reachable cycle that would cause an infinite loop at match time`, + definition: referenceContext.currentDefinition, + pos: referencePosition, + }); + } + // else: non-epsilon back-reference (mandatory input consumed before the + // back-ref); just return the incomplete record — the grammar is valid. + } else if (record.grammarRules === undefined) { + const eprWithSelf = new Set(epsilonReachable).add(name); const prev = context.currentDefinition; context.currentDefinition = name; + // Assign an empty sentinel array before setting compiling=true so that + // any non-epsilon re-entrant call (record.compiling=true, name not in + // epsilonReachable) returns [] rather than undefined for grammarRules. record.grammarRules = []; - record.hasValue = createGrammarRules( + record.compiling = true; + // Pass the sentinel as the output array so createGrammarRules pushes + // directly into it. Any RulesPart.rules captured during a circular + // back-reference holds a reference to this same array object and will + // see the populated rules without a separate copy step. + const { hasValue, nullable } = createGrammarRules( context, record.rules, + eprWithSelf, record.grammarRules, ); + record.hasValue = hasValue; + record.compiling = false; + record.nullable = nullable; context.currentDefinition = prev; } @@ -413,26 +465,39 @@ function createNamedGrammarRules( function createGrammarRules( context: CompileContext, rules: Rule[], - grammarRules: GrammarRule[], -) { + epsilonReachable: Set, + out: GrammarRule[] = [], +): { grammarRules: GrammarRule[]; hasValue: boolean; nullable: boolean } { + const grammarRules = out; let hasValue = true; + let nullable = false; // nullable if ANY alternative is nullable for (const r of rules) { - const result = createGrammarRule(context, r); + const result = createGrammarRule(context, r, epsilonReachable); grammarRules.push(result.grammarRule); hasValue = hasValue && result.hasValue; + nullable = nullable || result.nullable; } - return hasValue; + return { grammarRules, hasValue, nullable }; } function createGrammarRule( context: CompileContext, rule: Rule, -): { grammarRule: GrammarRule; hasValue: boolean } { + epsilonReachable: Set, +): { grammarRule: GrammarRule; hasValue: boolean; nullable: boolean } { const { expressions, value } = rule; const parts: GrammarPart[] = []; const availableVariables = new Set(); let variableCount = 0; let defaultValue = false; + // A rule alternative is nullable if ALL of its parts can match ε. + let ruleNullable = true; + let currentEpr = epsilonReachable; + // Call after any part that guarantees consuming ≥1 input token. + const consumedInput = () => { + currentEpr = new Set(); + ruleNullable = false; + }; for (const expr of expressions) { switch (expr.type) { case "string": { @@ -444,6 +509,7 @@ function createGrammarRule( parts.push(part); // default value of the string defaultValue = true; + consumedInput(); // string literals always consume mandatory input break; } case "variable": { @@ -459,25 +525,38 @@ function createGrammarRule( } availableVariables.add(name); if (ruleReference) { - const { grammarRules } = createNamedGrammarRules( + const record = createNamedGrammarRules( context, refName, refPos, name, + currentEpr, ); parts.push({ type: "rules", - rules: grammarRules, + rules: record.grammarRules, variable: name, name: refName, optional: expr.optional, }); + if (!expr.optional) { + // === false: only clear when *definitely* non-nullable. + // undefined (back-ref still compiling) leaves epr intact + // to avoid masking an ε-cycle further along this path. + if (record.nullable === false) currentEpr = new Set(); + // ?? false: treat undefined (back-ref) as non-nullable — + // conservative for nullability propagation, consistent with + // how cycles are broken (they require mandatory input). + ruleNullable = + ruleNullable && (record.nullable ?? false); + } } else if (refName === "number") { parts.push({ type: "number", variable: name, optional: expr.optional, }); + if (!expr.optional) consumedInput(); } else { // Validate type name references // All non-built-in types must be explicitly imported @@ -513,6 +592,7 @@ function createGrammarRule( optional: expr.optional, typeName: refName, }); + if (!expr.optional) consumedInput(); } break; } @@ -536,35 +616,50 @@ function createGrammarRule( // Use defaultValue=true so single-part rules using a phrase set // don't trip the "Start rule does not produce a value" check. defaultValue = true; + consumedInput(); // phrase sets always consume input break; } - const { grammarRules, hasValue } = createNamedGrammarRules( + const record = createNamedGrammarRules( context, expr.name, expr.pos, + undefined, + currentEpr, ); // default value of the rule reference - defaultValue = hasValue; + defaultValue = record.hasValue; parts.push({ type: "rules", - rules: grammarRules, + rules: record.grammarRules, name: expr.name, }); + // RuleRefExpr has no optional modifier; it is always non-optional. + // === false: only clear when *definitely* non-nullable (same + // asymmetry as the variable ruleRef case above). + if (record.nullable === false) { + currentEpr = new Set(); + } + // ?? false: treat undefined (back-ref) as non-nullable. + ruleNullable = ruleNullable && (record.nullable ?? false); break; } case "rules": { const { rules, optional, repeat } = expr; - const grammarRules: GrammarRule[] = []; // default value of the nested rules - defaultValue = createGrammarRules(context, rules, grammarRules); - const rulesPart: import("./grammarTypes.js").RulesPart = { + const { + grammarRules, + hasValue: groupHasValue, + nullable: groupNullable, + } = createGrammarRules(context, rules, currentEpr); + defaultValue = groupHasValue; + const rulesPart: RulesPart = { type: "rules", rules: grammarRules, optional, }; if (repeat) rulesPart.repeat = true; parts.push(rulesPart); - + if (!optional && !groupNullable) consumedInput(); break; } default: @@ -594,5 +689,6 @@ function createGrammarRule( value !== undefined || variableCount === 1 || (variableCount === 0 && parts.length === 1 && defaultValue), + nullable: ruleNullable, }; } diff --git a/ts/packages/actionGrammar/test/grammarCompileError.spec.ts b/ts/packages/actionGrammar/test/grammarCompileError.spec.ts index 0f6ecddf0..78ccfcd1b 100644 --- a/ts/packages/actionGrammar/test/grammarCompileError.spec.ts +++ b/ts/packages/actionGrammar/test/grammarCompileError.spec.ts @@ -321,6 +321,394 @@ describe("Grammar Compiler", () => { }); }); + describe("Epsilon-reachable recursive cycles", () => { + // Using startValueRequired: false to avoid spurious "no value" errors + // in grammars that are only testing cycle detection. + const opts = { startValueRequired: false }; + + describe("Error", () => { + it("direct self-reference = ", () => { + const grammarText = ` = ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("self-reference as first part before literal = foo", () => { + const grammarText = ` = foo;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("mutual epsilon cycle = , = ", () => { + const grammarText = ` + = ; + = ; + = ; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("optional inline rule before self-reference = (foo)? ", () => { + const grammarText = ` = (foo)? ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("optional part in mutual cycle = (foo)? , = ", () => { + const grammarText = ` + = ; + = (foo)? ; + = ; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("inner epsilon cycle despite outer mandatory input: foo , = , = ", () => { + const grammarText = ` + = foo ; + = ; + = ; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("three-node epsilon cycle = , = , = ", () => { + const grammarText = ` + = ; + = ; + = ; + = ; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + // The back-reference is inside , so the definition attribution + // should point there. + expect(errors[0]).toContain(""); + }); + + it("epsilon cycle in one alternative of multi-alternative rule = foo | ", () => { + const grammarText = ` = foo | ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("nullable intermediate rule creates epsilon path = , = (foo)?", () => { + const grammarText = ` + = ; + = ; + = (foo)?; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("optional variable self-reference = $(x:)?", () => { + const grammarText = ` = $(x:)?;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("Kleene star group before self-reference = (foo)* ", () => { + // (foo)* is optional (zero or more), so is reachable via ε + const grammarText = ` = (foo)* ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("Kleene star group containing self-reference = ()* foo", () => { + // The first thing attempted inside ()* is itself — ε-cycle + const grammarText = ` = ()* foo;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(1); + expect(errors[0]).toContain("error:"); + expect(errors[0]).toContain(""); + }); + + it("two independent epsilon cycles each reported separately", () => { + const grammarText = ` + = | ; + = ; + = ; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(2); + expect(errors.some((e) => e.includes(""))).toBe(true); + expect(errors.some((e) => e.includes(""))).toBe(true); + }); + }); + + describe("Valid (no error)", () => { + it("right-recursive with mandatory input = foo ", () => { + const grammarText = ` = foo ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("mutual recursion with mandatory input in caller = foo , = ", () => { + const grammarText = ` + = ; + = foo ; + = ; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("non-nullable rule before self-reference = , = foo", () => { + const grammarText = ` + = ; + = ; + = foo; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("both rules consume mandatory input = foo , = bar ", () => { + const grammarText = ` + = ; + = foo ; + = bar ; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("number variable before self-reference = $(n:number) ", () => { + const grammarText = ` = $(n:number) ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("wildcard variable before self-reference = $(x) ", () => { + const grammarText = ` = $(x) ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("safe recursive alternative alongside non-recursive alternative = foo | bar ", () => { + // The 'bar ' alternative is safe because 'bar' is consumed first. + // The plain 'foo' alternative is trivially non-recursive. + const grammarText = ` = foo | bar ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("optional self-reference after mandatory literal = foo $(x:)?", () => { + // 'foo' consumes mandatory input, so the optional back-ref is safe. + const grammarText = ` = foo $(x:)?;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("non-nullable multi-alternative intermediate = , = foo | bar", () => { + // has two alternatives, both consuming mandatory input, so it + // is non-nullable and clears the epsilon-reachable set before . + const grammarText = ` + = ; + = ; + = foo | bar; + `; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + + it("Kleene plus before self-reference = (foo)+ ", () => { + // (foo)+ must match at least one 'foo', so mandatory input is + // consumed before the recursive . + const grammarText = ` = (foo)+ ;`; + const errors: string[] = []; + loadGrammarRulesNoThrow( + "test", + grammarText, + errors, + undefined, + opts, + ); + expect(errors.length).toBe(0); + }); + }); + }); + describe("Type Imports", () => { it("Imported type reference should not error", () => { const grammarText = ` diff --git a/ts/packages/actionGrammar/test/grammarMatcher.spec.ts b/ts/packages/actionGrammar/test/grammarMatcher.spec.ts index 5c3fcc0ae..7f87f9a8e 100644 --- a/ts/packages/actionGrammar/test/grammarMatcher.spec.ts +++ b/ts/packages/actionGrammar/test/grammarMatcher.spec.ts @@ -414,4 +414,32 @@ describe("Grammar Matcher", () => { expect(testMatchGrammar(grammar, "+0x123")).toStrictEqual([]); }); }); + + describe("Recursive rules", () => { + // Regression: when a rule has a non-epsilon back-reference to itself, + // the compiled RulesPart.rules must point to the final populated array, + // not the empty sentinel assigned before compilation begins. + // If the sentinel is captured (bug), the recursive match silently fails. + it("right-recursive rule reference can match multi-token input", () => { + // = foo -> "hit" | bar -> "bar" + // "foo bar": foo consumes mandatory input, then matches "bar" + const g = ` + = foo -> "hit" + | bar -> "bar"; + `; + const grammar = loadGrammarRules("test.grammar", g); + expect(testMatchGrammar(grammar, "foo bar")).toStrictEqual(["hit"]); + }); + + it("right-recursive variable rule can match multi-token input", () => { + // = foo $(x:) -> x | bar -> "bar" + // "foo bar": foo consumes mandatory input, then $(x:) captures "bar" + const g = ` + = foo $(x:) -> x + | bar -> "bar"; + `; + const grammar = loadGrammarRules("test.grammar", g); + expect(testMatchGrammar(grammar, "foo bar")).toStrictEqual(["bar"]); + }); + }); }); diff --git a/ts/packages/actionGrammar/test/grammarStore.spec.ts b/ts/packages/actionGrammar/test/grammarStore.spec.ts index ddf538e74..4c292f74b 100644 --- a/ts/packages/actionGrammar/test/grammarStore.spec.ts +++ b/ts/packages/actionGrammar/test/grammarStore.spec.ts @@ -48,7 +48,7 @@ describe("GrammarStore", () => { const store = new GrammarStore(); await store.addRule({ - grammarText: ' = "play" $(track:string)', + grammarText: " = play $(track:string);", schemaName: "player", sourceRequest: "play Bohemian Rhapsody", actionName: "playTrack", @@ -70,12 +70,12 @@ describe("GrammarStore", () => { const store = new GrammarStore(); await store.addRule({ - grammarText: ' = "play" $(track:string)', + grammarText: " = play $(track:string);", schemaName: "player", }); await store.addRule({ - grammarText: ' = "schedule" $(event:string)', + grammarText: " = schedule $(event:string);", schemaName: "calendar", }); @@ -91,12 +91,12 @@ describe("GrammarStore", () => { const store = new GrammarStore(); await store.addRule({ - grammarText: ' = "play" $(track:string)', + grammarText: " = play $(track:string);", schemaName: "player", }); await store.addRule({ - grammarText: ' = "pause"', + grammarText: " = pause;", schemaName: "player", }); @@ -111,7 +111,7 @@ describe("GrammarStore", () => { const store = new GrammarStore(); store.addRule({ - grammarText: ' = "play" $(track:string)', + grammarText: " = play $(track:string);", schemaName: "player", }); @@ -128,7 +128,7 @@ describe("GrammarStore", () => { const store1 = new GrammarStore(); await store1.addRule({ - grammarText: ' = "play" $(track:string)', + grammarText: " = play $(track:string);", schemaName: "player", sourceRequest: "play music", actionName: "playTrack", @@ -156,7 +156,7 @@ describe("GrammarStore", () => { await store.setAutoSave(true); await store.addRule({ - grammarText: ' = "play" $(track:string)', + grammarText: " = play $(track:string);", schemaName: "player", }); @@ -190,13 +190,13 @@ describe("GrammarStore", () => { const store = new GrammarStore(); await store.addRule({ - grammarText: ' = "play" $(track:string)', + grammarText: " = play $(track:string);", schemaName: "player", sourceRequest: "play music", }); await store.addRule({ - grammarText: ' = "pause"', + grammarText: " = pause;", schemaName: "player", sourceRequest: "pause", }); @@ -205,8 +205,8 @@ describe("GrammarStore", () => { expect(agr).toContain("# Dynamic Grammar Rules for player"); expect(agr).toContain("# 2 rule(s)"); - expect(agr).toContain(' = "play" $(track:string)'); - expect(agr).toContain(' = "pause"'); + expect(agr).toContain(" = play $(track:string);"); + expect(agr).toContain(" = pause;"); expect(agr).toContain('Source: "play music"'); }); @@ -220,7 +220,7 @@ describe("GrammarStore", () => { store.addRule({ grammarText: - ' = "play" $(track:string) -> { actionName: "playTrack", parameters: { track } };', + ' = play $(track:string) -> { actionName: "playTrack", parameters: { track } };', schemaName: "player", });