diff --git a/contrib/nitcc/src/grammar.nit b/contrib/nitcc/src/grammar.nit index 4929198792..85bc12ecab 100644 --- a/contrib/nitcc/src/grammar.nit +++ b/contrib/nitcc/src/grammar.nit @@ -59,70 +59,100 @@ class Gram return res.to_s end - # Inline (ie. remove from the concrete grammar) some production - # REQUIRE: no circular production in `prods` - fun inline(prods: Collection[Production]) + # Check that prod does not depends on itself (no circular dependency). + fun can_inline(prod: Production): Bool do - for p in self.prods do - for a in p.alts.to_a do - if a.phony then continue - var to_inline = false - for e in a.elems do - if e isa Production and prods.has(e) then to_inline = true + for a in prod.alts.to_a do + for e in a.elems do + if prod == e then + return false end - if not to_inline then continue - - if a.codes == null then a.make_codes + end + end + return true + end - var a0 = new Alternative(p, a.name, new Array[Element]) - a0.trans = true - a0.codes = new Array[Code] - var pool = [a0] - var pool2 = new Array[Alternative] - for e in a.elems do - if not e isa Production or not prods.has(e) then - for x in pool do - x.elems.add(e) - x.codes.add(new CodePop) + # Inline `p = A | B` into `{a:} C . p D` produces 2 new alternatives `{a1:} C A D` and `{a2:} C B D` + # Note that A, B, C and D can contain p and will not be modified. + # Note also that `old_alt` will be removed form the CST. + fun inline_element(old_alt: Alternative, pos: Int) + do + var production = old_alt.elems[pos] # The production to inline + assert production isa Production + var old_prod = old_alt.prod # It's production + if old_alt.codes == null then old_alt.make_codes + + for alt in production.alts do + # For each alternative of the production to inline, create a new altednative based on the old one + var name = old_alt.name + "_i" + old_prod.alts.length.to_s + var new_alt = new Alternative(old_prod, name, new Array[Element]) + new_alt.trans = true + old_prod.alts.add(new_alt) + # All the element are the same, except the production replaced by the selected alternative + for i in [0..old_alt.elems.length[ do + if i == pos then + new_alt.elems.add_all(alt.elems) + else + var e = old_alt.elems[i] + new_alt.elems.add(e) + end + end + # Codes should also be updated + # code in the old alternative might be shifted to correspond to the new position of the existing element + # code getting the must be replaced by the whole code of the inlined alternative, also shifted by the right amount + if alt.codes == null then alt.make_codes + new_alt.codes = new Array[Code] + for code in old_alt.codes.as(not null) do + if code isa CodeGet then + if code.pos == pos then + for code2 in alt.codes.as(not null) do + new_alt.codes.add(code2.shift(pos)) end - continue + else if code.pos >= pos then + # some elements are added but one (the inlined production) is removed + new_alt.codes.add(code.shift(alt.elems.length - 1)) + else + new_alt.codes.add(code) end - if p == e then - print "Circular inlining on {p} :: {a}" - abort - end - pool2.clear - for a2 in e.alts do - if a.phony then continue - if a2.codes == null then a2.make_codes - for x in pool do - var name = a.name + "_" + pool2.length.to_s - var na = new Alternative(p, name, new Array[Element]) - na.trans = true - pool2.add(na) - na.elems.add_all(x.elems) - na.elems.add_all(a2.elems) - na.codes = new Array[Code] - na.codes.add_all(x.codes.as(not null)) - na.codes.add_all(a2.codes.as(not null)) + else + new_alt.codes.add(code) + end + end + #print "old «{old_alt}» {old_alt.codes or else "?"}" + #print "inl «{alt}» {alt.codes or else "?"}" + #print "new «{new_alt}» {new_alt.codes or else "?"}" + end + if not old_alt.trans then + old_prod.ast_alts.add(old_alt) + end + old_prod.alts.remove(old_alt) + end + + # Inline all occurrences of a production and delete it from the CST. + # Require `can_inline(prod)` + fun inline_prod(prod: Production) + do + var changed = true + while changed do + changed = false + for p in self.prods do + for a in p.alts.to_a do + for i in [0..a.elems.length[ do + var e = a.elems[i] + if e != prod then continue + if p == prod then + print "circular" + abort end + inline_element(a, i) + changed = true + break end - var tmp = pool - pool = pool2 - pool2 = tmp - end - for x in pool do - x.codes.add(a.codes.last) end - p.ast_alts.add(a) - p.alts.remove(a) - p.alts.add_all(pool) end end - for p in prods do - self.prods.remove(p) - self.ast_prods.add(p) - end + self.prods.remove(prod) + self.ast_prods.add(prod) end # The starting production in the augmented grammar @@ -312,9 +342,9 @@ class Gram prod.acname = "Nodes[{e.acname}]" prods.add(prod) var alt1 = prod.new_alt("{cname}_one", e) - alt1.codes = [new CodeNewNodes(alt1), new CodePop, new CodeAdd: Code] + alt1.codes = [new CodeNewNodes(alt1), new CodeGet(0), new CodeAdd: Code] var alt2 = prod.new_alt("{cname}_more", prod, e) - alt2.codes = [new CodePop, new CodePop, new CodeAdd: Code] + alt2.codes = [new CodeGet(0), new CodeGet(1), new CodeAdd: Code] plusizes[e] = prod return prod end @@ -343,7 +373,7 @@ class Gram prod.acname = "nullable {e.acname}" prods.add(prod) var a1 = prod.new_alt("{cname}_one", e) - a1.codes = [new CodePop] + a1.codes = [new CodeGet(0)] var a0 = prod.new_alt0("{cname}_none") a0.codes = [new CodeNull] quesizes[e] = prod @@ -571,8 +601,10 @@ class Alternative if codes != null then return var codes = new Array[Code] self.codes = codes + var i = 0 for e in elems do - codes.add(new CodePop) + codes.add(new CodeGet(i)) + i += 1 end codes.add(new CodeNew(self)) end @@ -581,12 +613,15 @@ end # A step in the construction of the AST. # Used to model transformations interface Code + # self or a CodeGet increased by `d`. Is used by inlining. + fun shift(d: Int): Code do return self end -# Get a element from the stack -class CodePop +class CodeGet super Code - redef fun to_s do return "pop" + var pos: Int + redef fun to_s do return "get{pos}" + redef fun shift(d) do return new CodeGet(pos+d) end # Allocate a new AST node for an alternative using the correct number of popped elements diff --git a/contrib/nitcc/src/lrautomaton.nit b/contrib/nitcc/src/lrautomaton.nit index a60ce3f3ae..5b74d8e7d8 100644 --- a/contrib/nitcc/src/lrautomaton.nit +++ b/contrib/nitcc/src/lrautomaton.nit @@ -411,7 +411,7 @@ redef class Generator add "end" for p in gram.prods do - add "class Goto_{p.cname}" + add "private class Goto_{p.cname}" add "\tsuper LRGoto" for s in p.gotos do if s.gotos.length <= 1 then continue @@ -493,9 +493,8 @@ redef class Generator i = 0 var st = new Array[String] for c in alt.codes.as(not null) do - if c isa CodePop then - st.add "n{i}" - i += 1 + if c isa CodeGet then + st.add "n{c.pos}" else if c isa CodeNull then st.add "null" else if c isa CodeNew then @@ -527,9 +526,15 @@ redef class Generator var a1 = st.pop var a0 = st.last add "\t\t{a0}.children.add({a1})" + else + abort end end - assert st.length == 1 + assert st.length == 1 else + print alt + print st + print alt.codes or else "?" + end add "\t\tvar prod = {st.first}" add "\t\tparser.node_stack.push prod" diff --git a/contrib/nitcc/src/nitcc_semantic.nit b/contrib/nitcc/src/nitcc_semantic.nit index f78084cea6..33e175b0b0 100644 --- a/contrib/nitcc/src/nitcc_semantic.nit +++ b/contrib/nitcc/src/nitcc_semantic.nit @@ -59,12 +59,12 @@ class CollectNameVisitor v2.enter_visit(n) # Inline all the `?` - gram.inline(v2.gram.quesizes.values) + for p in v2.gram.quesizes.values do gram.inline_prod(p) # Inline all the prods suffixed by '_inline' #TODO use a real keyword for p in gram.prods do if not p.name.has_suffix("_inline") then continue print "inline {p}" - gram.inline([p]) + gram.inline_prod(p) end # Build the NFA automaton @@ -401,7 +401,7 @@ redef class Npriority # Inject a new alternative that goes to the next less priority class var alt = prod.new_alt2(prod.name + "_" + prod.alts.length.to_s, [next.as(not null)]) alt.trans = true - alt.codes = [new CodePop] + alt.codes = [new CodeGet(0)] v.pri = null v.prod = old @@ -503,7 +503,7 @@ redef class Nalt self.alt = alt if v.trans then alt.trans = true - alt.codes = [new CodePop] + alt.codes = [new CodeGet(0)] end end end diff --git a/contrib/nitcc/tests/sav/trans_inline2.input.res b/contrib/nitcc/tests/sav/trans_inline2.input.res new file mode 100644 index 0000000000..2094c30052 --- /dev/null +++ b/contrib/nitcc/tests/sav/trans_inline2.input.res @@ -0,0 +1,9 @@ +Start + p + 'a'@(1:1-1:2) + 'b'@(1:2-1:3) + q_inline_1 + 'z'@(1:3-1:4) + 'c'@(1:4-1:5) + 'd'@(1:5-1:6) + Eof@(1:6-1:6)='' diff --git a/contrib/nitcc/tests/sav/trans_inline3.input.res b/contrib/nitcc/tests/sav/trans_inline3.input.res new file mode 100644 index 0000000000..47481afb6c --- /dev/null +++ b/contrib/nitcc/tests/sav/trans_inline3.input.res @@ -0,0 +1,13 @@ +Start + p + 'a'@(1:1-1:2) + 'b'@(1:2-1:3) + q_inline + 'x'@(1:3-1:4) + 'y'@(1:4-1:5) + 'c'@(1:5-1:6) + q_inline + 'x'@(1:6-1:7) + 'y'@(1:7-1:8) + 'd'@(1:8-1:9) + Eof@(1:9-1:9)='' diff --git a/contrib/nitcc/tests/sav/trans_inline4.input.res b/contrib/nitcc/tests/sav/trans_inline4.input.res new file mode 100644 index 0000000000..1442e395c4 --- /dev/null +++ b/contrib/nitcc/tests/sav/trans_inline4.input.res @@ -0,0 +1,12 @@ +Start + p + 'a'@(1:1-1:2) + 'b'@(1:2-1:3) + q_inline_0 + 'x'@(1:3-1:4) + 'y'@(1:4-1:5) + 'c'@(1:5-1:6) + q_inline_1 + 'z'@(1:6-1:7) + 'd'@(1:7-1:8) + Eof@(1:8-1:8)='' diff --git a/contrib/nitcc/tests/sav/trans_inline5.input.res b/contrib/nitcc/tests/sav/trans_inline5.input.res new file mode 100644 index 0000000000..f5dd317287 --- /dev/null +++ b/contrib/nitcc/tests/sav/trans_inline5.input.res @@ -0,0 +1,8 @@ +Start + p + 'a'@(1:1-1:2) + 'b'@(1:2-1:3) + q_inline + 'c'@(1:3-1:4) + 'd'@(1:4-1:5) + Eof@(1:5-1:5)='' diff --git a/contrib/nitcc/tests/trans_inline2.input b/contrib/nitcc/tests/trans_inline2.input new file mode 100644 index 0000000000..554cf3e2f3 --- /dev/null +++ b/contrib/nitcc/tests/trans_inline2.input @@ -0,0 +1 @@ +abzcd \ No newline at end of file diff --git a/contrib/nitcc/tests/trans_inline2.sablecc b/contrib/nitcc/tests/trans_inline2.sablecc new file mode 100644 index 0000000000..f192d99a5a --- /dev/null +++ b/contrib/nitcc/tests/trans_inline2.sablecc @@ -0,0 +1,6 @@ +Grammar trans_inline; + +Parser + +p = 'a' 'b' q_inline 'c' 'd'; +q_inline = 'x' 'y' | 'z'; diff --git a/contrib/nitcc/tests/trans_inline3.input b/contrib/nitcc/tests/trans_inline3.input new file mode 100644 index 0000000000..0f1a3cf7bd --- /dev/null +++ b/contrib/nitcc/tests/trans_inline3.input @@ -0,0 +1 @@ +abxycxyd \ No newline at end of file diff --git a/contrib/nitcc/tests/trans_inline3.sablecc b/contrib/nitcc/tests/trans_inline3.sablecc new file mode 100644 index 0000000000..d55b55520b --- /dev/null +++ b/contrib/nitcc/tests/trans_inline3.sablecc @@ -0,0 +1,6 @@ +Grammar trans_inline; + +Parser + +p = 'a' 'b' q_inline 'c' q_inline 'd'; +q_inline = 'x' 'y' ; diff --git a/contrib/nitcc/tests/trans_inline4.input b/contrib/nitcc/tests/trans_inline4.input new file mode 100644 index 0000000000..05890242c8 --- /dev/null +++ b/contrib/nitcc/tests/trans_inline4.input @@ -0,0 +1 @@ +abxyczd \ No newline at end of file diff --git a/contrib/nitcc/tests/trans_inline4.sablecc b/contrib/nitcc/tests/trans_inline4.sablecc new file mode 100644 index 0000000000..9ebfe240fa --- /dev/null +++ b/contrib/nitcc/tests/trans_inline4.sablecc @@ -0,0 +1,6 @@ +Grammar trans_inline; + +Parser + +p = 'a' 'b' q_inline 'c' q_inline 'd'; +q_inline = 'x' 'y' | 'z' ; diff --git a/contrib/nitcc/tests/trans_inline5.input b/contrib/nitcc/tests/trans_inline5.input new file mode 100644 index 0000000000..85df50785d --- /dev/null +++ b/contrib/nitcc/tests/trans_inline5.input @@ -0,0 +1 @@ +abcd \ No newline at end of file diff --git a/contrib/nitcc/tests/trans_inline5.sablecc b/contrib/nitcc/tests/trans_inline5.sablecc new file mode 100644 index 0000000000..ec17e8664f --- /dev/null +++ b/contrib/nitcc/tests/trans_inline5.sablecc @@ -0,0 +1,6 @@ +Grammar trans_inline; + +Parser + +p = 'a' 'b' q_inline 'c' 'd'; +q_inline = Empty ;