From 60e558f31ab812dbab9a6ac5d017d73da67b3306 Mon Sep 17 00:00:00 2001 From: Tim Vieira Date: Wed, 3 Jul 2013 12:03:27 -0400 Subject: [PATCH] improvements to `examples/ptb.dyna` - (no longer a complete mess thanks to the new `load` commands) - does binarization - extracts and counts rules in a s-expression tree. - parses a sentence given as a string AND extracts the optimal parse by following backpointers. sexpr loader uses nested lists as it's representation 'in'(+,+)+ asserts second arg is a list. retract_rule: no longer crashes when retracting a backchained rule. There are possible bugs because we haven't deleted the memo tables affected by that rule. REPL: show changes after 'load' commands String display format should no longer render special characters like '\n' --- examples/ptb.dyna | 212 ++++++++--------------- src/Dyna/Backend/Python/Backend.hs | 2 +- src/Dyna/Backend/Python/interpreter.py | 32 +--- src/Dyna/Backend/Python/load/__init__.py | 2 +- src/Dyna/Backend/Python/load/sexpr.py | 26 +-- src/Dyna/Backend/Python/repl.py | 8 +- src/Dyna/Backend/Python/stdlib.py | 20 ++- src/Dyna/Backend/Python/term.py | 11 +- src/Dyna/Backend/Python/utils.py | 2 +- test/repl/english.par | 1 + 10 files changed, 125 insertions(+), 191 deletions(-) diff --git a/examples/ptb.dyna b/examples/ptb.dyna index 4a9cbd7..9e7990e 100644 --- a/examples/ptb.dyna +++ b/examples/ptb.dyna @@ -1,142 +1,34 @@ -sentence(0) := - &t("S", &t("NP-SBJ", &t("NNP", "Rolls-Royce"), - &t("@NP-SBJ", &t("NNP", "Motor"), - &t("@@NP-SBJ", &t("NNPS", "Cars"), - &t("NNP", "Inc.")))), - &t("@S", &t("VP", &t("VBD", "said"), - &t("SBAR", &t("-NONE-", "0"), - &t("S", &t("NP-SBJ", &t("PRP", "it")), - &t("VP", &t("VBZ", "expects"), - &t("S", &t("NP-SBJ", &t("PRP$", "its"), - &t("@NP-SBJ", &t("NNP", "U.S."), - &t("NNS", "sales"))), - &t("VP", &t("TO", "to"), - &t("VP", &t("VB", "remain"), - &t("@VP", &t("ADJP-PRD", &t("JJ", "steady")), - &t("@@VP", &t("PP-LOC-CLR", &t("IN", "at"), - &t("NP", &t("QP", &t("IN", "about"), - &t("CD", "1,200")), - &t("NNS", "cars"))), - &t("PP-TMP", &t("IN", "in"), - &t("NP", &t("CD", "1990")))))))))))), - &t(".", "."))). - -sentence(1) := - &t("S", &t("NP-SBJ", &t("DT", "The"), - &t("@NP-SBJ", &t("NN", "luxury"), - &t("@@NP-SBJ", &t("NN", "auto"), - &t("NN", "maker")))), - &t("@S", &t("NP-TMP", &t("JJ", "last"), - &t("NN", "year")), - &t("VP", &t("VBD", "sold"), - &t("@VP", &t("NP", &t("CD", "1,214"), - &t("NNS", "cars")), - &t("PP-LOC", &t("IN", "in"), - &t("NP", &t("DT", "the"), - &t("NNP", "U.S."))))))). - -sentence(2) := - &t("S", &t("NP-SBJ", &t("NP", &t("NNP", "Howard"), - &t("NNP", "Mosher")), - &t("@NP-SBJ", &t(",", ","), - &t("@@NP-SBJ", &t("NP", &t("NP", &t("NN", "president")), - &t("@NP", &t("CC", "and"), - &t("NP", &t("JJ", "chief"), - &t("@NP", &t("NN", "executive"), - &t("NN", "officer"))))), - &t(",", ",")))), - &t("@S", &t("VP", &t("VBD", "said"), - &t("SBAR", &t("-NONE-", "0"), - &t("S", &t("NP-SBJ", &t("PRP", "he")), - &t("VP", &t("VBZ", "anticipates"), - &t("NP", &t("NP", &t("NN", "growth")), - &t("@NP", &t("PP", &t("IN", "for"), - &t("NP", &t("DT", "the"), - &t("@NP", &t("NN", "luxury"), - &t("@@NP", &t("NN", "auto"), - &t("NN", "maker"))))), - &t("PP-LOC", &t("PP", &t("IN", "in"), - &t("NP", &t("NNP", "Britain"), - &t("@NP", &t("CC", "and"), - &t("NNP", "Europe")))), - &t("@PP-LOC", &t(",", ","), - &t("@@PP-LOC", &t("CC", "and"), - &t("PP", &t("IN", "in"), - &t("NP", &t("ADJP", &t("JJ", "Far"), - &t("JJ", "Eastern")), - &t("NNS", "markets")))))))))))), - &t(".", "."))). - -%toString(A) := -% needs(A), -% A. -% -%toString(&t(A)) := -% needs(&t(A)), -% A. -% -%toString(&t(A,B)) := -% needs(&t(A,B)), -% mod("(%s %s)", -% tuple(toString(A), -% toString(B))). -% -%toString(&t(A,B,C)) := -% needs(&t(A,B,C)), -% mod("(%s %s %s)", -% tuple(toString(A), -% toString(B), -% toString(C))). - -needs(A) |= needs(&t(A,B)). -needs(B) |= needs(&t(A,B)). - -needs(A) |= needs(&t(A,B,C)). -needs(B) |= needs(&t(A,B,C)). -needs(C) |= needs(&t(A,B,C)). - -%needs(sentence(0)). -%needs(sentence(1)). -%needs(sentence(2)). - -%zzz(0) := toString(sentence(0)). -%zzz(1) := toString(sentence(1)). -%zzz(2) := toString(sentence(2)). - -%sym(&t(X,_)) := X. -%sym(&t(X,_,_)) := X. - % the following rule order is important bc we use :=. -sym(A) := needs(A), A. -sym(&t(A,B)) := needs(&t(A,B)), A. -sym(&t(A,B,C)) := needs(&t(A,B,C)), A. +:- backchain sym/1. +sym(A) := A. +sym([A|_]) := A. % rule used to create top-level subtree -rules(&t(X,Y)) := needs(&t(X,Y)), &r(X, sym(Y)). -rules(&t(X,Y,Z)) := needs(&t(X,Y,Z)), needs(Z), &r(X, sym(Y), sym(Z)). - -:- dispos &t(&). -:- dispos &t(&,&). -:- dispos &t(&,&,&). +:- backchain rule/1. +rule([X,Y]) := &r(X, sym(Y)). +rule([X,Y,Z]) := &r(X, sym(Y), sym(Z)). % count the number of words in a subtree :- backchain numwords/1. numwords(X) := 1. -numwords(t(X,Y)) := numwords(Y). -numwords(t(X,Y,Z)) := numwords(Y) + numwords(Z). +numwords([X,Y]) := numwords(Y). +numwords([X,Y,Z]) := numwords(Y) + numwords(Z). % extract word and it's position in tree :- backchain word/2. word(0, X) := X. -word(I, t(X,Y)) := word(I, Y). -word(I, t(X,Y,Z)) := I < numwords(Y), word(I, Y). -word(I, t(X,Y,Z)) := I >= numwords(Y), word(I-numwords(Y), Z). - -%senword(S, I) := word(I, sentence(S)), between(0,I,numwords(S)). +word(I, [X,Y]) := word(I, Y). +word(I, [X,Y,Z]) := I < numwords(Y), word(I, Y). +word(I, [X,Y,Z]) := I >= numwords(Y), word(I-numwords(Y), Z). % unnormalized -c(X,Y) += r(X,Y) is rules(&t(X1,Y1)), 1. -c(X,Y,Z) += r(X,Y,Z) is rules(&t(X1,Y1,Z1)), 1. +c(X,Y) += 1 + for &r(X,Y) in f(T), + T is b(tree(_)). + +c(X,Y,Z) += 1 + for &r(X,Y,Z) in f(T), + T is b(tree(_)). % normalizing constants n(X) += c(X,Y). @@ -146,13 +38,61 @@ n(X) += c(X,Y,Z). p(X,Y) := c(X,Y) / n(X). p(X,Y,Z) := c(X,Y,Z) / n(X). -% activate fake backchaining. -needs(sentence(0)). -needs(sentence(1)). -needs(sentence(2)). - -% TODO: convert tree structure into word(word, position, sentence). - -% TODO: augment CKY rules with sentence index - -% TODO: implement constituent recall accuracy measure for two trees +% extract rules in a tree +:- backchain f/1. +f([X,Y]) bag= rule([X,Y]). +f([X,Y,Z]) bag= rule([X,Y,Z]). +f([X,Y]) bag= A for A in f(X). +f([X,Y]) bag= A for A in f(Y). +f([X,Y,Z]) bag= A for A in f(X). +f([X,Y,Z]) bag= A for A in f(Y). +f([X,Y,Z]) bag= A for A in f(Z). + +%zzz(I) := +% T is b(tree(0)), +% I in pycall("range", 0, numwords(T)), +% word(I, T). + +% binarize s-expression tree +:- backchain b/1. +b(X) := X. +b([X,Y]) := [X,b(Y)]. +b([X,Y,Z|Xs]) := [X, b(Y), b([&'@'(X), Z|Xs])]. +b([X,Y,Z]) := [X,b(Y),b(Z)]. + +% CKY parser +phrase(X,I,K) max= phrase(Y,I,J) * phrase(Z,J,K) * p(X,Y,Z). +phrase(X,I,K) max= phrase(Y,I,K) * p(X,Y). +phrase(X,I,I+1) max= 1 for [I,X] in enumerate(sentence). + +% backpointers +bk(X,I,K) argmax= [phrase(Y,I,J) * phrase(Z,J,K) * p(X,Y,Z), [[Y,I,J], [Z,J,K]]]. +bk(X,I,K) argmax= [phrase(Y,I,K) * p(X,Y), [[Y,I,K]]]. +bk(X,I,I+1) argmax= [1, X] for [I,X] in enumerate(sentence). + +% extract path from backpointers +path(X,I,K) := W is bk(X,I,K), W. +path(X,I,K) := [[Y,I,J], [Z,J,K]] is bk(X,I,K), [X, path(Y,I,J), path(Z,J,K)]. +path(X,I,K) := [[Y,I,K]] is bk(X,I,K), [X, path(Y,I,K)]. + +% utility function. +:- backchain enumerate/2. +:- backchain enumerate/1. +enumerate(X) = enumerate(X, 0). +enumerate([], I) = []. +enumerate([X|Xs], I) = [[I,X] | enumerate(Xs, I+1)]. + +:- backchain length/1. +length([]) = 0. +length([X|Xs]) = 1 + length(Xs). + +% declare a sentence. +sentence = split("George love -s Laura ."). + +%sentence(S, I) = +% T is b(tree(S)), +% I in pycall("range", 0, numwords(T)), +% word(I, T). + +zgoal(sentence) max= phrase("ROOT", 0, length(sentence)). +zzgoal(sentence) := path("ROOT", 0, length(sentence)). diff --git a/src/Dyna/Backend/Python/Backend.hs b/src/Dyna/Backend/Python/Backend.hs index e48e3d3..00fc65d 100644 --- a/src/Dyna/Backend/Python/Backend.hs +++ b/src/Dyna/Backend/Python/Backend.hs @@ -176,7 +176,7 @@ constants = go go ("int",_) = Just $ PDBS $ call "int" [] go ("pycall",_) = Just $ PDBS $ call "pycall" [] - go ("in",2) = Just $ PDBS $ infixOp " in " + go ("in",2) = Just $ PDBS $ call "in_list" [] go ("<=",2) = Just $ PDBS $ infixOp "<=" go ("<",2) = Just $ PDBS $ infixOp "<" diff --git a/src/Dyna/Backend/Python/interpreter.py b/src/Dyna/Backend/Python/interpreter.py index 2cde747..50f1824 100644 --- a/src/Dyna/Backend/Python/interpreter.py +++ b/src/Dyna/Backend/Python/interpreter.py @@ -44,11 +44,6 @@ TODO - TODO: True and 1 are equivalent. This sometimes leads to strange behavior. - - Hide all *.plan.py files - - - multi-line input in REPL. Consider using a fancier library such as cmd2 or - ipython's. - FASTER ====== @@ -98,13 +93,6 @@ USERS items proved: is it counting to infinity?). -NOTES -===== - - - `None` does not propagate, eventually it will because of the `?` prefix - operator. - - JUST FOR FUN ============ @@ -319,9 +307,11 @@ class Interpreter(object): xs.remove(u) assert u not in xs, 'Several occurrences of u in xs' # Step 2: run initializer in delete mode - rule.init(emit=self.delete_emit) - # Step 3; go! - return self.go() + if rule.init is not None: + rule.init(emit=self.delete_emit) + # Step 3; go! + return self.go() + # TODO: probably have to blast any memos from BC computations def go(self): try: @@ -622,13 +612,6 @@ def main(): crash_handler() - -# def pickle_interp(): -# import subprocess -# subprocess.Popen(['tar', 'czf', dotdynadir / 'crash.tar.gz', dotdynadir]) -# crash_handler.interp = pickle_interp - - if args.source: if not os.path.exists(args.source): @@ -672,8 +655,6 @@ def main(): print e exit(1) - interp.dump_charts(args.output) # should be a post-processor - if args.load: for cmd in args.load: load.run(interp, cmd) @@ -682,6 +663,9 @@ def main(): for cmd in args.post_process: post.run(interp, cmd) + if args.load or args.post_process or args.source: + interp.dump_charts(args.output) # should be a post-processor + if args.interactive or not args.source: interp.repl() diff --git a/src/Dyna/Backend/Python/load/__init__.py b/src/Dyna/Backend/Python/load/__init__.py index a733071..c5f3d0d 100644 --- a/src/Dyna/Backend/Python/load/__init__.py +++ b/src/Dyna/Backend/Python/load/__init__.py @@ -18,4 +18,4 @@ def run(interp, line): m = get_module('load', module)(interp, name) exec 'm.main(%s)' % args - interp.go() + return interp.go() diff --git a/src/Dyna/Backend/Python/load/sexpr.py b/src/Dyna/Backend/Python/load/sexpr.py index ea334fc..1653b2d 100644 --- a/src/Dyna/Backend/Python/load/sexpr.py +++ b/src/Dyna/Backend/Python/load/sexpr.py @@ -1,5 +1,6 @@ from cStringIO import StringIO from utils import parse_sexpr +from stdlib import todynalist class sexpr(object): @@ -15,8 +16,8 @@ class sexpr(object): ======== trees/1 ======= - trees(0) = node("a", node("b", "c"), node("d", "e")) - trees(1) = node("a", "b", "c") + trees(0) = ["a", ["b", "c"], ["d", "e"]] + trees(1) = ["a", "b", ["c"]] """ @@ -35,30 +36,11 @@ class sexpr(object): interp.new_fn(fn, ':=') return interp.build(fn, *a) - def node(*a): - fn = '%s/%s' % ('node', len(a)) - if interp.agg_name[fn] is None: - interp.new_fn(fn, ':=') - return interp.build(fn, *a) - def t(xs): if isinstance(xs, basestring): return xs else: - if len(xs) == 1: -# [sym] = map(t, xs) -# return node(sym, ()) - return t(xs[0]) # doesn't have a label. - elif len(xs) == 2: - [sym, a] = map(t, xs) - return node(sym, a) - elif len(xs) == 3: - [sym, a, b] = map(t, xs) - return node(sym, a, b) - else: - [sym, a] = t(xs[0]), t(xs[1]) - rest = t(['@' + xs[0]] + xs[2:]) - return node(sym, a, rest) + return todynalist([t(x) for x in xs]) contents = file(filename).read() diff --git a/src/Dyna/Backend/Python/repl.py b/src/Dyna/Backend/Python/repl.py index e0dfc04..93b5e23 100644 --- a/src/Dyna/Backend/Python/repl.py +++ b/src/Dyna/Backend/Python/repl.py @@ -394,10 +394,12 @@ class REPL(cmd.Cmd, object): """ try: - load.run(self.interp, line) + changed = load.run(self.interp, line) except: show_traceback() readline.write_history_file(self.hist) + else: + self._changed(changed) def do_post(self, line): """ @@ -414,10 +416,12 @@ class REPL(cmd.Cmd, object): """ try: - post.run(self.interp, line) + changed = post.run(self.interp, line) except: show_traceback() readline.write_history_file(self.hist) + else: + self._changed(changed) def do_trace(self, q): """ diff --git a/src/Dyna/Backend/Python/stdlib.py b/src/Dyna/Backend/Python/stdlib.py index f5d2965..0b62668 100644 --- a/src/Dyna/Backend/Python/stdlib.py +++ b/src/Dyna/Backend/Python/stdlib.py @@ -45,9 +45,13 @@ def todynalist(x): return _todynalist(list(x)) def _todynalist(x): - if not x: - return Nil - return Cons(x[0], _todynalist(x[1:])) +# if not x: +# return Nil +# return Cons(x[0], _todynalist(x[1:])) + c = Nil + for y in reversed(x): + c = Cons(y, c) + return c def get(x, i): return x[i] @@ -56,3 +60,13 @@ def iter_cons(x): if not (isinstance(x, Cons) or x is Nil): raise TypeError("Attemping to iterate something which isn't a list.") return x + +def in_list(x, a): + if not (isinstance(a, Cons) or a is Nil): + raise TypeError("Attemping to iterate something which isn't a list.") + return x in a.aslist + +# should probably be done with memoized backchaining... +def read_lines(filename): + with file(filename) as f: + return f.readlines() diff --git a/src/Dyna/Backend/Python/term.py b/src/Dyna/Backend/Python/term.py index fa2e069..3d5e96d 100644 --- a/src/Dyna/Backend/Python/term.py +++ b/src/Dyna/Backend/Python/term.py @@ -51,9 +51,10 @@ class Term(object): class Cons(Term): def __init__(self, head, tail): + if not (isinstance(tail, Cons) or tail is Nil): + raise TypeError('Malformed list') self.head = head self.tail = tail - assert isinstance(tail, (Cons, _Nil)), tail Term.__init__(self, 'cons/2', (head, tail)) self.aggregator = Aggregator() self.aslist = [self.head] + self.tail.aslist @@ -85,5 +86,13 @@ class _Nil(Term): self.aslist = [] def __repr__(self): return '[]' + def __iter__(self): + for a in self.aslist: + if not isinstance(a, Term): + yield a, (None,), a + else: + yield a, (None,), a + def __contains__(self, x): + return False Nil = _Nil() diff --git a/src/Dyna/Backend/Python/utils.py b/src/Dyna/Backend/Python/utils.py index d66b007..b1299b3 100644 --- a/src/Dyna/Backend/Python/utils.py +++ b/src/Dyna/Backend/Python/utils.py @@ -15,7 +15,7 @@ def _repr(x): return 'null' elif isinstance(x, basestring): # dyna doesn't accept single-quoted strings - return '"%s"' % x.replace('"', r'\"') + return '"%s"' % repr(x)[1:-1].replace('"', r'\"') else: return repr(x) diff --git a/test/repl/english.par b/test/repl/english.par index 6bdac28..e7c49ba 100644 --- a/test/repl/english.par +++ b/test/repl/english.par @@ -20,3 +20,4 @@ (ROOT (S (NP Papa) (VP (Modal would) (VP (V have) (VP (V (V eat) -ed) (NP (Det his) (N (N sandwich) -s)))))) .) (ROOT (S (NP (Det every) (N sandwich)) (VP (V was) (VP (V (V go) -ing) (VP (V to (V have)) (VP (V been) (Adj delicious)))))) .) (ROOT (S (NP (NP (Det the) (N (Adj (Adj fine) and (Adj blue)) (N woman))) and (NP (Det every) (N man))) (VP (VP (Modal must) (VP (V have) (VP (VP (V (V eat) -ed) (NP (Det two) (N (N sandwich) -s))) and (VP (V (V sleep) -ed))))) (PP (P on) (NP (Det the) (N floor))))) .) +(a b c d) -- 2.50.1