]> hydra-www.ietfng.org Git - dyna2/commitdiff
improvements to `examples/ptb.dyna`
authorTim Vieira <tim.f.vieira@gmail.com>
Wed, 3 Jul 2013 16:03:27 +0000 (12:03 -0400)
committerTim Vieira <tim.f.vieira@gmail.com>
Wed, 3 Jul 2013 16:11:18 +0000 (12:11 -0400)
 - (no longer a complete mess thanks to the new `load` commands)

 - does binarization

 - extracts and counts rules in a s-expression tree.

 - parses a sentence given as a string AND extracts the optimal parse by
   following backpointers.

sexpr loader uses nested lists as it's representation

'in'(+,+)+ asserts second arg is a list.

retract_rule: no longer crashes when retracting a backchained rule. There are
possible bugs because we haven't deleted the memo tables affected by that rule.

REPL: show changes after 'load' commands

String display format should no longer render special characters like '\n'

examples/ptb.dyna
src/Dyna/Backend/Python/Backend.hs
src/Dyna/Backend/Python/interpreter.py
src/Dyna/Backend/Python/load/__init__.py
src/Dyna/Backend/Python/load/sexpr.py
src/Dyna/Backend/Python/repl.py
src/Dyna/Backend/Python/stdlib.py
src/Dyna/Backend/Python/term.py
src/Dyna/Backend/Python/utils.py
test/repl/english.par

index 4a9cbd7a4ce479ded5515cf242ed3af7e09757af..9e7990e61f0768afadef4dea62e1bb7786df1405 100644 (file)
-sentence(0) :=
-    &t("S", &t("NP-SBJ", &t("NNP", "Rolls-Royce"),
-                         &t("@NP-SBJ", &t("NNP", "Motor"),
-                                       &t("@@NP-SBJ", &t("NNPS", "Cars"),
-                                                      &t("NNP", "Inc.")))),
-            &t("@S", &t("VP", &t("VBD", "said"),
-                              &t("SBAR", &t("-NONE-", "0"),
-                                         &t("S", &t("NP-SBJ", &t("PRP", "it")),
-                                                 &t("VP", &t("VBZ", "expects"),
-                                                          &t("S", &t("NP-SBJ", &t("PRP$", "its"),
-                                                                               &t("@NP-SBJ", &t("NNP", "U.S."),
-                                                                                             &t("NNS", "sales"))),
-                                                                  &t("VP", &t("TO", "to"),
-                                                                           &t("VP", &t("VB", "remain"),
-                                                                                    &t("@VP", &t("ADJP-PRD", &t("JJ", "steady")),
-                                                                                              &t("@@VP", &t("PP-LOC-CLR", &t("IN", "at"),
-                                                                                                                          &t("NP", &t("QP", &t("IN", "about"),
-                                                                                                                                            &t("CD", "1,200")),
-                                                                                                                                   &t("NNS", "cars"))),
-                                                                                                         &t("PP-TMP", &t("IN", "in"),
-                                                                                                                      &t("NP", &t("CD", "1990")))))))))))),
-                     &t(".", "."))).
-
-sentence(1) :=
-    &t("S", &t("NP-SBJ", &t("DT", "The"),
-                         &t("@NP-SBJ", &t("NN", "luxury"),
-                                       &t("@@NP-SBJ", &t("NN", "auto"),
-                                                      &t("NN", "maker")))),
-            &t("@S", &t("NP-TMP", &t("JJ", "last"),
-                                  &t("NN", "year")),
-                     &t("VP", &t("VBD", "sold"),
-                              &t("@VP", &t("NP", &t("CD", "1,214"),
-                                                 &t("NNS", "cars")),
-                                        &t("PP-LOC", &t("IN", "in"),
-                                                     &t("NP", &t("DT", "the"),
-                                                              &t("NNP", "U.S."))))))).
-
-sentence(2) :=
-    &t("S", &t("NP-SBJ", &t("NP", &t("NNP", "Howard"),
-                                  &t("NNP", "Mosher")),
-                         &t("@NP-SBJ", &t(",", ","),
-                                       &t("@@NP-SBJ", &t("NP", &t("NP", &t("NN", "president")),
-                                                               &t("@NP", &t("CC", "and"),
-                                                                         &t("NP", &t("JJ", "chief"),
-                                                                                  &t("@NP", &t("NN", "executive"),
-                                                                                            &t("NN", "officer"))))),
-                                                      &t(",", ",")))),
-            &t("@S", &t("VP", &t("VBD", "said"),
-                              &t("SBAR", &t("-NONE-", "0"),
-                                         &t("S", &t("NP-SBJ", &t("PRP", "he")),
-                                                 &t("VP", &t("VBZ", "anticipates"),
-                                                          &t("NP", &t("NP", &t("NN", "growth")),
-                                                                   &t("@NP", &t("PP", &t("IN", "for"),
-                                                                                      &t("NP", &t("DT", "the"),
-                                                                                               &t("@NP", &t("NN", "luxury"),
-                                                                                                         &t("@@NP", &t("NN", "auto"),
-                                                                                                                    &t("NN", "maker"))))),
-                                                                             &t("PP-LOC", &t("PP", &t("IN", "in"),
-                                                                                                   &t("NP", &t("NNP", "Britain"),
-                                                                                                            &t("@NP", &t("CC", "and"),
-                                                                                                                      &t("NNP", "Europe")))),
-                                                                                          &t("@PP-LOC", &t(",", ","),
-                                                                                                        &t("@@PP-LOC", &t("CC", "and"),
-                                                                                                                       &t("PP", &t("IN", "in"),
-                                                                                                                                &t("NP", &t("ADJP", &t("JJ", "Far"),
-                                                                                                                                                    &t("JJ", "Eastern")),
-                                                                                                                                         &t("NNS", "markets")))))))))))),
-                     &t(".", "."))).
-
-%toString(A) :=
-%  needs(A),
-%  A.
-%
-%toString(&t(A)) :=
-%  needs(&t(A)),
-%  A.
-%
-%toString(&t(A,B)) :=
-%  needs(&t(A,B)),
-%  mod("(%s %s)",
-%      tuple(toString(A),
-%            toString(B))).
-%
-%toString(&t(A,B,C)) :=
-%  needs(&t(A,B,C)),
-%  mod("(%s %s %s)",
-%      tuple(toString(A),
-%            toString(B),
-%            toString(C))).
-
-needs(A) |= needs(&t(A,B)).
-needs(B) |= needs(&t(A,B)).
-
-needs(A) |= needs(&t(A,B,C)).
-needs(B) |= needs(&t(A,B,C)).
-needs(C) |= needs(&t(A,B,C)).
-
-%needs(sentence(0)).
-%needs(sentence(1)).
-%needs(sentence(2)).
-
-%zzz(0) := toString(sentence(0)).
-%zzz(1) := toString(sentence(1)).
-%zzz(2) := toString(sentence(2)).
-
-%sym(&t(X,_)) := X.
-%sym(&t(X,_,_)) := X.
-
 % the following rule order is important bc we use :=.
-sym(A) := needs(A), A.
-sym(&t(A,B)) := needs(&t(A,B)), A.
-sym(&t(A,B,C)) := needs(&t(A,B,C)), A.
+:- backchain sym/1.
+sym(A) := A.
+sym([A|_]) := A.
 
 % rule used to create top-level subtree
-rules(&t(X,Y)) := needs(&t(X,Y)), &r(X, sym(Y)).
-rules(&t(X,Y,Z)) := needs(&t(X,Y,Z)), needs(Z), &r(X, sym(Y), sym(Z)).
-
-:- dispos &t(&).
-:- dispos &t(&,&).
-:- dispos &t(&,&,&).
+:- backchain rule/1.
+rule([X,Y]) := &r(X, sym(Y)).
+rule([X,Y,Z]) := &r(X, sym(Y), sym(Z)).
 
 % count the number of words in a subtree
 :- backchain numwords/1.
 numwords(X) := 1.
-numwords(t(X,Y)) := numwords(Y).
-numwords(t(X,Y,Z)) := numwords(Y) + numwords(Z).
+numwords([X,Y]) := numwords(Y).
+numwords([X,Y,Z]) := numwords(Y) + numwords(Z).
 
 % extract word and it's position in tree
 :- backchain word/2.
 word(0, X) := X.
-word(I, t(X,Y)) := word(I, Y).
-word(I, t(X,Y,Z)) := I < numwords(Y), word(I, Y).
-word(I, t(X,Y,Z)) := I >= numwords(Y), word(I-numwords(Y), Z).
-
-%senword(S, I) := word(I, sentence(S)), between(0,I,numwords(S)).
+word(I, [X,Y]) := word(I, Y).
+word(I, [X,Y,Z]) := I < numwords(Y), word(I, Y).
+word(I, [X,Y,Z]) := I >= numwords(Y), word(I-numwords(Y), Z).
 
 % unnormalized
-c(X,Y) += r(X,Y) is rules(&t(X1,Y1)), 1.
-c(X,Y,Z) += r(X,Y,Z) is rules(&t(X1,Y1,Z1)), 1.
+c(X,Y) += 1
+  for &r(X,Y) in f(T),
+      T is b(tree(_)).
+
+c(X,Y,Z) += 1
+  for &r(X,Y,Z) in f(T),
+      T is b(tree(_)).
 
 % normalizing constants
 n(X) += c(X,Y).
@@ -146,13 +38,61 @@ n(X) += c(X,Y,Z).
 p(X,Y) := c(X,Y) / n(X).
 p(X,Y,Z) := c(X,Y,Z) / n(X).
 
-% activate fake backchaining.
-needs(sentence(0)).
-needs(sentence(1)).
-needs(sentence(2)).
-
-% TODO: convert tree structure into word(word, position, sentence).
-
-% TODO: augment CKY rules with sentence index
-
-% TODO: implement constituent recall accuracy measure for two trees
+% extract rules in a tree
+:- backchain f/1.
+f([X,Y]) bag= rule([X,Y]).
+f([X,Y,Z]) bag= rule([X,Y,Z]).
+f([X,Y]) bag= A for A in f(X).
+f([X,Y]) bag= A for A in f(Y).
+f([X,Y,Z]) bag= A for A in f(X).
+f([X,Y,Z]) bag= A for A in f(Y).
+f([X,Y,Z]) bag= A for A in f(Z).
+
+%zzz(I) :=
+%  T is b(tree(0)),
+%  I in pycall("range", 0, numwords(T)),
+%  word(I, T).
+
+% binarize s-expression tree
+:- backchain b/1.
+b(X) := X.
+b([X,Y]) := [X,b(Y)].
+b([X,Y,Z|Xs]) := [X, b(Y), b([&'@'(X), Z|Xs])].
+b([X,Y,Z]) := [X,b(Y),b(Z)].
+
+% CKY parser
+phrase(X,I,K) max= phrase(Y,I,J) * phrase(Z,J,K) * p(X,Y,Z).
+phrase(X,I,K) max= phrase(Y,I,K) * p(X,Y).
+phrase(X,I,I+1) max= 1 for [I,X] in enumerate(sentence).
+
+% backpointers
+bk(X,I,K) argmax= [phrase(Y,I,J) * phrase(Z,J,K) * p(X,Y,Z), [[Y,I,J], [Z,J,K]]].
+bk(X,I,K) argmax= [phrase(Y,I,K) * p(X,Y), [[Y,I,K]]].
+bk(X,I,I+1) argmax= [1, X] for [I,X] in enumerate(sentence).
+
+% extract path from backpointers
+path(X,I,K) := W is bk(X,I,K), W.
+path(X,I,K) := [[Y,I,J], [Z,J,K]] is bk(X,I,K), [X, path(Y,I,J), path(Z,J,K)].
+path(X,I,K) := [[Y,I,K]] is bk(X,I,K), [X, path(Y,I,K)].
+
+% utility function.
+:- backchain enumerate/2.
+:- backchain enumerate/1.
+enumerate(X) = enumerate(X, 0).
+enumerate([], I) = [].
+enumerate([X|Xs], I) = [[I,X] | enumerate(Xs, I+1)].
+
+:- backchain length/1.
+length([]) = 0.
+length([X|Xs]) = 1 + length(Xs).
+
+% declare a sentence.
+sentence = split("George love -s Laura .").
+
+%sentence(S, I) =
+%  T is b(tree(S)),
+%  I in pycall("range", 0, numwords(T)),
+%  word(I, T).
+
+zgoal(sentence) max= phrase("ROOT", 0, length(sentence)).
+zzgoal(sentence) := path("ROOT", 0, length(sentence)).
index e48e3d35fd2dd38fd017034c0e328bd94dcc9dd4..00fc65d41b541422358f3c2c87a312e1a0f5df9e 100644 (file)
@@ -176,7 +176,7 @@ constants = go
   go ("int",_)     = Just $ PDBS $ call "int" []
   go ("pycall",_)  = Just $ PDBS $ call "pycall" []
 
-  go ("in",2)    = Just $ PDBS $ infixOp " in "
+  go ("in",2)    = Just $ PDBS $ call "in_list" []
 
   go ("<=",2)    = Just $ PDBS $ infixOp "<="
   go ("<",2)     = Just $ PDBS $ infixOp "<"
index 2cde74710f7cd07a71073fba2ce1b99daa3d70c5..50f1824f70e5cc838f3322dbec7582938b41ce8f 100644 (file)
@@ -44,11 +44,6 @@ TODO
 
  - TODO: True and 1 are equivalent. This sometimes leads to strange behavior.
 
- - Hide all *.plan.py files
-
- - multi-line input in REPL. Consider using a fancier library such as cmd2 or
-   ipython's.
-
 
 FASTER
 ======
@@ -98,13 +93,6 @@ USERS
    items proved: is it counting to infinity?).
 
 
-NOTES
-=====
-
- - `None` does not propagate, eventually it will because of the `?` prefix
-   operator.
-
-
 JUST FOR FUN
 ============
 
@@ -319,9 +307,11 @@ class Interpreter(object):
                     xs.remove(u)
                     assert u not in xs, 'Several occurrences of u in xs'
         # Step 2: run initializer in delete mode
-        rule.init(emit=self.delete_emit)
-        # Step 3; go!
-        return self.go()
+        if rule.init is not None:
+            rule.init(emit=self.delete_emit)
+            # Step 3; go!
+            return self.go()
+        # TODO: probably have to blast any memos from BC computations
 
     def go(self):
         try:
@@ -622,13 +612,6 @@ def main():
 
     crash_handler()
 
-
-#    def pickle_interp():
-#        import subprocess
-#        subprocess.Popen(['tar', 'czf', dotdynadir / 'crash.tar.gz', dotdynadir])
-#    crash_handler.interp = pickle_interp
-
-
     if args.source:
 
         if not os.path.exists(args.source):
@@ -672,8 +655,6 @@ def main():
             print e
             exit(1)
 
-        interp.dump_charts(args.output)      # should be a post-processor
-
     if args.load:
         for cmd in args.load:
             load.run(interp, cmd)
@@ -682,6 +663,9 @@ def main():
         for cmd in args.post_process:
             post.run(interp, cmd)
 
+    if args.load or args.post_process or args.source:
+        interp.dump_charts(args.output)      # should be a post-processor
+
     if args.interactive or not args.source:
         interp.repl()
 
index a733071861abe7526f2685f813694f82f8e4aba6..c5f3d0d3a29230aa8b3f02bce28434176561cc58 100644 (file)
@@ -18,4 +18,4 @@ def run(interp, line):
 
     m = get_module('load', module)(interp, name)
     exec 'm.main(%s)' % args
-    interp.go()
+    return interp.go()
index ea334fcd9d8762be6790896f8a8abab0269edf6e..1653b2d54148df90f35a355c9d569e730bfa409d 100644 (file)
@@ -1,5 +1,6 @@
 from cStringIO import StringIO
 from utils import parse_sexpr
+from stdlib import todynalist
 
 
 class sexpr(object):
@@ -15,8 +16,8 @@ class sexpr(object):
     ========
     trees/1
     =======
-    trees(0) = node("a", node("b", "c"), node("d", "e"))
-    trees(1) = node("a", "b", "c")
+    trees(0) = ["a", ["b", "c"], ["d", "e"]]
+    trees(1) = ["a", "b", ["c"]]
 
     """
 
@@ -35,30 +36,11 @@ class sexpr(object):
                 interp.new_fn(fn, ':=')
             return interp.build(fn, *a)
 
-        def node(*a):
-            fn = '%s/%s' % ('node', len(a))
-            if interp.agg_name[fn] is None:
-                interp.new_fn(fn, ':=')
-            return interp.build(fn, *a)
-
         def t(xs):
             if isinstance(xs, basestring):
                 return xs
             else:
-                if len(xs) == 1:
-#                    [sym] = map(t, xs)
-#                    return node(sym, ())
-                    return t(xs[0])          # doesn't have a label.
-                elif len(xs) == 2:
-                    [sym, a] = map(t, xs)
-                    return node(sym, a)
-                elif len(xs) == 3:
-                    [sym, a, b] = map(t, xs)
-                    return node(sym, a, b)
-                else:
-                    [sym, a] = t(xs[0]), t(xs[1])
-                    rest = t(['@' + xs[0]] + xs[2:])
-                    return node(sym, a, rest)
+                return todynalist([t(x) for x in xs])
 
         contents = file(filename).read()
 
index e0dfc04bd922b575d397c2f50fe5ff4c35bcc8a7..93b5e2352fd8361b3c6ec82e40f1e09f1d05560f 100644 (file)
@@ -394,10 +394,12 @@ class REPL(cmd.Cmd, object):
 
         """
         try:
-            load.run(self.interp, line)
+            changed = load.run(self.interp, line)
         except:
             show_traceback()
             readline.write_history_file(self.hist)
+        else:
+            self._changed(changed)
 
     def do_post(self, line):
         """
@@ -414,10 +416,12 @@ class REPL(cmd.Cmd, object):
 
         """
         try:
-            post.run(self.interp, line)
+            changed = post.run(self.interp, line)
         except:
             show_traceback()
             readline.write_history_file(self.hist)
+        else:
+            self._changed(changed)
 
     def do_trace(self, q):
         """
index f5d296501133ed91a6262f1de0fb0032edf4c69b..0b626688cc3181c0bac2281b9021f98e57628b3a 100644 (file)
@@ -45,9 +45,13 @@ def todynalist(x):
     return _todynalist(list(x))
 
 def _todynalist(x):
-    if not x:
-        return Nil
-    return Cons(x[0], _todynalist(x[1:]))
+#    if not x:
+#        return Nil
+#    return Cons(x[0], _todynalist(x[1:]))
+    c = Nil
+    for y in reversed(x):
+        c = Cons(y, c)
+    return c
 
 def get(x, i):
     return x[i]
@@ -56,3 +60,13 @@ def iter_cons(x):
     if not (isinstance(x, Cons) or x is Nil):
         raise TypeError("Attemping to iterate something which isn't a list.")
     return x
+
+def in_list(x, a):
+    if not (isinstance(a, Cons) or a is Nil):
+        raise TypeError("Attemping to iterate something which isn't a list.")
+    return x in a.aslist
+
+# should probably be done with memoized backchaining...
+def read_lines(filename):
+    with file(filename) as f:
+        return f.readlines()
index fa2e0693a91d815640ed038d80e49d8834046356..3d5e96df46ff010540bf9b005313b2de3c842a40 100644 (file)
@@ -51,9 +51,10 @@ class Term(object):
 class Cons(Term):
 
     def __init__(self, head, tail):
+        if not (isinstance(tail, Cons) or tail is Nil):
+            raise TypeError('Malformed list')
         self.head = head
         self.tail = tail
-        assert isinstance(tail, (Cons, _Nil)), tail
         Term.__init__(self, 'cons/2', (head, tail))
         self.aggregator = Aggregator()
         self.aslist = [self.head] + self.tail.aslist
@@ -85,5 +86,13 @@ class _Nil(Term):
         self.aslist = []
     def __repr__(self):
         return '[]'
+    def __iter__(self):
+        for a in self.aslist:
+            if not isinstance(a, Term):
+                yield a, (None,), a
+            else:
+                yield a, (None,), a
+    def __contains__(self, x):
+        return False
 
 Nil = _Nil()
index d66b00700b101da135e46a5729e9b6377d844dc3..b1299b35532abbf754b230ac354c8babbe23cff0 100644 (file)
@@ -15,7 +15,7 @@ def _repr(x):
         return 'null'
     elif isinstance(x, basestring):
         # dyna doesn't accept single-quoted strings
-        return '"%s"' % x.replace('"', r'\"')
+        return '"%s"' % repr(x)[1:-1].replace('"', r'\"')
     else:
         return repr(x)
 
index 6bdac280c60d59c57c1493584f0dc5e80f084341..e7c49bae84231aa015fa8d4cb369adb7f79cf333 100644 (file)
@@ -20,3 +20,4 @@
 (ROOT (S (NP Papa) (VP (Modal would) (VP (V have) (VP (V (V eat) -ed) (NP (Det his) (N (N sandwich) -s)))))) .)
 (ROOT (S (NP (Det every) (N sandwich)) (VP (V was) (VP (V (V go) -ing) (VP (V to (V have)) (VP (V been) (Adj delicious)))))) .)
 (ROOT (S (NP (NP (Det the) (N (Adj (Adj fine) and (Adj blue)) (N woman))) and (NP (Det every) (N man))) (VP (VP (Modal must) (VP (V have) (VP (VP (V (V eat) -ed) (NP (Det two) (N (N sandwich) -s))) and (VP (V (V sleep) -ed))))) (PP (P on) (NP (Det the) (N floor))))) .)
+(a b c d)