From: timv Date: Mon, 17 Jun 2013 01:42:15 +0000 (-0400) Subject: First pass at "bulk loader" API -- at the moment loading is only supported at X-Git-Url: https://hydra-www.ietfng.org/gitweb/?a=commitdiff_plain;h=8922920572018b2be78df06a9781d4f80c7ff0e4;p=dyna2 First pass at "bulk loader" API -- at the moment loading is only supported at the REPL (cmd-line interface coming soon). - Read tsv/csv format - matrix-like - S-Expression - work in progress Exceptions and error handlers live in their own module. Added `pycall` -- a more convenient way to do `eval` and `mod` --- diff --git a/examples/cky.dyna b/examples/cky.dyna index 2849fed..1006888 100644 --- a/examples/cky.dyna +++ b/examples/cky.dyna @@ -1,3 +1,23 @@ % CKY-like parsing -phrase(X,I,K) += phrase(Y,I,K) * rewrite(X,Y). -phrase(X,I,K) += phrase(Y,I,J) * phrase(Z,J,K) * rewrite(X,Y,Z). +phrase(X,I,K) min= + t(A,B) is phrase(Y,I,K), + &t(A + rewrite(X,Y), &e(X, B)). + +phrase(X,I,K) min= + t(YV,YB) is phrase(Y,I,J), + t(ZV,ZB) is phrase(Z,J,K), + &t(YV + ZV + rewrite(X,Y,Z), + &e(X,YB,ZB)). + +phrase(W,I,I+1) min= word(W,I), &t(0.0, W). + +% sentence +% "Papa at the caviar with the spoon ." +word( "Papa", 0). +word( "ate", 1). +word( "the", 2). +word("caviar", 3). +word( "with", 4). +word( "a", 5). +word( "spoon", 6). +word( ".", 7). 
diff --git a/src/Dyna/Backend/Python/Backend.hs b/src/Dyna/Backend/Python/Backend.hs index dabe150..19e82ca 100644 --- a/src/Dyna/Backend/Python/Backend.hs +++ b/src/Dyna/Backend/Python/Backend.hs @@ -144,6 +144,12 @@ constants = go go ("log",1) = Just $ PDBS $ call "log" go ("exp",1) = Just $ PDBS $ call "exp" + + + go ("pycall", _) = Just $ PDBS $ call "pycall" + go ("getattr", 2) = Just $ PDBS $ call "getattr" + + go ("uniform", _) = Just $ PDBS $ call "uniform" go ("<=",2) = Just $ PDBS $ infixOp "<=" diff --git a/src/Dyna/Backend/Python/chart.py b/src/Dyna/Backend/Python/chart.py index 7cedf17..a6209d7 100644 --- a/src/Dyna/Backend/Python/chart.py +++ b/src/Dyna/Backend/Python/chart.py @@ -1,5 +1,4 @@ from collections import defaultdict -from utils import notimplemented from defn import aggregator from term import Term, _repr diff --git a/src/Dyna/Backend/Python/errors.py b/src/Dyna/Backend/Python/errors.py new file mode 100644 index 0000000..70a9728 --- /dev/null +++ b/src/Dyna/Backend/Python/errors.py @@ -0,0 +1,74 @@ +import sys +from IPython.core.ultratb import VerboseTB +from utils import parse_attrs +from config import dotdynadir + + +class DynaCompilerError(Exception): + pass + + +#class AggregatorConflict(Exception): +# def __init__(self, key, expected, got): +# msg = "Aggregator conflict %r was %r trying to set to %r." \ +# % (key, expected, got) +# super(AggregatorConflict, self).__init__(msg) + + +class DynaInitializerException(Exception): + def __init__(self, exception, init): + msg = '%r in ininitializer for rule\n %s\n %s' % \ + (exception, + parse_attrs(init)['Span'], + parse_attrs(init)['rule']) + super(DynaInitializerException, self).__init__(msg) + + +def exception_handler(etype, evalue, tb): + + # once for the log file. 
+ with file(dotdynadir / 'crash.log', 'wb') as crashreport: + h = VerboseTB(color_scheme='Linux', + call_pdb=False, + ostream=crashreport, + long_header=True, + include_vars=True, + check_cache=None) + h(etype, evalue, tb) + + show_traceback((etype, evalue, tb)) + + # TODO: we should package up all relevant state including compiler + # version, codegen output, interpreter state (possibly without the + # chart -- because it might be too big to email); input to repl. + # This should all go into a tarball. + + print 'FATAL ERROR (%s): %s' % (etype.__name__, evalue) + print 'Please report this error by emailing bugs@dyna.org. ' \ + 'Please attach the following file %s' % crashreport.name + + +def enable_crash_handler(): + """ + Use our custom exception handler for handling uncaught exceptions. + """ + sys.excepthook = exception_handler + + +def show_traceback(einfo=None): + if not einfo: + einfo = sys.exc_info() + (etype, evalue, tb) = einfo + # once for the user + h = VerboseTB(color_scheme='Linux', + call_pdb=False, + ostream=None, + tb_offset=0, + long_header=False, + include_vars=False, + check_cache=None) + h(etype, evalue, tb) + + +def notimplemented(*_,**__): + raise NotImplementedError diff --git a/src/Dyna/Backend/Python/interpreter.py b/src/Dyna/Backend/Python/interpreter.py index 4bafdf0..966bb51 100644 --- a/src/Dyna/Backend/Python/interpreter.py +++ b/src/Dyna/Backend/Python/interpreter.py @@ -4,6 +4,9 @@ TODO ==== + - More info in crash handler. (stack trace, repl transcript, cmd-line args, + version control info, and dyna source is enough) + - call pre/post-processors from repl. - vbench: a script which tracks performace over time (= git commits). 
@@ -173,12 +176,13 @@ from time import time from chart import Chart, Term, _repr from defn import aggregator -from utils import ip, red, green, blue, magenta, yellow, \ - notimplemented, parse_attrs, ddict, dynac, enable_crash_handler, \ - DynaCompilerError, DynaInitializerException +from utils import ip, red, green, blue, magenta, yellow, parse_attrs, \ + ddict, dynac + from prioritydict import prioritydict from config import dotdynadir - +from errors import notimplemented, enable_crash_handler, \ + DynaInitializerException, DynaCompilerError try: from numpy import log, exp, sqrt @@ -189,6 +193,22 @@ except ImportError: # XXX: should probably issue a warning def uniform(a=0, b=1): return _random() * (b - a) + a +import re +def split(s, delim='\s+'): + return re.split(delim, s) + +# used as a work around to bring arbitrary python functions into dyna +def pycall(name, *args): + x = eval(name)(*args) + if isinstance(x, list): + return todynalist(x) + return x + +def todynalist(x): + if not x: + return Term('nil/0', ()) + return Term('cons/2', (x[0], todynalist(x[1:]))) + class Rule(object): def __init__(self, idx): @@ -245,7 +265,6 @@ class Interpreter(object): def __setstate__(self, state): ((self.chart, self.agenda, self.error, self.agg_name, self.parser_state), code) = state - self.edges = defaultdict(set) self.updaters = defaultdict(list) self.rules = ddict(Rule) self.do(self.dynac_code(code), initialize=False) @@ -488,7 +507,8 @@ class Interpreter(object): for k,v in [('chart', self.chart), ('build', self.build), ('peel', peel), - ('uniform', uniform), ('log', log), ('exp', exp), ('sqrt', sqrt)]: + ('uniform', uniform), ('log', log), ('exp', exp), ('sqrt', sqrt), + ('pycall', pycall)]: setattr(env, k, v) emits = [] diff --git a/src/Dyna/Backend/Python/loadmat.py b/src/Dyna/Backend/Python/loadmat.py new file mode 100644 index 0000000..9bd7551 --- /dev/null +++ b/src/Dyna/Backend/Python/loadmat.py @@ -0,0 +1,50 @@ +""" +loadmat - Load a text file as a (jagged) 
matrix. + +For example + +1 2 3 +4 5 + +6 7 + + +m(0,0) := 1. m(0,1) := 2. m(0,2) := 3 +m(1,0) := 4. m(1,1) := 5. + +m(3,0) := 6. m(3,1) := 7. + +""" + +import re + +interp = None +name = None + +# TODO: option for strict width +# TODO: option for stripping comments +def main(filename, astype=float, delim='\s+'): + + fn = '%s/2' % name + if interp.agg_name[fn] is None: + interp.new_fn(fn, ':=') + + def term(a, v): + interp.emit(interp.build(fn, *a), + v, + ruleix=None, + variables=None, + delete=False) + + with file(filename) as f: + for i, line in enumerate(f): + line = line.rstrip() + if not line: + continue + if delim is not None: + line = re.split(delim, line) + else: + line = [line] + + for j, v in enumerate(line): + term((i, j), astype(v)) diff --git a/src/Dyna/Backend/Python/repl.py b/src/Dyna/Backend/Python/repl.py index 00bd146..a3ea601 100644 --- a/src/Dyna/Backend/Python/repl.py +++ b/src/Dyna/Backend/Python/repl.py @@ -1,7 +1,8 @@ import os, cmd, readline import debug, interpreter -from utils import DynaCompilerError, DynaInitializerException, ip +from utils import ip +from errors import DynaCompilerError, DynaInitializerException from chart import _repr from config import dotdynadir @@ -131,3 +132,24 @@ class REPL(cmd.Cmd, object): self.cmdloop() finally: readline.write_history_file(self.hist) + + def do_load(self, cmd): + try: + self._load(cmd) + except: + from errors import show_traceback + show_traceback() + + def _load(self, cmd): + import re + print 'cmd:', repr(cmd) + [(name, module, args)] = re.findall('^([a-z][a-zA-Z_0-9]*) = ([a-z][a-zA-Z_0-9]*)\((.*)\)', cmd) + + m = __import__(module) + m.interp = self.interp + m.name = name + + exec 'm.main(%s)' % args + + self.interp.go() + self.interp.dump_charts() diff --git a/src/Dyna/Backend/Python/sexpr.py b/src/Dyna/Backend/Python/sexpr.py new file mode 100644 index 0000000..7a2eff9 --- /dev/null +++ b/src/Dyna/Backend/Python/sexpr.py @@ -0,0 +1,73 @@ +import sys +from cStringIO import StringIO +from 
utils import parse_sexpr + + +if __name__ == '__main__': + + def t(xs): + if isinstance(xs, basestring): +# return '"%s"' % xs + return xs + else: + assert len(xs) > 1 + if len(xs) == 2: + [sym, a] = map(t, xs) +# return '&t(%s)' % ', '.join(t(x) for x in xs) + return [sym, a] + elif len(xs) == 3: + [sym, a, b] = map(t, xs) +# return '&t(%s, %s, %s)' % (sym, a, b) + return [sym, a, b] + else: + [sym, a] = t(xs[0]), t(xs[1]) + rest = t(['@' + xs[0]] + xs[2:]) +# return '&t(%s, %s, %s)' % (sym, a, rest) + return [sym, a, rest] + + + def check_binary(x): + if isinstance(x, basestring): + return True + elif len(x) in (2, 3): + return all(map(check_binary, x)) + else: + return False + + + def pretty(t, initialindent=0): + "Pretty print tree as a tabbified s-expression." + f = StringIO() + out = f.write + def pp(t, indent=initialindent, indentme=True): + if indentme: + out(' '*indent) + if isinstance(t, basestring): # base case + return out('"%s"' % t) + if len(t) == 1: + if t[0]: + pp('"%s"' % t[0], indent, indentme) + return + label, children = t[0], t[1:] + + label = '"%s"' % label + + assert isinstance(label, basestring) + out('&t(%s, ' % label) + n = len(children) + for i, child in enumerate(children): + pp(child, indent + len(label) + 5, i != 0) # first child already indented + if i != n-1: # no newline after last child + out(',\n') + out(')') + pp(t) + out('\n') + return f.getvalue() + + for i, [x] in enumerate(parse_sexpr(sys.stdin.read())): + btree = t(x) + + assert check_binary(btree) + print + print 'sentence(%s) :=\n%s.' 
% (i, pretty(btree, 4).rstrip()) + print diff --git a/src/Dyna/Backend/Python/term.py b/src/Dyna/Backend/Python/term.py index da0bec7..f1816d5 100644 --- a/src/Dyna/Backend/Python/term.py +++ b/src/Dyna/Backend/Python/term.py @@ -1,4 +1,5 @@ -from utils import notimplemented +from errors import notimplemented + # TODO: codegen should output a derived Term instance for each functor class Term(object): diff --git a/src/Dyna/Backend/Python/tsv.py b/src/Dyna/Backend/Python/tsv.py new file mode 100644 index 0000000..85bc48d --- /dev/null +++ b/src/Dyna/Backend/Python/tsv.py @@ -0,0 +1,30 @@ +import re + +interp = None +name = None + +# TODO: option for stripping comments +def main(filename, ncols=None, delim='\t'): + + def term(a): + fn = '%s/%s' % (name, len(a)) + + if interp.agg_name[fn] is None: + interp.new_fn(fn, ':=') + + interp.emit(interp.build(fn, *a), + True, + ruleix=None, + variables=None, + delete=False) + + with file(filename) as f: + for i, line in enumerate(f): + line = line.rstrip() + if not line: + continue + if delim is not None: + line = re.split(delim, line) + else: + line = [line] + term([i] + line) diff --git a/src/Dyna/Backend/Python/utils.py b/src/Dyna/Backend/Python/utils.py index 0972c84..cc295ad 100644 --- a/src/Dyna/Backend/Python/utils.py +++ b/src/Dyna/Backend/Python/utils.py @@ -1,7 +1,6 @@ import re, sys from subprocess import Popen, PIPE from IPython.frontend.terminal.embed import InteractiveShellEmbed -from IPython.core.ultratb import VerboseTB from config import dynahome, dotdynadir import signal from contextlib import contextmanager @@ -15,31 +14,13 @@ black, red, green, yellow, blue, magenta, cyan, white = \ map('\033[3%sm%%s\033[0m'.__mod__, range(8)) -class DynaCompilerError(Exception): - pass - - -#class AggregatorConflict(Exception): -# def __init__(self, key, expected, got): -# msg = "Aggregator conflict %r was %r trying to set to %r." 
\ -# % (key, expected, got) -# super(AggregatorConflict, self).__init__(msg) - - -class DynaInitializerException(Exception): - def __init__(self, exception, init): - msg = '%r in ininitializer for rule\n %s\n %s' % \ - (exception, - parse_attrs(init)['Span'], - parse_attrs(init)['rule']) - super(DynaInitializerException, self).__init__(msg) - def dynac(f, out): """ Run compiler on file, ``f``, write results to ``out``. Raises ``DynaCompilerError`` on failure. """ + from errors import DynaCompilerError p = Popen(['%s/dist/build/dyna/dyna' % dynahome, '-B', 'python', '-o', out, f], stdout=PIPE, stderr=PIPE) stdout, stderr = p.communicate() @@ -48,45 +29,6 @@ def dynac(f, out): raise DynaCompilerError(stderr) -def exception_handler(etype, evalue, tb): - - # once for the log file. - with file(dotdynadir / 'crash.log', 'wb') as crashreport: - h = VerboseTB(color_scheme='Linux', - call_pdb=False, - ostream=crashreport, - long_header=True, - include_vars=True, - check_cache=None) - h(etype, evalue, tb) - - # once for the user - h = VerboseTB(color_scheme='Linux', - call_pdb=False, - ostream=None, - tb_offset=0, - long_header=False, - include_vars=False, - check_cache=None) - h(etype, evalue, tb) - - # TODO: we should package up all relevant state including compiler - # version, codegen output, interpreter state (possibly without the - # chart -- because it might be too big to email); input to repl. - # This should all go into a tarball. - - print 'FATAL ERROR (%s): %s' % (etype.__name__, evalue) - print 'Please report this error by emailing bugs@dyna.org. ' \ - 'Please attach the following file %s' % crashreport.name - - -def enable_crash_handler(): - """ - Use our custom exception handler for handling uncaught exceptions. 
- """ - sys.excepthook = exception_handler - - @contextmanager def interrupt_after(): @@ -106,11 +48,6 @@ def interrupt_after(): raise KeyboardInterrupt - -def notimplemented(*_,**__): - raise NotImplementedError - - class ddict(dict): """ Default Dict where the default function gets the key as an argument, unlike @@ -206,76 +143,3 @@ def rule_source(span, src=None): else: [line] = rlines return line[bc-1:ec] - - -if __name__ == '__main__': - #rule_source('examples/papa.dyna:4:1-examples/papa.dyna:4:47') - import sys - - def t(xs): - if isinstance(xs, basestring): -# return '"%s"' % xs - return xs - else: - assert len(xs) > 1 - if len(xs) == 2: - [sym, a] = map(t, xs) -# return '&t(%s)' % ', '.join(t(x) for x in xs) - return [sym, a] - elif len(xs) == 3: - [sym, a, b] = map(t, xs) -# return '&t(%s, %s, %s)' % (sym, a, b) - return [sym, a, b] - else: - [sym, a] = t(xs[0]), t(xs[1]) - rest = t(['@' + xs[0]] + xs[2:]) -# return '&t(%s, %s, %s)' % (sym, a, rest) - return [sym, a, rest] - - - def check_binary(x): - if isinstance(x, basestring): - return True - elif len(x) in (2, 3): - return all(map(check_binary, x)) - else: - return False - - from cStringIO import StringIO - - def pretty(t, initialindent=0): - "Pretty print tree as a tabbified s-expression." 
- f = StringIO() - out = f.write - def pp(t, indent=initialindent, indentme=True): - if indentme: - out(' '*indent) - if isinstance(t, basestring): # base case - return out('"%s"' % t) - if len(t) == 1: - if t[0]: - pp('"%s"' % t[0], indent, indentme) - return - label, children = t[0], t[1:] - - label = '"%s"' % label - - assert isinstance(label, basestring) - out('&t(%s, ' % label) - n = len(children) - for i, child in enumerate(children): - pp(child, indent + len(label) + 5, i != 0) # first child already indented - if i != n-1: # no newline after last child - out(',\n') - out(')') - pp(t) - out('\n') - return f.getvalue() - - for i, [x] in enumerate(parse_sexpr(sys.stdin.read())): - btree = t(x) - - assert check_binary(btree) - print - print 'sentence(%s) :=\n%s.' % (i, pretty(btree, 4).rstrip()) - print