From 3621dcbef7b8a81a7ba4c38204d91629ac195307 Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Thu, 29 Nov 2012 00:09:30 -0500 Subject: [PATCH] First part of new NormalizeParse --- src/Dyna/Analysis/NormalizeParse.hs | 221 ++++++++++++-------- src/Dyna/Analysis/NormalizeParseSelftest.hs | 22 +- 2 files changed, 142 insertions(+), 101 deletions(-) diff --git a/src/Dyna/Analysis/NormalizeParse.hs b/src/Dyna/Analysis/NormalizeParse.hs index c9b48a0..4934793 100644 --- a/src/Dyna/Analysis/NormalizeParse.hs +++ b/src/Dyna/Analysis/NormalizeParse.hs @@ -3,23 +3,27 @@ -- -- In Dyna's surface syntax, there exists both \"in-place evaluation\" and -- \"in-place construction\". How do we deal with this? Well, it's a --- little messy: +-- little messy. -- --- 1. If a term is wrapped by the unary @*@ functor, it is explicitly --- moved to an ANF evaluation and replaced by a variable. So --- @f(*g(X))@ rewrites as @Y is g(X), f(Y)@. +-- 1. There are explicit \"eval\" (@*@) and \"quote\" (@&@) operators +-- which may be used to manually specify which is intended. -- --- 2. Each functor gets a chance to specify, for each argument, that it --- would prefer to evaluate a given position. If so, and if the position --- is occupied by a non-variable, non-primitive term (e.g., atom or --- structure) whose disposition is to evaluate and which is not quoted by --- the unary @&@ functor, then it is moved to an ANF evaluation. If f --- prefers to evaluate only its first position, and @g@ is disposed to --- evaluation by default, @f(g(X),h(Y))@ rewrites as @Z is g(X), --- f(Z,h(Y))@. +-- 2. Functors specify \"argument dispositions\", indicating whether they +-- prefer to evaluate or build structure in each argument position. -- --- 3. Otherwise, a non-variable, non-primitive term in an argument will --- be interpreted as structure-building. +-- 3. Functors further specify \"self disposition\", indicating whether +-- they 1) leave the decision to the parent, 2) prefer to build structure +-- unless explicitly evaluated, or 3) prefer to be evaluated unless +-- explicitly quoted. +-- +-- In short, explicit marks are always obeyed; absent one, the functor's +-- self disposition is obeyed; if the functor has no preference, the outer +-- functor's argument disposition is used as a last resort. There is, +-- however, one important caveat: /variables/ and /primitive terms/ (e.g. +-- numerics, strings, literal dynabases, foreign terms, ...) have self +-- dispositions of preferring structural interpretation. Variables may be +-- meaningfully explicitly evaluated, with the effect of evaluating their +-- bindings. Attempting to evaluate a primitive is an error. -- -- Note that in rules, the head is by default not evaluated (regardless of -- the disposition of their outer functor), while the body is interpreted as @@ -49,6 +53,7 @@ import qualified Data.ByteString.UTF8 as BU import qualified Data.ByteString as B import qualified Data.Map as M import qualified Data.Set as S +import Text.PrettyPrint.Free import qualified Text.Trifecta as T import qualified Dyna.ParserHS.Parser as P @@ -57,8 +62,17 @@ import Dyna.Term.TTerm import qualified Data.Char as C -import Text.PrettyPrint +data SelfDispos = SDInherit + | SDEval + | SDQuote + +data ArgDispos = ADEval + | ADQuote + +data ECSrc = ECFunctor + | ECExplicit +type EvalCtx = (ECSrc,ArgDispos) data ANFDict = AD { -- | A map from (functor,arity) to a list of bits indicating whether to @@ -68,12 +82,19 @@ data ANFDict = AD -- -- XXX Stronger type desired: we'd like static assurance that the -- length of the list matches the arity in the key! - ad_arg_dispos :: (B.ByteString,Int) -> [Bool] + ad_arg_dispos :: (DFunct,Int) -> [ArgDispos] - -- | The set of functors that prefer not to be evaluated. - , ad_self_dispos :: S.Set (B.ByteString,Int) + -- | A map from (functor,arity) to self disposition. + , ad_self_dispos :: (DFunct,Int) -> SelfDispos } +mergeDispositions = md + where + md SDInherit (_,d) = d + md SDEval (ECExplicit,ADQuote) = ADQuote + md SDEval (_,_) = ADEval + md SDQuote (ECExplicit,ADEval) = ADEval + md SDQuote (_,_) = ADQuote {- This stage of ANF does not actually link evaluations to - their semantic interpretation. That is, we have not yet @@ -81,12 +102,14 @@ data ANFDict = AD -} data ANFState = AS { as_next :: !Int - , as_evals :: M.Map B.ByteString DTerm - , as_unifs :: M.Map B.ByteString DTerm + , as_evals :: M.Map DVar DTerm + , as_unifs :: M.Map DVar DTerm + , as_annot :: M.Map DVar [T.Spanned (Annotation DTerm)] + , as_warns :: [(B.ByteString, [T.Span])] } deriving (Show) -nextVar :: (MonadState ANFState m) => String -> m B.ByteString +nextVar :: (MonadState ANFState m) => String -> m DVar nextVar pfx = do vn <- gets as_next modify (\s -> s { as_next = vn + 1 }) @@ -96,16 +119,18 @@ newEval :: (MonadState ANFState m) => String -> DTerm -> m DTerm newEval pfx t = do n <- nextVar pfx evs <- gets as_evals - modify (\s -> s { as_evals = M.insert n t evs}) + modify (\s -> s { as_evals = M.insert n t evs }) return $ UVar n newUnif :: (MonadState ANFState m) => String -> DTerm -> m DTerm newUnif pfx t = do n <- nextVar pfx uns <- gets as_unifs - modify (\s -> s { as_unifs = M.insert n t uns}) + modify (\s -> s { as_unifs = M.insert n t uns }) return $ UVar n +newWarn :: (MonadState ANFState m) => B.ByteString -> [T.Span] -> m () +newWarn msg loc = modify (\s -> s { as_warns = (msg,loc):(as_warns s) }) unspan :: T.Spanned P.Term -> DTerm unspan (P.TVar v T.:~ _) = UVar v @@ -117,65 +142,81 @@ unspan (P.TAnnot a t T.:~ _) = UTerm $ TAnnot (fmap unspan a) (unspan t) -- | Convert a syntactic term into ANF; while here, move to a -- Control.Unification term representation. -- +-- The ANFState ensures that variables are unique; we additionally give them +-- \"semi-meaningful\" prefixes, but these should not be relied upon. +-- +-- XXX On second thought, we should just move to a @TermF B.ByteString +-- Var@ representation, since we want everything flattened. +-- -- XXX This sheds span information entirely, which is probably not what we -- actually want. Note that we're careful to keep a stack of contexts -- around, so we should probably do something clever like attach them to -- operations we extract? normTerm_ :: (MonadState ANFState m, MonadReader ANFDict m) - => Bool -- ^ In an evaluation context? - -> Bool -- ^ Unpack unifications? + => EvalCtx -- ^ In an evaluation context? -> [T.Span] -- ^ List of spans traversed -> P.Term -- ^ Term being digested -> m DTerm - -- Variables don't evaluate and don't need to be moved -normTerm_ _ _ _ (P.TVar v) = return $ UVar v +-- Variables only evaluate in explicit context +normTerm_ c _ (P.TVar v) = + case c of + (ECExplicit,ADEval) -> newEval "_$v" + _ -> return + $ UVar v + +-- Numerics get returned in-place and raise a warning if they are evaluated. +normTerm_ c ss (P.TNumeric n) = do + case c of + (ECExplicit,ADEval) -> newWarn "Ignoring request to evaluate numeric" ss + _ -> return () + return $ UTerm $ TNumeric n + +-- Quote makes the context explicitly a quoting one +normTerm_ _ ss (P.TFunctor "&" [t T.:~ st]) = do + normTerm_ (ECExplicit,ADQuote) (st:ss) t + +-- Evaluation is a little different: in addition to forcing the context to +-- evaluate, it must also evaluate if the context from on high is one of +-- evaluation! +normTerm_ c ss (P.TFunctor "*" [t T.:~ st]) = + normTerm_ (ECExplicit,ADEval) (st:ss) t + >>= case c of + (_,ADEval) -> newEval "_$s" + _ -> return + +-- Annotations are stripped of their span information +-- +-- XXX this is probably the wrong thing to do +normTerm_ c ss (P.TAnnot a (t T.:~ st)) = do + nt <- normTerm_ c (st:ss) t + return $ UTerm $ TAnnot (fmap unspan a) nt - -- Numerics also get returned in-place. -normTerm_ _ _ _ (P.TNumeric n) = return $ UTerm $ TNumeric n +-- Functors have both top-down and bottom-up dispositions on +-- their handling. +normTerm_ c ss (P.TFunctor f as) = do - -- FIXME: (nwf) Quote simply disappears having converted the context to a - -- non-evaluation context. -normTerm_ _ _ ss (P.TFunctor "&" [t T.:~ st]) = do - normTerm_ False True (st:ss) t + argdispos <- asks $ flip ($) (f,length as) . ad_arg_dispos + normas <- mapM (\(a T.:~ s,d) -> normTerm_ (ECFunctor,d) (s:ss) a) + (zip as argdispos) - -- Star forces evaluation even when the argument would prefer - -- to not be evaluated, thus the sort of odd "normalize in - -- nonevaluation context then eval" here. -normTerm_ _ _ ss (P.TFunctor "*" [t T.:~ st]) = do - normTerm_ False True (st:ss) t >>= newEval "_s" + selfdispos <- asks $ flip ($) (f,length as) . ad_self_dispos - -- Annotations are stripped of their span information - -- - -- XXX this is probably the wrong thing to do -normTerm_ c u ss (P.TAnnot a (t T.:~ st)) = do - nt <- normTerm_ c u (st:ss) t - return $ UTerm $ TAnnot (fmap unspan a) nt + let dispos = mergeDispositions selfdispos c - -- Functors have both top-down and bottom-up dispositions on - -- their handling. -normTerm_ c u ss (P.TFunctor f as) = do - argdispos <- asks $ flip ($) (f,length as) . ad_arg_dispos - normas <- mapM (\(a T.:~ s,d) -> normTerm_ d True (s:ss) a) (zip as argdispos) - selfdispos <- getSelfDispos - (case () of - _ | c && selfdispos -> newEval "_f" - _ | u -> newUnif "_u" - _ -> return) + (case dispos of + ADEval -> newEval "_$f" + ADQuote -> newUnif "_$u") $ UTerm $ TFunctor f normas - where - getSelfDispos = do - set <- asks $ not . S.member (f,length as) . ad_self_dispos - return set - normTerm :: (MonadState ANFState m, MonadReader ANFDict m) => Bool -- ^ In an evaluation context? -> T.Spanned P.Term -- ^ Term to digest -> m DTerm -normTerm c (t T.:~ s) = normTerm_ c False [s] t +normTerm c (t T.:~ s) = normTerm_ (ECFunctor,if c then ADEval else ADQuote) + [s] t - -- XXX +-- XXX normRule :: (MonadState ANFState m, MonadReader ANFDict m) => T.Spanned P.Rule -- ^ Term to digest -> m DRule @@ -188,28 +229,36 @@ normRule (P.Rule h a es r T.:~ _) = do nes <- mapM (normTerm True) es return $ Rule nh a nes nr - -- XXX -dynaFunctorArgDispositions :: (B.ByteString, Int) -> [Bool] +-- XXX +dynaFunctorArgDispositions :: (DFunct, Int) -> [ArgDispos] dynaFunctorArgDispositions x = case x of - ("is", 2) -> [False,True] + ("is", 2) -> [ADQuote,ADEval] -- evaluate arithmetic / math - ("exp", 1) -> [True] - ("log", 1) -> [True] + ("exp", 1) -> [ADEval] + ("log", 1) -> [ADEval] -- logic - ("and", 2) -> [True, True] - ("or", 2) -> [True, True] - ("not", 1) -> [True] - (name, arity) -> take arity $ repeat $ not.C.isAlphaNum $ head $ BU.toString name - - -- XXX - -- - -- Functors which prefer not to be evaluated -dynaFunctorSelfDispositions :: S.Set (B.ByteString,Int) -dynaFunctorSelfDispositions = S.fromList - [ ("true",0) - , ("false",0) - , ("pair",2) - ] + ("and", 2) -> [ADEval, ADEval] + ("or", 2) -> [ADEval, ADEval] + ("not", 1) -> [ADEval] + (name, arity) -> + let d = if C.isAlphaNum $ head $ BU.toString name + then ADEval + else ADQuote + in take arity $ repeat $ d + +-- XXX +-- +-- Functors which prefer not to be evaluated +dynaFunctorSelfDispositions :: (DFunct,Int) -> SelfDispos +dynaFunctorSelfDispositions x = case x of + ("true",0) -> SDQuote + ("false",0) -> SDQuote + ("pair",2) -> SDQuote + (name, arity) -> + let d = if C.isAlphaNum $ head $ BU.toString name + then SDInherit + else SDEval + in d -- | Run the normalization routine. @@ -217,11 +266,9 @@ dynaFunctorSelfDispositions = S.fromList -- Use as @runNormalize nRule runNormalize :: ReaderT ANFDict (State ANFState) a -> (a, ANFState) runNormalize = - flip runState (AS 0 M.empty M.empty) . + flip runState (AS 0 M.empty M.empty M.empty []) . flip runReaderT (AD dynaFunctorArgDispositions dynaFunctorSelfDispositions) - - -- FIXME: "str" is the same a constant str. -- TODO: ANF Normalizer should return *flat terms* so that we have type-safety @@ -246,11 +293,9 @@ runNormalize = -- -- TODO: there might too much special handling of the comma operator... -- -bs :: B.ByteString -> Doc -- PrettyPrinter doesn't like ByteStrings -bs = text . show pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) = - parens $ (bs a) + parens $ (pretty a) <+> vcat [ (p h) , parens $ text "side" <+> (vcat $ map (text.show) e) , parens $ text "evals" <+> (q evals) @@ -258,9 +303,9 @@ pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) = , parens $ text "result" <+> (text $ show result) ] where - p (UTerm (TFunctor fn args)) = parens $ fcat $ punctuate (text " ") $ (bs fn : (map g args)) + p (UTerm (TFunctor fn args)) = parens $ hcat $ punctuate (text " ") $ (pretty fn : (map g args)) - q x = vcat $ map (\(x,y)-> parens $ bs x <+> p y) $ M.toList x + q x = vcat $ map (\(x,y)-> parens $ pretty x <+> p y) $ M.toList x -- todo: doesn't cover annotations or Functor (will `g` ever be passed a Functor?) g (UTerm (TNumeric (Left x))) = text $ show x diff --git a/src/Dyna/Analysis/NormalizeParseSelftest.hs b/src/Dyna/Analysis/NormalizeParseSelftest.hs index f18639a..36350f2 100644 --- a/src/Dyna/Analysis/NormalizeParseSelftest.hs +++ b/src/Dyna/Analysis/NormalizeParseSelftest.hs @@ -12,26 +12,22 @@ module Dyna.Analysis.NormalizeParseSelftest where -import Text.PrettyPrint import qualified Data.ByteString as B -import Dyna.Analysis.NormalizeParse +import qualified Data.List as L +import qualified Data.Map as M +import qualified Text.Trifecta as T +import Text.PrettyPrint.Free +import Dyna.Analysis.NormalizeParse import qualified Dyna.ParserHS.Parser as P -import qualified Data.ByteString as B - - -import Dyna.Term.TTerm - -import Dyna.XXX.TrifectaTest import Dyna.ParserHS.Selftest +import Dyna.Term.TTerm +import Dyna.XXX.TrifectaTest -import qualified Data.List as L -import qualified Data.Map as M -import qualified Text.Trifecta as T -testNormTerm :: B.ByteString -> (DTerm, ANFState) -testNormTerm = runNormalize . normTerm False . unsafeParse P.dterm +testNormTerm :: Bool -> B.ByteString -> (DTerm, ANFState) +testNormTerm c = runNormalize . normTerm c . unsafeParse P.dterm testNormRule :: B.ByteString -> (DRule, ANFState) testNormRule = runNormalize . normRule . unsafeParse P.drule -- 2.50.1