From ae03cd5d3ca636e26cc69671d84057fc8d001f9e Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Thu, 6 Jun 2013 01:53:44 -0400 Subject: [PATCH] Rewrite ANF normalizer Now uses destination-passing style and a new theory of contexts. It has many more opportunities to raise warnings in bizarre cases and no longer generates quite so much cruft as before. --- src/Dyna/Analysis/ANF.hs | 384 +++++++++++++++++++------------------ src/Dyna/XXX/MonadUtils.hs | 9 + 2 files changed, 209 insertions(+), 184 deletions(-) diff --git a/src/Dyna/Analysis/ANF.hs b/src/Dyna/Analysis/ANF.hs index b29ec2c..b73b74c 100644 --- a/src/Dyna/Analysis/ANF.hs +++ b/src/Dyna/Analysis/ANF.hs @@ -16,17 +16,6 @@ -- unless explicitly evaluated, or 3) prefer to be evaluated unless -- explicitly quoted. -- --- In short, explicit marks ('ECExplicit') are always obeyed; absent one, --- ('ECFunctor') the functor's self disposition ('SDQuote' or 'SDEval') is --- obeyed; if the functor has no preference ('SDInherit'), the outer --- functor's argument disposition is used as a last resort ('ADQuote' or --- 'ADEval'). There is, however, one important caveat: /variables/ and --- /primitive terms/ (e.g. numerics, strings, literal dynabases, foreign --- terms, ...) have self dispositions of preferring structural --- interpretation. Variables may be meaningfully explicitly evaluated, with --- the effect of evaluating their bindings. Attempting to evaluate a --- primitive is an error. --- -- Note that in rules, the head is by default not evaluated (regardless of -- the disposition of their outer functor), while the body is interpreted as -- a term expression (or list of term expressions) to be evaluated. @@ -51,17 +40,6 @@ -- FIXME: "str" is the same a constant str. --- timv: should there ever be more than one side condition? shouldn't it be --- a single result variable after normalization? I see that if I use comma --- to combine my conditions I get mutliple variables but should side --- condtions be combined with comma? I was under the impression that we --- always want strong Boolean values (i.e. none of that three-values null --- stuff). --- --- It would also be nice if spans were killed... maybe there is an argument --- against this. --- - -- Header material {{{ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} @@ -78,7 +56,7 @@ module Dyna.Analysis.ANF ( normTerm, normRule, runNormalize, -- * Internals - SelfDispos(..), ArgDispos(..), ECSrc(..), EvalCtx, + SelfDispos(..), ArgDispos(..), EvalMarks, -- * Placeholders evalCruxFA, findHeadFA, r_cruxes, @@ -98,61 +76,49 @@ import qualified Data.IntMap as IM import qualified Data.Set as S -- import qualified Debug.Trace as XT import Dyna.Main.Defns +import Dyna.Main.Exception import qualified Dyna.ParserHS.Parser as P import Dyna.Term.TTerm import Dyna.Term.Normalized import Dyna.Term.SurfaceSyntax -import Dyna.XXX.DataUtils (mapInOrCons) +import Dyna.XXX.DataUtils (mapInOrCons, zipWithTails) +import Dyna.XXX.MonadUtils (timesM) -- import Dyna.Test.Trifecta -- XXX import qualified Text.Trifecta as T ------------------------------------------------------------------------}}} -- Preliminaries {{{ -data ECSrc = ECFunctor - | ECExplicit - -type EvalCtx = (ECSrc,ArgDispos) +type EvalMarks = (Int,Bool) type ANFAnnots = M.Map DVar [Annotation (T.Spanned P.Term)] type ANFWarns = [(BU.ByteString, [T.Span])] newtype ANFDict = AD { ad_dt :: DisposTab } -{- - { -- | A map from (functor,arity) to a list of bits indicating whether to - -- (True) or not to (False) evaluate that positional argument. - -- - -- XXX This isn't going to work when we get more complicated terms. - -- - -- XXX Stronger type desired: we'd like static assurance that the - -- length of the list matches the arity in the key! - ad_arg_dispos :: (DFunct,Int) -> [ArgDispos] - - -- | A map from (functor,arity) to self disposition. - , ad_self_dispos :: (DFunct,Int) -> SelfDispos - } --} - -mergeDispositions :: SelfDispos -> (ECSrc, ArgDispos) -> ArgDispos + +mergeDispositions :: EvalMarks -> SelfDispos -> ArgDispos -> Int mergeDispositions = md where - md SDInherit (_,d) = d - md SDEval (ECExplicit,ADQuote) = ADQuote - md SDEval (_,_) = ADEval - md SDQuote (ECExplicit,ADEval) = ADEval - md SDQuote (_,_) = ADQuote + md (0,True) _ _ = 0 -- Explicit "&" + md (0,False) SDQuote _ = 0 -- No marks, self-quote + md (0,False) SDInherit ADQuote = 0 -- No marks, inherit quoted + md (0,False) SDInherit ADEval = 1 -- No marks, inherit eval + md (0,False) SDEval _ = 1 -- No marks, self-eval + md (n,True) _ _ = n -- n *s followed by & + md (n,False) SDEval _ = n+1 -- n *s, self-eval + md (n,False) _ _ = n -- n *s, self-quote or eval ------------------------------------------------------------------------}}} -- Cruxes {{{ -data EvalCrux v = CCall v [v] DFunct - | CEval v v +data EvalCrux v = CCall v [v] DFunct -- ^ Known structure evaluation + | CEval v v -- ^ Indirect evaluation deriving (Eq,Ord,Show) -data UnifCrux v n = CStruct v [v] DFunct -- Known structure building - | CAssign v n -- Constant loading - | CEquals v v -- Equality constraint - | CNotEqu v v -- Disequality constraint +data UnifCrux v n = CStruct v [v] DFunct -- ^ Known structure building + | CAssign v n -- ^ Constant loading + | CEquals v v -- ^ Equality constraint + | CNotEqu v v -- ^ Disequality constraint deriving (Eq,Ord,Show) type Crux v n = Either (Int,EvalCrux v) (UnifCrux v n) @@ -199,16 +165,26 @@ nextVar pfx = do vn <- as_next_var <<%= (+1) return $ BU.fromString $ pfx ++ show vn -newEval :: (MonadState ANFState m) => String -> EVF -> m DVar -newEval pfx t = do - n <- nextVar pfx +mkFromUVar :: (MonadState ANFState m) => B.ByteString -> m B.ByteString +mkFromUVar v = if v == "_" then nextVar "_w" else return (BC.cons 'u' v) + +doEval :: (MonadState ANFState m) => EVF -> DVar -> m () +doEval t n = do ne <- as_next_eval <<%= (+1) as_ecruxes %= IM.insert ne (either (CEval n) (uncurry (flip (CCall n))) t) + +newEval :: (MonadState ANFState m) => String -> EVF -> m DVar +newEval pfx t = do + n <- nextVar pfx + doEval t n return n -newAssign :: (MonadState ANFState m) => String -> ENF -> m DVar -newAssign pfx t = +doLoadBase :: (MonadState ANFState m) => TBase -> DVar -> m () +doLoadBase n v = addUCrux (CAssign v n) + +newLoad :: (MonadState ANFState m) => String -> ENF -> m DVar +newLoad pfx t = case t of Left (NTVar v) -> return v Left (NTBase b) -> go (Left b) @@ -219,45 +195,44 @@ newAssign pfx t = addUCrux (either (CAssign n) (uncurry (flip (CStruct n))) u) return n -newAnnot :: (MonadState ANFState m) - => DVar -> Annotation (T.Spanned P.Term) -> m () -newAnnot v a = as_annot %= mapInOrCons v a - -{- -newAssignNT :: (MonadState ANFState m) => String -> NTV -> m DVar -newAssignNT _ (NTVar x) = return x -newAssignNT pfx x = newAssign pfx $ Left x --} +doStruct :: (MonadState ANFState m) => FDT -> DVar -> m () +doStruct (f,vs) v = addUCrux (CStruct v vs f) doUnif :: (MonadState ANFState m) => DVar -> DVar -> m () doUnif v w = if v == w then return () else addUCrux (CEquals v w) +doAnnot :: (MonadState ANFState m) + => Annotation (T.Spanned P.Term) -> DVar -> m () +doAnnot a v = as_annot %= mapInOrCons v a + newWarn :: (MonadState ANFState m) => B.ByteString -> [T.Span] -> m () newWarn msg loc = as_warns %= ((msg,loc):) ------------------------------------------------------------------------}}} -- Normalize a Term {{{ --- | Convert a syntactic term into ANF; while here, move to a --- flattened representation. +-- | Convert a syntactic term into ANF; while here, move to a flattened +-- representation. -- -- The ANFState ensures that variables are unique; we additionally give them --- \"semi-meaningful\" prefixes, but these should not be relied upon for +-- \"meaningful\" prefixes, but these should not be relied upon for -- anything actually meaningful (but they serve as great debugging aids!). -- While here, we stick a prefix on user variables to ensure that they are -- disjoint from the variables we generate and use internally. -- --- XXX This sheds span information entirely, which is probably not what we --- actually want. Note that we're careful to keep a stack of contexts --- around, so we should probably do something clever like attach them to --- operations we extract? +-- XXX This sheds span information entirely, except in the case of warnings, +-- which is probably not what we actually want. Note that we're careful to +-- keep a stack of contexts around, so we should probably do something +-- clever like attach them to operations we extract? normTerm_ :: (Functor m, MonadState ANFState m, MonadReader ANFDict m) - => EvalCtx -- ^ In an evaluation context? + => ArgDispos -- ^ The disposition of the outermost context + -> EvalMarks -- ^ Evaluation marks accumulated -> [T.Span] -- ^ List of spans traversed + -> Maybe DVar -- ^ Destination, if present -> P.Term -- ^ Term being digested - -> m NTV + -> m () -- Variables only evaluate in explicit context -- @@ -266,71 +241,53 @@ normTerm_ :: (Functor m, MonadState ANFState m, MonadReader ANFDict m) -- names. -- -- XXX is this the right place for that? -normTerm_ c _ (P.TVar v) = do - v' <- if v == "_" then nextVar "_w" else return (BC.cons 'u' v) - case c of - (ECExplicit,ADEval) -> NTVar `fmap` newEval "_v" (Left v') - _ -> return $ NTVar v' +normTerm_ a m _ d (P.TVar v) = do + v' <- mkFromUVar v + v'' <- timesM (newEval "_v" . Left) (mergeDispositions m SDQuote a) v' + maybe (return ()) (doUnif v'') d -- Numerics get returned in-place and raise a warning if they are evaluated. -normTerm_ c ss (P.TBase x@(TNumeric _)) = do - case c of - (ECExplicit,ADEval) -> newWarn "Ignoring request to evaluate numeric" ss - _ -> return () - return $ NTBase x +normTerm_ _ m ss d (P.TBase x@(TNumeric _)) = do + case m of + (_,True) -> newWarn "Quoting numerics is unnecessary" ss + (0,False) -> return () + (_,False) -> newWarn "Ignoring request to evaluate numeric" ss + maybe (newWarn "Numeric literal is discarded" ss) + (doLoadBase x) + d -- Strings too -normTerm_ c ss (P.TBase x@(TString _)) = do - case c of - (ECExplicit,ADEval) -> newWarn "Ignoring request to evaluate string" ss - _ -> return () - return $ NTBase x - --- Annotations --- --- XXX this is probably the wrong thing to do -normTerm_ c ss (P.TAnnot a (t T.:~ st)) = do - v <- normTerm_ c (st:ss) t >>= newAssign "_a" . Left - newAnnot v a - return $ NTVar v - --- Quote makes the context explicitly a quoting one -normTerm_ _ ss (P.TFunctor f [t T.:~ st]) | f == dynaQuoteOper = do - normTerm_ (ECExplicit,ADQuote) (st:ss) t - --- Evaluation is a little different: in addition to forcing the context to --- evaluate, it must also evaluate if the context from on high is one of --- evaluation! --- --- XXX This is not right. Consider ":-dispos &pair(*,*)." If I want to --- evaluate a pair/2 in evaluation context, then I need to write &(*pair(1,2)), --- so that the & suppresses the context's influence and * overrides the --- pair's disposition.... actually that's true of variables, too. f(*X) --- does not mean what it should in the new syntax. -normTerm_ c ss (P.TFunctor f [t T.:~ st]) | f == dynaEvalOper = - normTerm_ (ECExplicit,ADEval) (st:ss) t - >>= \nt -> case c of - (_,ADEval) -> case nt of - NTVar v -> NTVar `fmap` newEval "_s" (Left v) - NTBase b -> do - newWarn "Ignoring deref of literal" ss - return $ NTBase b - _ -> return nt +normTerm_ _ m ss d (P.TBase x@(TString _)) = do + case m of + (_,True) -> newWarn "Quoting strings is unnecessary" ss + (0,False) -> return () + (_,False) -> newWarn "Ignoring request to evaluate string" ss + maybe (newWarn "String literal is discarded" ss) + (doLoadBase x) + d -- "is/2" is sort of exciting. We normalize the second argument in an -- evaluation context and the first in a quoted context. Then, if the -- result is quoted, we simply build up some structure. If it's evaluated, -- on the other hand, we reduce it to a unification of these two pieces and -- return (XXX what ought to be) a unit. -normTerm_ c ss (P.TFunctor f [x T.:~ sx, v T.:~ sv]) | f == dynaEvalAssignOper = do - nx <- normTerm_ (ECFunctor, ADQuote) (sx:ss) x >>= newAssign "_i" . Left - nv <- normTerm_ (ECFunctor, ADEval ) (sv:ss) v >>= newAssign "_i" . Left - case c of - (_,ADEval) -> do - _ <- doUnif nx nv - NTVar `fmap` newAssign "_i" (Right (dynaUnitTerm,[])) - _ -> do - NTVar `fmap` newAssign "_i" (Right (dynaEvalAssignOper,[nx,nv])) +normTerm_ a m ss d (P.TFunctor f [x T.:~ sx, v T.:~ sv]) + | f == dynaEvalAssignOper = do + nx <- nextVar "_i" + normTerm_ ADQuote (0,False) (sx:ss) (Just nx) x + + nv <- nextVar "_j" + normTerm_ ADEval (0,False) (sv:ss) (Just nv) v + + case (d, mergeDispositions m SDInherit a) of + (Nothing, 0) -> newWarn "Quoted functor discarded" ss + (Just d', 0) -> doStruct (dynaEvalAssignOper,[nx,nv]) d' + (Nothing, 1) -> doUnif nx nv + (_ , n) -> do + _ <- doUnif nx nv + t <- newLoad "_x" (Right (dynaUnitTerm,[])) + r <- timesM (newEval "_x" . Left) (n-1) t + maybe (return ()) (doUnif r) d -- ",/2", "whenever/2", and "for/2" are also reserved words of the language -- and get handled here. @@ -340,58 +297,117 @@ normTerm_ c ss (P.TFunctor f [x T.:~ sx, v T.:~ sv]) | f == dynaEvalAssignOper -- reason to make the backend know about them since that's also wrong! -- -- XXX XREF:ANFRESERVED -normTerm_ (_,ADEval) ss (P.TFunctor f [i T.:~ si, r T.:~ sr]) | f == dynaConjOper = do - ni <- normTerm_ (ECFunctor, ADEval) (si:ss) i >>= newAssign "_e" . Left - nv <- normTerm_ (ECFunctor, ADEval) (sr:ss) r >>= newAssign "_e" . Left - - t' <- newAssign "_e" (Right (dynaUnitTerm,[])) - _ <- doUnif ni t' - return $ NTVar nv - -normTerm_ c@(_,ADEval) ss (P.TFunctor f [sr, si]) | f `elem` dynaRevConjOpers = - normTerm_ c ss (P.TFunctor dynaConjOper [si,sr]) - --- Functors have both top-down and bottom-up dispositions on --- their handling. -normTerm_ c ss (P.TFunctor f as) = do - - argdispos <- asks $ flip dt_argEvalDispos (f,length as) . ad_dt - normas <- mapM (\(a T.:~ s,d) -> normTerm_ (ECFunctor,d) (s:ss) a) - (zip as argdispos) - - -- Convert everything to DVars and, while here, do a linearization - -- pass to strip duplicate vars out. We need pattern matching to be - -- linear-with-checks in later pipeline stages so that we can, for - -- example, correctly reject updates that are not the right shape. - normas' <- let delin (vs,r) x = - case x of - NTVar v | not (v `elem` vs) -> do - return (v:vs,v:r) - NTVar v -> do - v' <- nextVar "_x" - doUnif v v' - return (vs,v':r) - _ -> do - v' <- newAssign "_x" (Left x) - return (vs,v':r) - in (reverse . snd) `fmap` foldM delin ([],[]) normas - - selfdispos <- asks $ flip dt_selfEvalDispos (f,length as) . ad_dt - - let dispos = mergeDispositions selfdispos c - - fmap NTVar $ - case dispos of - ADEval -> newEval "_f" . Right - ADQuote -> newAssign "_u" . Right - $ (f,normas') +normTerm_ a m ss d (P.TFunctor f [i T.:~ si, r T.:~ sr]) + | f == dynaConjOper = + normConjunct ss f i si r sr (mergeDispositions m SDInherit a) d False + +normTerm_ a m ss d (P.TFunctor f [r T.:~ sr, i T.:~ si]) + | f `elem` dynaRevConjOpers = + normConjunct ss f i si r sr (mergeDispositions m SDInherit a) d True + +-- Annotations +-- +-- XXX this is probably the wrong thing to do +normTerm_ a m ss d (P.TAnnot an (t T.:~ st)) = do + normTerm_ a m (st:ss) d t + maybe (newWarn "Annotation discarded" ss) + (doAnnot an) + d + +-- Quote makes the context explicitly a quoting one +normTerm_ a (n,q) ss d (P.TFunctor f [t T.:~ st]) | f == dynaQuoteOper = do + when q $ newWarn "Superfluous quotation marker" ss + normTerm_ a (n,True) (st:ss) d t + +-- Evaluation just bumps the number of evaluations and resets the quoted +-- flag to False +normTerm_ a (n,_) ss d (P.TFunctor f [t T.:~ st]) | f == dynaEvalOper = + normTerm_ a (n+1,False) (st:ss) d t + +-- Ah, the "boring" case of functors! +normTerm_ ctx m ss d (P.TFunctor f as) = do + + -- Look up argument disposition + argdispos <- asks $ flip dt_argEvalDispos (f,length as) . ad_dt + + -- Conjure up destinations for all arguments, trying to preserve the + -- original variables here if we can, but doing a linearization + -- pass while we're at it. + argvars <- flip evalStateT S.empty $ forM as $ \a -> do + already <- get + case a of + P.TVar avv T.:~ _ + | not (avv `S.member` already) + -> modify (S.insert avv) >> lift (mkFromUVar avv) + _ -> lift (nextVar "_a") + + -- Normalize all arguments appropriately + mapM_ (\(a T.:~ s,(v,c)) -> normTerm_ c (0,False) (s:ss) (Just v) a) + ( zipWithTails (,) panic panic as + $ zipWithTails (,) panic panic argvars argdispos) + + -- Look up self disposition + selfdispos <- asks $ flip dt_selfEvalDispos (f,length as) . ad_dt + + -- And bring everything together + case (mergeDispositions m selfdispos ctx, d) of + (0,Nothing) -> newWarn "Quoted functor discarded" ss + (0,Just d') -> doStruct (f,argvars) d' + (1,Just d') -> doEval (Right (f,argvars)) d' + (n,_ ) -> do + t <- newEval "_e" (Right (f,argvars)) + ct <- timesM (newEval "_x" . Left) (n-1) t + maybe (return ()) (doUnif ct) d + + where + panic = dynacPanic "Length mismatch in disposition table while normalizing" + + + +normConjunct :: (Functor m, MonadReader ANFDict m, MonadState ANFState m) + => [T.Span] + -> DFunct + -> P.Term -> T.Span -> P.Term -> T.Span + -> Int + -> Maybe DVar + -> Bool + -> m () +normConjunct ss f i si r sr n d rev = + case (n,d) of + (0,Nothing) -> do + go Nothing Nothing + newWarn "Quoted functor discarded" ss + (0,Just d') -> do + di <- nextVar "_b" + dr <- nextVar "_c" + go (Just di) (Just dr) + doStruct (selfstruct di dr) d' + (1,Just d') -> go Nothing (Just d') + (_,_ ) -> do + di <- nextVar "_b" + dr <- nextVar "_c" + go (Just di) (Just dr) + t <- newLoad "_x" (Right $ selfstruct di dr) + ct <- timesM (newEval "_x" . Left) (n-1) t + maybe (return ()) (doUnif ct) d + + + where + selfstruct ni nr = (f,if rev then [nr,ni] else [ni,nr]) + + go di dr = do + normTerm_ ADEval (0,False) (si:ss) di i + normTerm_ ADEval (0,False) (sr:ss) dr r + normTerm :: (Functor m, MonadState ANFState m, MonadReader ANFDict m) - => Bool -- ^ In an evaluation context? + => ArgDispos -- ^ In an evaluation context? -> T.Spanned P.Term -- ^ Term to digest - -> m NTV -normTerm c (t T.:~ s) = normTerm_ (ECFunctor,if c then ADEval else ADQuote) - [s] t + -> m DVar +normTerm a (t T.:~ s) = do + v <- nextVar "_t" + normTerm_ a (0,False) [s] (Just v) t + return v ------------------------------------------------------------------------}}} -- Normalize a Rule {{{ @@ -412,8 +428,8 @@ normRule :: (RuleIx, DisposTab, T.Spanned P.Rule) -- ^ Rule to digest -> (Rule, ANFWarns) normRule (i, dt, P.Rule h a r T.:~ sp) = let (ru,s) = runNormalize dt $ do - nh <- normTerm False h >>= newAssign "_h" . Left - nr <- normTerm True r >>= newAssign "_r" . Left + nh <- normTerm ADQuote h + nr <- normTerm ADEval r return $ Rule i nh a nr sp in (ru (s^.as_annot) (s^.as_ucruxes) (s^.as_ecruxes),s^.as_warns) diff --git a/src/Dyna/XXX/MonadUtils.hs b/src/Dyna/XXX/MonadUtils.hs index 9d1c069..db9aaa4 100644 --- a/src/Dyna/XXX/MonadUtils.hs +++ b/src/Dyna/XXX/MonadUtils.hs @@ -1,6 +1,8 @@ module Dyna.XXX.MonadUtils( -- * Data utilities generalizing 'Dyna.XXX.DataUtils' mapForallM, mapExistsM, setForallM, setExistsM, + -- * Control-flow + timesM, -- * Logic utilities andM, andM1, orM, orM1, allM, anyM, -- * MonadState utilities @@ -34,6 +36,13 @@ allM = foldM andM1 True anyM :: Monad m => [m Bool] -> m Bool anyM = foldM orM1 False +timesM :: Monad m => (a -> m a) -> Int -> a -> m a +timesM f = go + where + go n _ | n < 0 = error "timesM negative iteration count" + go 0 a = return a + go n a = f a >>= go (n-1) + mapForallM, mapExistsM :: (Monad m) => (k -> v -> m Bool) -> M.Map k v -> m Bool mapForallM p m = M.foldrWithKey (\k v -> (andM $ p k v)) (return True ) m -- 2.50.1