From 3621dcbef7b8a81a7ba4c38204d91629ac195307 Mon Sep 17 00:00:00 2001
From: Nathaniel Wesley Filardo <nwf@cs.jhu.edu>
Date: Thu, 29 Nov 2012 00:09:30 -0500
Subject: [PATCH] First part of new NormalizeParse

---
 src/Dyna/Analysis/NormalizeParse.hs         | 221 ++++++++++++--------
 src/Dyna/Analysis/NormalizeParseSelftest.hs |  22 +-
 2 files changed, 142 insertions(+), 101 deletions(-)

diff --git a/src/Dyna/Analysis/NormalizeParse.hs b/src/Dyna/Analysis/NormalizeParse.hs
index c9b48a0..4934793 100644
--- a/src/Dyna/Analysis/NormalizeParse.hs
+++ b/src/Dyna/Analysis/NormalizeParse.hs
@@ -3,23 +3,27 @@
 --
 -- In Dyna's surface syntax, there exists both \"in-place evaluation\" and
 -- \"in-place construction\".  How do we deal with this?  Well, it's a
--- little messy:
+-- little messy.
 --
---   1. If a term is wrapped by the unary @*@ functor, it is explicitly
---   moved to an ANF evaluation and replaced by a variable.  So
---   @f(*g(X))@ rewrites as @Y is g(X), f(Y)@.
+--   1. There are explicit \"eval\" (@*@) and \"quote\" (@&@) operators
+--   which may be used to manually specify which is intended.
 --
---   2. Each functor gets a chance to specify, for each argument, that it
---   would prefer to evaluate a given position.  If so, and if the position
---   is occupied by a non-variable, non-primitive term (e.g., atom or
---   structure) whose disposition is to evaluate and which is not quoted by
---   the unary @&@ functor, then it is moved to an ANF evaluation.  If f
---   prefers to evaluate only its first position, and @g@ is disposed to
---   evaluation by default, @f(g(X),h(Y))@ rewrites as @Z is g(X),
---   f(Z,h(Y))@.
+--   2. Functors specify \"argument dispositions\", indicating whether they
+--   prefer to evaluate or build structure in each argument position.
 --
---   3. Otherwise, a non-variable, non-primitive term in an argument will
---   be interpreted as structure-building.
+--   3. Functors further specify \"self disposition\", indicating whether
+--   they 1) leave the decision to the parent, 2) prefer to build structure
+--   unless explicitly evaluated, or 3) prefer to be evaluated unless
+--   explicitly quoted.
+--
+-- In short, explicit marks are always obeyed; absent one, the functor's
+-- self disposition is obeyed; if the functor has no preference, the outer
+-- functor's argument disposition is used as a last resort.  There is,
+-- however, one important caveat: /variables/ and /primitive terms/ (e.g.
+-- numerics, strings, literal dynabases, foreign terms, ...) have self
+-- dispositions of preferring structural interpretation.  Variables may be
+-- meaningfully explicitly evaluated, with the effect of evaluating their
+-- bindings.  Attempting to evaluate a primitive is an error.
 --
 -- Note that in rules, the head is by default not evaluated (regardless of
 -- the disposition of their outer functor), while the body is interpreted as
@@ -49,6 +53,7 @@ import qualified Data.ByteString.UTF8       as BU
 import qualified Data.ByteString            as B
 import qualified Data.Map                   as M
 import qualified Data.Set                   as S
+import           Text.PrettyPrint.Free
 import qualified Text.Trifecta              as T
 
 import qualified Dyna.ParserHS.Parser       as P
@@ -57,8 +62,17 @@ import           Dyna.Term.TTerm
 
 import qualified Data.Char as C
 
-import Text.PrettyPrint
+data SelfDispos = SDInherit
+                | SDEval
+                | SDQuote
+
+data ArgDispos = ADEval
+               | ADQuote
+
+data ECSrc = ECFunctor
+           | ECExplicit
 
+type EvalCtx = (ECSrc,ArgDispos)
 
 data ANFDict = AD
   { -- | A map from (functor,arity) to a list of bits indicating whether to
@@ -68,12 +82,19 @@ data ANFDict = AD
     --
     -- XXX Stronger type desired: we'd like static assurance that the
     -- length of the list matches the arity in the key!
-    ad_arg_dispos  :: (B.ByteString,Int) -> [Bool]
+    ad_arg_dispos  :: (DFunct,Int) -> [ArgDispos]
 
-    -- | The set of functors that prefer not to be evaluated.
-  , ad_self_dispos :: S.Set (B.ByteString,Int)
+    -- | A map from (functor,arity) to self disposition.
+  , ad_self_dispos :: (DFunct,Int) -> SelfDispos
   }
 
+mergeDispositions = md
+ where
+  md SDInherit (_,d)                = d
+  md SDEval    (ECExplicit,ADQuote) = ADQuote
+  md SDEval    (_,_)                = ADEval
+  md SDQuote   (ECExplicit,ADEval)  = ADEval
+  md SDQuote   (_,_)                = ADQuote
 
 {- This stage of ANF does not actually link evaluations to
  - their semantic interpretation.  That is, we have not yet
@@ -81,12 +102,14 @@ data ANFDict = AD
  -}
 data ANFState = AS
               { as_next  :: !Int
-              , as_evals :: M.Map B.ByteString DTerm
-              , as_unifs :: M.Map B.ByteString DTerm
+              , as_evals :: M.Map DVar DTerm
+              , as_unifs :: M.Map DVar DTerm
+              , as_annot :: M.Map DVar [T.Spanned (Annotation DTerm)]
+              , as_warns :: [(B.ByteString, [T.Span])]
               }
  deriving (Show)
 
-nextVar :: (MonadState ANFState m) => String -> m B.ByteString
+nextVar :: (MonadState ANFState m) => String -> m DVar
 nextVar pfx = do
     vn  <- gets as_next
     modify (\s -> s { as_next = vn + 1 })
@@ -96,16 +119,18 @@ newEval :: (MonadState ANFState m) => String -> DTerm -> m DTerm
 newEval pfx t = do
     n   <- nextVar pfx
     evs <- gets as_evals
-    modify (\s -> s { as_evals = M.insert n t evs})
+    modify (\s -> s { as_evals = M.insert n t evs })
     return $ UVar n
 
 newUnif :: (MonadState ANFState m) => String -> DTerm -> m DTerm
 newUnif pfx t = do
     n   <- nextVar pfx
     uns <- gets as_unifs
-    modify (\s -> s { as_unifs = M.insert n t uns})
+    modify (\s -> s { as_unifs = M.insert n t uns })
     return $ UVar n
 
+newWarn :: (MonadState ANFState m) => B.ByteString -> [T.Span] -> m ()
+newWarn msg loc = modify (\s -> s { as_warns = (msg,loc):(as_warns s) })
 
 unspan :: T.Spanned P.Term -> DTerm
 unspan (P.TVar v T.:~ _)        = UVar v
@@ -117,65 +142,81 @@ unspan (P.TAnnot a t T.:~ _)    = UTerm $ TAnnot (fmap unspan a) (unspan t)
 -- | Convert a syntactic term into ANF; while here, move to a
 -- Control.Unification term representation.
 --
+-- The ANFState ensures that variables are unique; we additionally give them
+-- \"semi-meaningful\" prefixes, but these should not be relied upon.
+--
+-- XXX On second thought, we should just move to a @TermF B.ByteString
+-- Var@ representation, since we want everything flattened.
+--
 -- XXX This sheds span information entirely, which is probably not what we
 -- actually want.  Note that we're careful to keep a stack of contexts
 -- around, so we should probably do something clever like attach them to
 -- operations we extract?
 normTerm_ :: (MonadState ANFState m, MonadReader ANFDict m)
-               => Bool          -- ^ In an evaluation context?
-               -> Bool          -- ^ Unpack unifications?
+               => EvalCtx       -- ^ In an evaluation context?
                -> [T.Span]      -- ^ List of spans traversed
                -> P.Term        -- ^ Term being digested
                -> m DTerm
 
-    -- Variables don't evaluate and don't need to be moved
-normTerm_ _ _   _  (P.TVar  v)       = return $ UVar v
+-- Variables only evaluate in explicit context 
+normTerm_ c _ (P.TVar v) =
+    case c of
+       (ECExplicit,ADEval) -> newEval "_$v"
+       _                   -> return
+    $ UVar v
+
+-- Numerics get returned in-place and raise a warning if they are evaluated.
+normTerm_ c   ss  (P.TNumeric n)    = do
+    case c of
+      (ECExplicit,ADEval) -> newWarn "Ignoring request to evaluate numeric" ss
+      _                   -> return ()
+    return $ UTerm $ TNumeric n
+
+-- Quote makes the context explicitly a quoting one
+normTerm_ _   ss (P.TFunctor "&" [t T.:~ st]) = do
+    normTerm_ (ECExplicit,ADQuote) (st:ss) t
+
+-- Evaluation is a little different: in addition to forcing the context to
+-- evaluate, it must also evaluate if the context from on high is one of
+-- evaluation!
+normTerm_ c   ss (P.TFunctor "*" [t T.:~ st]) =
+    normTerm_ (ECExplicit,ADEval) (st:ss) t
+    >>= case c of
+          (_,ADEval) -> newEval "_$s"
+          _          -> return
+
+-- Annotations are stripped of their span information
+--
+-- XXX this is probably the wrong thing to do
+normTerm_ c   ss (P.TAnnot a (t T.:~ st)) = do
+    nt <- normTerm_ c (st:ss) t
+    return $ UTerm $ TAnnot (fmap unspan a) nt
 
-    -- Numerics also get returned in-place.
-normTerm_ _ _   _  (P.TNumeric n)    = return $ UTerm $ TNumeric n
+-- Functors have both top-down and bottom-up dispositions on
+-- their handling.
+normTerm_ c   ss (P.TFunctor f as) = do
 
-    -- FIXME: (nwf) Quote simply disappears having converted the context to a
-    -- non-evaluation context.
-normTerm_ _ _   ss (P.TFunctor "&" [t T.:~ st]) = do
-    normTerm_ False True (st:ss) t
+    argdispos <- asks $ flip ($) (f,length as) . ad_arg_dispos
+    normas <- mapM (\(a T.:~ s,d) -> normTerm_ (ECFunctor,d) (s:ss) a)
+                   (zip as argdispos)
 
-    -- Star forces evaluation even when the argument would prefer
-    -- to not be evaluated, thus the sort of odd "normalize in
-    -- nonevaluation context then eval" here.
-normTerm_ _ _   ss (P.TFunctor "*" [t T.:~ st]) = do
-    normTerm_ False True (st:ss) t >>= newEval "_s"
+    selfdispos <- asks $ flip ($) (f,length as) . ad_self_dispos
 
-     -- Annotations are stripped of their span information
-     --
-     -- XXX this is probably the wrong thing to do
-normTerm_ c u   ss (P.TAnnot a (t T.:~ st)) = do
-    nt <- normTerm_ c u (st:ss) t
-    return $ UTerm $ TAnnot (fmap unspan a) nt
+    let dispos = mergeDispositions selfdispos c
 
-    -- Functors have both top-down and bottom-up dispositions on
-    -- their handling.
-normTerm_ c u   ss (P.TFunctor f as) = do
-    argdispos <- asks $ flip ($) (f,length as) . ad_arg_dispos
-    normas <- mapM (\(a T.:~ s,d) -> normTerm_ d True (s:ss) a) (zip as argdispos)
-    selfdispos <- getSelfDispos
-    (case () of
-       _ | c && selfdispos -> newEval "_f"
-       _ | u               -> newUnif "_u"
-       _                   -> return)
+    (case dispos of
+        ADEval  -> newEval "_$f"
+        ADQuote -> newUnif "_$u")
      $ UTerm $ TFunctor f normas
- where
-   getSelfDispos = do
-    set <- asks $ not . S.member (f,length as) . ad_self_dispos
-    return set
-
 
 normTerm :: (MonadState ANFState m, MonadReader ANFDict m)
          => Bool               -- ^ In an evaluation context?
          -> T.Spanned P.Term   -- ^ Term to digest
          -> m DTerm
-normTerm c (t T.:~ s) = normTerm_ c False [s] t
+normTerm c (t T.:~ s) = normTerm_ (ECFunctor,if c then ADEval else ADQuote)
+                                  [s] t
 
-    -- XXX
+-- XXX
 normRule :: (MonadState ANFState m, MonadReader ANFDict m)
          => T.Spanned P.Rule   -- ^ Term to digest
          -> m DRule
@@ -188,28 +229,36 @@ normRule (P.Rule h a es r T.:~ _) = do
     nes <- mapM (normTerm True) es
     return $ Rule nh a nes nr
 
-    -- XXX
-dynaFunctorArgDispositions :: (B.ByteString, Int) -> [Bool]
+-- XXX
+dynaFunctorArgDispositions :: (DFunct, Int) -> [ArgDispos]
 dynaFunctorArgDispositions x = case x of
-    ("is", 2) -> [False,True]
+    ("is", 2) -> [ADQuote,ADEval]
     -- evaluate arithmetic / math
-    ("exp", 1) -> [True]
-    ("log", 1) -> [True]
+    ("exp", 1) -> [ADEval]
+    ("log", 1) -> [ADEval]
     -- logic
-    ("and", 2) -> [True, True]
-    ("or", 2)  -> [True, True]
-    ("not", 1) -> [True]
-    (name, arity) -> take arity $ repeat $ not.C.isAlphaNum $ head $ BU.toString name
-
-    -- XXX
-    --
-    -- Functors which prefer not to be evaluated
-dynaFunctorSelfDispositions :: S.Set (B.ByteString,Int)
-dynaFunctorSelfDispositions = S.fromList
-    [ ("true",0)
-    , ("false",0)
-    , ("pair",2)
-    ]
+    ("and", 2) -> [ADEval, ADEval]
+    ("or", 2)  -> [ADEval, ADEval]
+    ("not", 1) -> [ADEval]
+    (name, arity) ->
+       let d = if C.isAlphaNum $ head $ BU.toString name
+                then ADEval
+                else ADQuote
+       in take arity $ repeat $ d
+
+-- XXX
+--
+-- Functors which prefer not to be evaluated
+dynaFunctorSelfDispositions :: (DFunct,Int) -> SelfDispos
+dynaFunctorSelfDispositions x = case x of
+    ("true",0)   -> SDQuote
+    ("false",0)  -> SDQuote
+    ("pair",2)   -> SDQuote
+    (name, arity) ->
+       let d = if C.isAlphaNum $ head $ BU.toString name
+                then SDInherit
+                else SDEval
+       in d
 
 
 -- | Run the normalization routine.
@@ -217,11 +266,9 @@ dynaFunctorSelfDispositions = S.fromList
 -- Use as @runNormalize nRule
 runNormalize :: ReaderT ANFDict (State ANFState) a -> (a, ANFState)
 runNormalize =
-  flip runState   (AS 0 M.empty M.empty) .
+  flip runState   (AS 0 M.empty M.empty M.empty []) .
   flip runReaderT (AD dynaFunctorArgDispositions dynaFunctorSelfDispositions)
 
-
-
 -- FIXME: "str" is the same a constant str.
 
 -- TODO: ANF Normalizer should return *flat terms* so that we have type-safety
@@ -246,11 +293,9 @@ runNormalize =
 --
 --  TODO: there might too much special handling of the comma operator...
 --
-bs :: B.ByteString -> Doc  -- PrettyPrinter doesn't like ByteStrings
-bs = text . show
 
 pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) =
-  parens $ (bs a)
+  parens $ (pretty a)
            <+> vcat [ (p h)
                     , parens $ text "side"   <+> (vcat $ map (text.show) e)
                     , parens $ text "evals"  <+> (q evals)
@@ -258,9 +303,9 @@ pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) =
                     , parens $ text "result" <+> (text $ show result)
                     ]
   where
-    p (UTerm (TFunctor fn args)) = parens $ fcat $ punctuate (text " ") $ (bs fn : (map g args))
+    p (UTerm (TFunctor fn args)) = parens $ hcat $ punctuate (text " ") $ (pretty fn : (map g args))
 
-    q x = vcat $ map (\(x,y)-> parens $ bs x <+> p y) $ M.toList x
+    q x = vcat $ map (\(x,y)-> parens $ pretty x <+> p y) $ M.toList x
 
     -- todo: doesn't cover annotations or Functor (will `g` ever be passed a Functor?)
     g (UTerm (TNumeric (Left x))) = text $ show x
diff --git a/src/Dyna/Analysis/NormalizeParseSelftest.hs b/src/Dyna/Analysis/NormalizeParseSelftest.hs
index f18639a..36350f2 100644
--- a/src/Dyna/Analysis/NormalizeParseSelftest.hs
+++ b/src/Dyna/Analysis/NormalizeParseSelftest.hs
@@ -12,26 +12,22 @@
 
 module Dyna.Analysis.NormalizeParseSelftest where
 
-import Text.PrettyPrint
 
 import qualified Data.ByteString              as B
-import           Dyna.Analysis.NormalizeParse
+import qualified Data.List                    as L
+import qualified Data.Map                     as M
+import qualified Text.Trifecta                as T
+import           Text.PrettyPrint.Free
 
+import           Dyna.Analysis.NormalizeParse
 import qualified Dyna.ParserHS.Parser         as P
-import qualified Data.ByteString              as B
-
-
-import Dyna.Term.TTerm
-
-import           Dyna.XXX.TrifectaTest
 import           Dyna.ParserHS.Selftest
+import           Dyna.Term.TTerm
+import           Dyna.XXX.TrifectaTest
 
-import qualified Data.List     as L
-import qualified Data.Map      as M
-import qualified Text.Trifecta as T
 
-testNormTerm :: B.ByteString -> (DTerm, ANFState)
-testNormTerm = runNormalize . normTerm False . unsafeParse P.dterm
+testNormTerm :: Bool -> B.ByteString -> (DTerm, ANFState)
+testNormTerm c = runNormalize . normTerm c . unsafeParse P.dterm
 
 testNormRule :: B.ByteString -> (DRule, ANFState)
 testNormRule = runNormalize . normRule . unsafeParse P.drule
-- 
2.50.1