First part of new NormalizeParse

author Nathaniel Wesley Filardo <nwf@cs.jhu.edu>

Thu, 29 Nov 2012 05:09:30 +0000 (00:09 -0500)

committer Nathaniel Wesley Filardo <nwf@cs.jhu.edu>

Thu, 29 Nov 2012 05:09:30 +0000 (00:09 -0500)
author Nathaniel Wesley Filardo <nwf@cs.jhu.edu>
Thu, 29 Nov 2012 05:09:30 +0000 (00:09 -0500)
committer Nathaniel Wesley Filardo <nwf@cs.jhu.edu>
Thu, 29 Nov 2012 05:09:30 +0000 (00:09 -0500)
diff --git a/src/Dyna/Analysis/NormalizeParse.hs b/src/Dyna/Analysis/NormalizeParse.hs

index c9b48a0d3e110f69516bee234517c597d50d4e7f..493479349c48d33a5d820812ea2faa5766729700 100644 (file)
--- a/src/Dyna/Analysis/NormalizeParse.hs
+++ b/src/Dyna/Analysis/NormalizeParse.hs
@@ -3,23 +3,27 @@
  --
  -- In Dyna's surface syntax, there exists both \"in-place evaluation\" and
  -- \"in-place construction\".  How do we deal with this?  Well, it's a
--- little messy:
+-- little messy.
  --
---   1. If a term is wrapped by the unary @*@ functor, it is explicitly
---   moved to an ANF evaluation and replaced by a variable.  So
---   @f(*g(X))@ rewrites as @Y is g(X), f(Y)@.
+--   1. There are explicit \"eval\" (@*@) and \"quote\" (@&@) operators
+--   which may be used to manually specify which is intended.
  --
---   2. Each functor gets a chance to specify, for each argument, that it
---   would prefer to evaluate a given position.  If so, and if the position
---   is occupied by a non-variable, non-primitive term (e.g., atom or
---   structure) whose disposition is to evaluate and which is not quoted by
---   the unary @&@ functor, then it is moved to an ANF evaluation.  If f
---   prefers to evaluate only its first position, and @g@ is disposed to
---   evaluation by default, @f(g(X),h(Y))@ rewrites as @Z is g(X),
---   f(Z,h(Y))@.
+--   2. Functors specify \"argument dispositions\", indicating whether they
+--   prefer to evaluate or build structure in each argument position.
  --
---   3. Otherwise, a non-variable, non-primitive term in an argument will
---   be interpreted as structure-building.
+--   3. Functors further specify \"self disposition\", indicating whether
+--   they 1) leave the decision to the parent, 2) prefer to build structure
+--   unless explicitly evaluated, or 3) prefer to be evaluated unless
+--   explicitly quoted.
+--
+-- In short, explicit marks are always obeyed; absent one, the functor's
+-- self disposition is obeyed; if the functor has no preference, the outer
+-- functor's argument disposition is used as a last resort.  There is,
+-- however, one important caveat: /variables/ and /primitive terms/ (e.g.
+-- numerics, strings, literal dynabases, foreign terms, ...) have self
+-- dispositions of preferring structural interpretation.  Variables may be
+-- meaningfully explicitly evaluated, with the effect of evaluating their
+-- bindings.  Attempting to evaluate a primitive is an error.
  --
  -- Note that in rules, the head is by default not evaluated (regardless of
  -- the disposition of their outer functor), while the body is interpreted as
@@ -49,6 +53,7 @@ import qualified Data.ByteString.UTF8       as BU
  import qualified Data.ByteString            as B
  import qualified Data.Map                   as M
  import qualified Data.Set                   as S
+import           Text.PrettyPrint.Free
  import qualified Text.Trifecta              as T
  
  import qualified Dyna.ParserHS.Parser       as P
@@ -57,8 +62,17 @@ import           Dyna.Term.TTerm
  
  import qualified Data.Char as C
  
-import Text.PrettyPrint
+data SelfDispos = SDInherit
+                | SDEval
+                | SDQuote
+
+data ArgDispos = ADEval
+               | ADQuote
+
+data ECSrc = ECFunctor
+           | ECExplicit
  
+type EvalCtx = (ECSrc,ArgDispos)
  
  data ANFDict = AD
    { -- | A map from (functor,arity) to a list of bits indicating whether to
@@ -68,12 +82,19 @@ data ANFDict = AD
      --
      -- XXX Stronger type desired: we'd like static assurance that the
      -- length of the list matches the arity in the key!
-    ad_arg_dispos  :: (B.ByteString,Int) -> [Bool]
+    ad_arg_dispos  :: (DFunct,Int) -> [ArgDispos]
  
-    -- | The set of functors that prefer not to be evaluated.
-  , ad_self_dispos :: S.Set (B.ByteString,Int)
+    -- | A map from (functor,arity) to self disposition.
+  , ad_self_dispos :: (DFunct,Int) -> SelfDispos
    }
  
+mergeDispositions = md
+ where
+  md SDInherit (_,d)                = d
+  md SDEval    (ECExplicit,ADQuote) = ADQuote
+  md SDEval    (_,_)                = ADEval
+  md SDQuote   (ECExplicit,ADEval)  = ADEval
+  md SDQuote   (_,_)                = ADQuote
  
  {- This stage of ANF does not actually link evaluations to
   - their semantic interpretation.  That is, we have not yet
@@ -81,12 +102,14 @@ data ANFDict = AD
   -}
  data ANFState = AS
                { as_next  :: !Int
-              , as_evals :: M.Map B.ByteString DTerm
-              , as_unifs :: M.Map B.ByteString DTerm
+              , as_evals :: M.Map DVar DTerm
+              , as_unifs :: M.Map DVar DTerm
+              , as_annot :: M.Map DVar [T.Spanned (Annotation DTerm)]
+              , as_warns :: [(B.ByteString, [T.Span])]
                }
   deriving (Show)
  
-nextVar :: (MonadState ANFState m) => String -> m B.ByteString
+nextVar :: (MonadState ANFState m) => String -> m DVar
  nextVar pfx = do
      vn  <- gets as_next
      modify (\s -> s { as_next = vn + 1 })
@@ -96,16 +119,18 @@ newEval :: (MonadState ANFState m) => String -> DTerm -> m DTerm
  newEval pfx t = do
      n   <- nextVar pfx
      evs <- gets as_evals
-    modify (\s -> s { as_evals = M.insert n t evs})
+    modify (\s -> s { as_evals = M.insert n t evs })
      return $ UVar n
  
  newUnif :: (MonadState ANFState m) => String -> DTerm -> m DTerm
  newUnif pfx t = do
      n   <- nextVar pfx
      uns <- gets as_unifs
-    modify (\s -> s { as_unifs = M.insert n t uns})
+    modify (\s -> s { as_unifs = M.insert n t uns })
      return $ UVar n
  
+newWarn :: (MonadState ANFState m) => B.ByteString -> [T.Span] -> m ()
+newWarn msg loc = modify (\s -> s { as_warns = (msg,loc):(as_warns s) })
  
  unspan :: T.Spanned P.Term -> DTerm
  unspan (P.TVar v T.:~ _)        = UVar v
@@ -117,65 +142,81 @@ unspan (P.TAnnot a t T.:~ _)    = UTerm $ TAnnot (fmap unspan a) (unspan t)
  -- | Convert a syntactic term into ANF; while here, move to a
  -- Control.Unification term representation.
  --
+-- The ANFState ensures that variables are unique; we additionally give them
+-- \"semi-meaningful\" prefixes, but these should not be relied upon.
+--
+-- XXX On second thought, we should just move to a @TermF B.ByteString
+-- Var@ representation, since we want everything flattened.
+--
  -- XXX This sheds span information entirely, which is probably not what we
  -- actually want.  Note that we're careful to keep a stack of contexts
  -- around, so we should probably do something clever like attach them to
  -- operations we extract?
  normTerm_ :: (MonadState ANFState m, MonadReader ANFDict m)
-               => Bool          -- ^ In an evaluation context?
-               -> Bool          -- ^ Unpack unifications?
+               => EvalCtx       -- ^ In an evaluation context?
                 -> [T.Span]      -- ^ List of spans traversed
                 -> P.Term        -- ^ Term being digested
                 -> m DTerm
  
-    -- Variables don't evaluate and don't need to be moved
-normTerm_ _ _   _  (P.TVar  v)       = return $ UVar v
+-- Variables only evaluate in explicit context 
+normTerm_ c _ (P.TVar v) =
+    case c of
+       (ECExplicit,ADEval) -> newEval "_$v"
+       _                   -> return
+    $ UVar v
+
+-- Numerics get returned in-place and raise a warning if they are evaluated.
+normTerm_ c   ss  (P.TNumeric n)    = do
+    case c of
+      (ECExplicit,ADEval) -> newWarn "Ignoring request to evaluate numeric" ss
+      _                   -> return ()
+    return $ UTerm $ TNumeric n
+
+-- Quote makes the context explicitly a quoting one
+normTerm_ _   ss (P.TFunctor "&" [t T.:~ st]) = do
+    normTerm_ (ECExplicit,ADQuote) (st:ss) t
+
+-- Evaluation is a little different: in addition to forcing the context to
+-- evaluate, it must also evaluate if the context from on high is one of
+-- evaluation!
+normTerm_ c   ss (P.TFunctor "*" [t T.:~ st]) =
+    normTerm_ (ECExplicit,ADEval) (st:ss) t
+    >>= case c of
+          (_,ADEval) -> newEval "_$s"
+          _          -> return
+
+-- Annotations are stripped of their span information
+--
+-- XXX this is probably the wrong thing to do
+normTerm_ c   ss (P.TAnnot a (t T.:~ st)) = do
+    nt <- normTerm_ c (st:ss) t
+    return $ UTerm $ TAnnot (fmap unspan a) nt
  
-    -- Numerics also get returned in-place.
-normTerm_ _ _   _  (P.TNumeric n)    = return $ UTerm $ TNumeric n
+-- Functors have both top-down and bottom-up dispositions on
+-- their handling.
+normTerm_ c   ss (P.TFunctor f as) = do
  
-    -- FIXME: (nwf) Quote simply disappears having converted the context to a
-    -- non-evaluation context.
-normTerm_ _ _   ss (P.TFunctor "&" [t T.:~ st]) = do
-    normTerm_ False True (st:ss) t
+    argdispos <- asks $ flip ($) (f,length as) . ad_arg_dispos
+    normas <- mapM (\(a T.:~ s,d) -> normTerm_ (ECFunctor,d) (s:ss) a)
+                   (zip as argdispos)
  
-    -- Star forces evaluation even when the argument would prefer
-    -- to not be evaluated, thus the sort of odd "normalize in
-    -- nonevaluation context then eval" here.
-normTerm_ _ _   ss (P.TFunctor "*" [t T.:~ st]) = do
-    normTerm_ False True (st:ss) t >>= newEval "_s"
+    selfdispos <- asks $ flip ($) (f,length as) . ad_self_dispos
  
-     -- Annotations are stripped of their span information
-     --
-     -- XXX this is probably the wrong thing to do
-normTerm_ c u   ss (P.TAnnot a (t T.:~ st)) = do
-    nt <- normTerm_ c u (st:ss) t
-    return $ UTerm $ TAnnot (fmap unspan a) nt
+    let dispos = mergeDispositions selfdispos c
  
-    -- Functors have both top-down and bottom-up dispositions on
-    -- their handling.
-normTerm_ c u   ss (P.TFunctor f as) = do
-    argdispos <- asks $ flip ($) (f,length as) . ad_arg_dispos
-    normas <- mapM (\(a T.:~ s,d) -> normTerm_ d True (s:ss) a) (zip as argdispos)
-    selfdispos <- getSelfDispos
-    (case () of
-       _ | c && selfdispos -> newEval "_f"
-       _ | u               -> newUnif "_u"
-       _                   -> return)
+    (case dispos of
+        ADEval  -> newEval "_$f"
+        ADQuote -> newUnif "_$u")
       $ UTerm $ TFunctor f normas
- where
-   getSelfDispos = do
-    set <- asks $ not . S.member (f,length as) . ad_self_dispos
-    return set
-
  
  normTerm :: (MonadState ANFState m, MonadReader ANFDict m)
           => Bool               -- ^ In an evaluation context?
           -> T.Spanned P.Term   -- ^ Term to digest
           -> m DTerm
-normTerm c (t T.:~ s) = normTerm_ c False [s] t
+normTerm c (t T.:~ s) = normTerm_ (ECFunctor,if c then ADEval else ADQuote)
+                                  [s] t
  
-    -- XXX
+-- XXX
  normRule :: (MonadState ANFState m, MonadReader ANFDict m)
           => T.Spanned P.Rule   -- ^ Term to digest
           -> m DRule
@@ -188,28 +229,36 @@ normRule (P.Rule h a es r T.:~ _) = do
      nes <- mapM (normTerm True) es
      return $ Rule nh a nes nr
  
-    -- XXX
-dynaFunctorArgDispositions :: (B.ByteString, Int) -> [Bool]
+-- XXX
+dynaFunctorArgDispositions :: (DFunct, Int) -> [ArgDispos]
  dynaFunctorArgDispositions x = case x of
-    ("is", 2) -> [False,True]
+    ("is", 2) -> [ADQuote,ADEval]
      -- evaluate arithmetic / math
-    ("exp", 1) -> [True]
-    ("log", 1) -> [True]
+    ("exp", 1) -> [ADEval]
+    ("log", 1) -> [ADEval]
      -- logic
-    ("and", 2) -> [True, True]
-    ("or", 2)  -> [True, True]
-    ("not", 1) -> [True]
-    (name, arity) -> take arity $ repeat $ not.C.isAlphaNum $ head $ BU.toString name
-
-    -- XXX
-    --
-    -- Functors which prefer not to be evaluated
-dynaFunctorSelfDispositions :: S.Set (B.ByteString,Int)
-dynaFunctorSelfDispositions = S.fromList
-    [ ("true",0)
-    , ("false",0)
-    , ("pair",2)
-    ]
+    ("and", 2) -> [ADEval, ADEval]
+    ("or", 2)  -> [ADEval, ADEval]
+    ("not", 1) -> [ADEval]
+    (name, arity) ->
+       let d = if C.isAlphaNum $ head $ BU.toString name
+                then ADEval
+                else ADQuote
+       in take arity $ repeat $ d
+
+-- XXX
+--
+-- Functors which prefer not to be evaluated
+dynaFunctorSelfDispositions :: (DFunct,Int) -> SelfDispos
+dynaFunctorSelfDispositions x = case x of
+    ("true",0)   -> SDQuote
+    ("false",0)  -> SDQuote
+    ("pair",2)   -> SDQuote
+    (name, arity) ->
+       let d = if C.isAlphaNum $ head $ BU.toString name
+                then SDInherit
+                else SDEval
+       in d
  
  
  -- | Run the normalization routine.
@@ -217,11 +266,9 @@ dynaFunctorSelfDispositions = S.fromList
  -- Use as @runNormalize nRule
  runNormalize :: ReaderT ANFDict (State ANFState) a -> (a, ANFState)
  runNormalize =
-  flip runState   (AS 0 M.empty M.empty) .
+  flip runState   (AS 0 M.empty M.empty M.empty []) .
    flip runReaderT (AD dynaFunctorArgDispositions dynaFunctorSelfDispositions)
  
-
-
  -- FIXME: "str" is the same a constant str.
  
  -- TODO: ANF Normalizer should return *flat terms* so that we have type-safety
@@ -246,11 +293,9 @@ runNormalize =
  --
  --  TODO: there might too much special handling of the comma operator...
  --
-bs :: B.ByteString -> Doc  -- PrettyPrinter doesn't like ByteStrings
-bs = text . show
  
  pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) =
-  parens $ (bs a)
+  parens $ (pretty a)
             <+> vcat [ (p h)
                      , parens $ text "side"   <+> (vcat $ map (text.show) e)
                      , parens $ text "evals"  <+> (q evals)
@@ -258,9 +303,9 @@ pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) =
                      , parens $ text "result" <+> (text $ show result)
                      ]
    where
-    p (UTerm (TFunctor fn args)) = parens $ fcat $ punctuate (text " ") $ (bs fn : (map g args))
+    p (UTerm (TFunctor fn args)) = parens $ hcat $ punctuate (text " ") $ (pretty fn : (map g args))
  
-    q x = vcat $ map (\(x,y)-> parens $ bs x <+> p y) $ M.toList x
+    q x = vcat $ map (\(x,y)-> parens $ pretty x <+> p y) $ M.toList x
  
      -- todo: doesn't cover annotations or Functor (will `g` ever be passed a Functor?)
      g (UTerm (TNumeric (Left x))) = text $ show x
diff --git a/src/Dyna/Analysis/NormalizeParseSelftest.hs b/src/Dyna/Analysis/NormalizeParseSelftest.hs

index f18639aa22b723a37b25a175c21ddebc7ed5fb83..36350f2cfc5992c3fa7ce6c443337d58e918e498 100644 (file)
--- a/src/Dyna/Analysis/NormalizeParseSelftest.hs
+++ b/src/Dyna/Analysis/NormalizeParseSelftest.hs
@@ -12,26 +12,22 @@
  
  module Dyna.Analysis.NormalizeParseSelftest where
  
-import Text.PrettyPrint
  
  import qualified Data.ByteString              as B
-import           Dyna.Analysis.NormalizeParse
+import qualified Data.List                    as L
+import qualified Data.Map                     as M
+import qualified Text.Trifecta                as T
+import           Text.PrettyPrint.Free
  
+import           Dyna.Analysis.NormalizeParse
  import qualified Dyna.ParserHS.Parser         as P
-import qualified Data.ByteString              as B
-
-
-import Dyna.Term.TTerm
-
-import           Dyna.XXX.TrifectaTest
  import           Dyna.ParserHS.Selftest
+import           Dyna.Term.TTerm
+import           Dyna.XXX.TrifectaTest
  
-import qualified Data.List     as L
-import qualified Data.Map      as M
-import qualified Text.Trifecta as T
  
-testNormTerm :: B.ByteString -> (DTerm, ANFState)
-testNormTerm = runNormalize . normTerm False . unsafeParse P.dterm
+testNormTerm :: Bool -> B.ByteString -> (DTerm, ANFState)
+testNormTerm c = runNormalize . normTerm c . unsafeParse P.dterm
  
  testNormRule :: B.ByteString -> (DRule, ANFState)
  testNormRule = runNormalize . normRule . unsafeParse P.drule
author	Nathaniel Wesley Filardo <nwf@cs.jhu.edu>
	Thu, 29 Nov 2012 05:09:30 +0000 (00:09 -0500)
committer	Nathaniel Wesley Filardo <nwf@cs.jhu.edu>
	Thu, 29 Nov 2012 05:09:30 +0000 (00:09 -0500)
src/Dyna/Analysis/NormalizeParse.hs		patch \| blob \| history
src/Dyna/Analysis/NormalizeParseSelftest.hs		patch \| blob \| history