From 4f1ccc02e57f5029ca8e6ec969e789b1cb61bd7a Mon Sep 17 00:00:00 2001
From: Nathaniel Wesley Filardo <nwf@cs.jhu.edu>
Date: Tue, 4 Dec 2012 15:51:25 -0500
Subject: [PATCH] Pretty up the frontend code a bit

---
 src/Dyna/Analysis/NormalizeParse.hs         | 162 ++++++++++++--------
 src/Dyna/Analysis/NormalizeParseSelftest.hs |   7 +-
 src/Dyna/ParserHS/Parser.hs                 |  83 +++++-----
 src/Dyna/ParserHS/Selftest.hs               |   9 +-
 src/Dyna/Term/TTerm.hs                      |   1 +
 5 files changed, 152 insertions(+), 110 deletions(-)

diff --git a/src/Dyna/Analysis/NormalizeParse.hs b/src/Dyna/Analysis/NormalizeParse.hs
index 57441cd..59a266b 100644
--- a/src/Dyna/Analysis/NormalizeParse.hs
+++ b/src/Dyna/Analysis/NormalizeParse.hs
@@ -38,13 +38,40 @@
 -- @is(X,Y) :- X = *Y.@.  Is that something we should be normalizing out
 -- here or should be waiting for some further unfolding optimization phase?
 
+-- FIXME: "str" is the same a constant str.
+
+-- TODO: ANF Normalizer should return *flat terms* so that we have type-safety
+-- can a lint checker can verify we have exhaustive pattern matching... etc.
+
+--     timv: should there ever be more than one side condition? shouldn't it be
+--     a single result variable after normalization? I see that if I use comma
+--     to combine my conditions I get mutliple variables but should side
+--     condtions be combined with comma? I was under the impression that we
+--     always want strong Boolean values (i.e. none of that three-values null
+--     stuff).
+--
+--     it might be nice if terms came in with a type that verified that they are
+--     "flat term" -- they've been normalized.
+--
+--     It would also be nice if spans were killed... maybe there is an argument
+--     against this.
+--
+--     ANF Rule, `result` always the name of a variable -- it would be nice for
+--     its type were string in that case. Similarly, side conditions are always
+--     variables.
+--
+--  TODO: there might too much special handling of the comma operator...
+
+
 -- Header material                                                      {{{
 {-# LANGUAGE FlexibleContexts #-}
 {-# LANGUAGE FlexibleInstances #-}
 {-# LANGUAGE NoMonomorphismRestriction #-}
 {-# LANGUAGE OverloadedStrings #-}
 
-module Dyna.Analysis.NormalizeParse where
+module Dyna.Analysis.NormalizeParse (
+    ANFState, normTerm, normRule, runNormalize, printANF
+) where
 
 import           Control.Monad.Reader
 import           Control.Monad.State
@@ -52,7 +79,6 @@ import           Control.Unification
 import qualified Data.ByteString.UTF8       as BU
 import qualified Data.ByteString            as B
 import qualified Data.Map                   as M
-import qualified Data.Set                   as S
 import           Text.PrettyPrint.Free
 import qualified Text.Trifecta              as T
 
@@ -62,6 +88,9 @@ import           Dyna.Term.TTerm
 
 import qualified Data.Char as C
 
+------------------------------------------------------------------------}}}
+-- Preliminaries                                                        {{{
+
 data SelfDispos = SDInherit
                 | SDEval
                 | SDQuote
@@ -88,6 +117,7 @@ data ANFDict = AD
   , ad_self_dispos :: (DFunct,Int) -> SelfDispos
   }
 
+mergeDispositions :: SelfDispos -> (ECSrc, ArgDispos) -> ArgDispos
 mergeDispositions = md
  where
   md SDInherit (_,d)                = d
@@ -136,9 +166,46 @@ newWarn msg loc = modify (\s -> s { as_warns = (msg,loc):(as_warns s) })
 unspan :: T.Spanned P.Term -> DTerm
 unspan (P.TVar v T.:~ _)        = UVar v
 unspan (P.TNumeric v T.:~ _)    = UTerm $ TNumeric v
+unspan (P.TString v T.:~ _)     = UTerm $ TString v
 unspan (P.TFunctor a as T.:~ _) = UTerm $ TFunctor a $ map unspan as
 unspan (P.TAnnot a t T.:~ _)    = UTerm $ TAnnot (fmap unspan a) (unspan t)
 
+------------------------------------------------------------------------}}}
+-- Disposition computations                                             {{{
+
+-- XXX These should be read from declarations
+dynaFunctorArgDispositions :: (DFunct, Int) -> [ArgDispos]
+dynaFunctorArgDispositions x = case x of
+    ("is", 2)  -> [ADQuote,ADEval]
+    -- evaluate arithmetic / math
+    ("exp", 1) -> [ADEval]
+    ("log", 1) -> [ADEval]
+    -- logic
+    ("and", 2) -> [ADEval, ADEval]
+    ("or", 2)  -> [ADEval, ADEval]
+    ("not", 1) -> [ADEval]
+    (name, arity) ->
+       -- If it starts with a nonalpha, it prefers to evaluate arguments
+       let d = if C.isAlphaNum $ head $ BU.toString name
+                then ADQuote
+                else ADEval
+       in take arity $ repeat $ d
+
+-- XXX These should be read from declarations
+dynaFunctorSelfDispositions :: (DFunct,Int) -> SelfDispos
+dynaFunctorSelfDispositions x = case x of
+    ("true",0)   -> SDQuote
+    ("false",0)  -> SDQuote
+    ("pair",2)   -> SDQuote
+    (name, _) ->
+       -- If it starts with a nonalpha, it prefers to evaluate
+       let d = if C.isAlphaNum $ head $ BU.toString name
+                then SDInherit
+                else SDEval
+       in d
+
+------------------------------------------------------------------------}}}
+-- Normalize a Term                                                     {{{
 
 -- | Convert a syntactic term into ANF; while here, move to a
 -- Control.Unification term representation.
@@ -163,7 +230,7 @@ normTerm_ :: (MonadState ANFState m, MonadReader ANFDict m)
 --
 -- While here, replace bare underscores with unique names.
 -- XXX is this the right place for that?
-normTerm_ c _ t@(P.TVar v) = do
+normTerm_ c _ (P.TVar v) = do
     v' <- if v == "_" then nextVar "_$w" else return v
     case c of
        (ECExplicit,ADEval) -> newEval "_$v"
@@ -177,6 +244,13 @@ normTerm_ c   ss  (P.TNumeric n)    = do
       _                   -> return ()
     return $ UTerm $ TNumeric n
 
+-- Strings too
+normTerm_ c   ss  (P.TString s)    = do
+    case c of
+      (ECExplicit,ADEval) -> newWarn "Ignoring request to evaluate string" ss
+      _                   -> return ()
+    return $ UTerm $ TString s
+
 -- Quote makes the context explicitly a quoting one
 normTerm_ _   ss (P.TFunctor "&" [t T.:~ st]) = do
     normTerm_ (ECExplicit,ADQuote) (st:ss) t
@@ -221,52 +295,22 @@ normTerm :: (MonadState ANFState m, MonadReader ANFDict m)
 normTerm c (t T.:~ s) = normTerm_ (ECFunctor,if c then ADEval else ADQuote)
                                   [s] t
 
+------------------------------------------------------------------------}}}
+-- Normalize a Rule                                                     {{{
+
 -- XXX
 normRule :: (MonadState ANFState m, MonadReader ANFDict m)
          => T.Spanned P.Rule   -- ^ Term to digest
          -> m DRule
-normRule (P.Fact t T.:~ _) = do
-    nt <- normTerm False t
-    return $ Rule nt ":-" [] (UTerm $ TFunctor "true" [])
 normRule (P.Rule h a es r T.:~ _) = do
     nh  <- normTerm False h
     nr  <- normTerm True  r >>= newUnif "_$r"
     nes <- mapM (normTerm True) es
     return $ Rule nh a nes nr
 
--- XXX
-dynaFunctorArgDispositions :: (DFunct, Int) -> [ArgDispos]
-dynaFunctorArgDispositions x = case x of
-    ("is", 2)  -> [ADQuote,ADEval]
-    -- evaluate arithmetic / math
-    ("exp", 1) -> [ADEval]
-    ("log", 1) -> [ADEval]
-    -- logic
-    ("and", 2) -> [ADEval, ADEval]
-    ("or", 2)  -> [ADEval, ADEval]
-    ("not", 1) -> [ADEval]
-    (name, arity) ->
-       -- If it starts with a nonalpha, it prefers to evaluate arguments
-       let d = if C.isAlphaNum $ head $ BU.toString name
-                then ADQuote
-                else ADEval
-       in take arity $ repeat $ d
-
--- XXX
---
--- Functors which prefer not to be evaluated
-dynaFunctorSelfDispositions :: (DFunct,Int) -> SelfDispos
-dynaFunctorSelfDispositions x = case x of
-    ("true",0)   -> SDQuote
-    ("false",0)  -> SDQuote
-    ("pair",2)   -> SDQuote
-    (name, arity) ->
-       -- If it starts with a nonalpha, it prefers to evaluate
-       let d = if C.isAlphaNum $ head $ BU.toString name
-                then SDInherit
-                else SDEval
-       in d
 
+------------------------------------------------------------------------}}}
+-- Run the normalizer                                                   {{{
 
 -- | Run the normalization routine.
 --
@@ -276,34 +320,11 @@ runNormalize =
   flip runState   (AS 0 M.empty M.empty M.empty []) .
   flip runReaderT (AD dynaFunctorArgDispositions dynaFunctorSelfDispositions)
 
--- FIXME: "str" is the same a constant str.
-
--- TODO: ANF Normalizer should return *flat terms* so that we have type-safety
--- can a lint checker can verify we have exhaustive pattern matching... etc.
+------------------------------------------------------------------------}}}
+-- Pretty Printer                                                       {{{
 
---     timv: should there ever be more than one side condition? shouldn't it be
---     a single result variable after normalization? I see that if I use comma
---     to combine my conditions I get mutliple variables but should side
---     condtions be combined with comma? I was under the impression that we
---     always want strong Boolean values (i.e. none of that three-values null
---     stuff).
---
---     it might be nice if terms came in with a type that verified that they are
---     "flat term" -- they've been normalized.
---
---     It would also be nice if spans were killed... maybe there is an argument
---     against this.
---
---     ANF Rule, `result` always the name of a variable -- it would be nice for
---     its type were string in that case. Similarly, side conditions are always
---     variables.
---
---  TODO: there might too much special handling of the comma operator...
---
-
-valign = align.vcat
-
-pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) =
+printANF :: (DRule, ANFState) -> Doc e
+printANF ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) =
   parens $ (pretty a)
            <+> valign [ (p h)
                       , parens $ text "side"   <+> (valign $ map (text.show) e)
@@ -312,9 +333,14 @@ pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) =
                       , parens $ text "result" <+> (p result)
                       ]
   where
-    p (UTerm (TFunctor fn args)) = parens $ hcat $ punctuate (text " ") $ (pretty fn : (map p args))
-    p (UTerm (TNumeric (Left x))) = text $ show x
+    valign = align.vcat
+
+    p (UTerm (TFunctor fn args))   = parens $ hcat $ punctuate (text " ")
+                                     $ (pretty fn : (map p args))
+    p (UTerm (TNumeric (Left x)))  = text $ show x
     p (UTerm (TNumeric (Right x))) = text $ show x
+    p (UTerm (TString s))          = text $ show s
+    p (UTerm (TAnnot _ t))         = p t -- XXX
     p (UVar x) = pretty x
 
-    q x = valign $ map (\(x,y)-> parens $ pretty x <+> p y) $ M.toList x
+    q x = valign $ map (\(y,z)-> parens $ pretty y <+> p z) $ M.toList x
diff --git a/src/Dyna/Analysis/NormalizeParseSelftest.hs b/src/Dyna/Analysis/NormalizeParseSelftest.hs
index 36350f2..6d05ee8 100644
--- a/src/Dyna/Analysis/NormalizeParseSelftest.hs
+++ b/src/Dyna/Analysis/NormalizeParseSelftest.hs
@@ -33,9 +33,8 @@ testNormRule :: B.ByteString -> (DRule, ANFState)
 testNormRule = runNormalize . normRule . unsafeParse P.drule
 
 
--- XXX fix periods, parser thinks it's an infix op and fails.
 e1 = testNormRule "f(X)."
-e2 = testNormRule "f(X) := 1." -- does not work
+e2 = testNormRule "f(X) := 1."
 
 t1 = testNormRule "f(X) max= g(X) + h(X,X)"
 t2 = testNormRule "f(X, g(I)) += (g(I, h(X)) + 10)^2"
@@ -46,13 +45,13 @@ t4 = unsafeParse P.dlines e4
 -- hideous monster rule
 e3 = "f(X,Y) += (g(X,\"str\",d) - h(X,X,Y) - c)^2 + f(Y,Z)/exp(3.0) whenever ?c, (d < 10), e(f(h(X)), g(X))"
 t3 = testNormRule e3
-p3 = pp $ t3
+p3 = printANF $ t3
 
 
 normalizeFile file = do
     contents <- B.readFile file
     writeFile (file ++ ".anf")
-              (show $ vcat (map (\(P.LRule x T.:~ _) -> pp $ runNormalize $ normRule x)
+              (show $ vcat (map (\(P.LRule x T.:~ _) -> printANF $ runNormalize $ normRule x)
                                 (unsafeParse P.dlines contents))
                       <> text "\n") -- add newline at end of file...
     return ()
diff --git a/src/Dyna/ParserHS/Parser.hs b/src/Dyna/ParserHS/Parser.hs
index cc448ce..24e32ee 100644
--- a/src/Dyna/ParserHS/Parser.hs
+++ b/src/Dyna/ParserHS/Parser.hs
@@ -15,8 +15,11 @@
 --
 --   * Doesn't handle shared subgoals ("whenever ... { ... }")
 --
---   * Don't end numerics with ., even if it's the end-of-rule marker;
---   put a space first.
+--   * Doesn't understand nullary star for gensym correctly
+--      (it's a available in term context but not texpr context;
+--      this depends on an upstream fix in Text.Parser.Expression.
+--      But: I am not worried about it since we don't handle gensyms
+--      anywhere else in the pipeline yet)
 
 --   Header material                                                      {{{
 
@@ -65,10 +68,7 @@ data Term = TFunctor !B.ByteString
 --   explicit about the head being a term (though that's not an expressivity
 --   concern -- just use the parenthesized texpr case) so that there is no
 --   risk of parsing ambiguity.
---
---   XXX The span on Fact is a little silly
-data Rule = Fact (Spanned Term)
-          | Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term)
+data Rule = Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term)
  deriving (Eq,Show)
 
 --   XXX The span on LRule is a little silly
@@ -193,6 +193,9 @@ atom =     liftA BU.fromString stringLiteralSQ
 ------------------------------------------------------------------------}}}
 -- Terms and term expressions                                           {{{
 
+nullaryStar :: DeltaParsing m => m (Spanned Term)
+nullaryStar = spanned $ flip TFunctor [] <$> (bsf $ string "*")
+
 term :: DeltaParsing m => m (Spanned Term)
 term  = token $ choice
       [       parens texpr
@@ -207,7 +210,7 @@ term  = token $ choice
       , try $ spanned $ flip TFunctor [] <$> atom
                       <* (notFollowedBy $ char '(')
 
-      , try $ spanned $ flip TFunctor [] <$> (bsf $ string "*")
+      , try $ nullaryStar
       ,       spanned $ parenfunc
       ]
  where
@@ -218,33 +221,7 @@ term  = token $ choice
 
   mkta ty te = TAnnot (AnnType ty) te
 
--- XXX I remember now why we didn't handle ',' as an operator: if it were,
--- we'd have no way of distinguishing between @f(a,b)@ as
---
---   > TFunctor "f" [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _]
---
--- and
---
---   > TFunctor "f" [TFunctor "," [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] :~ _]
---
--- We can fix this, but it means that we should have a separate expression
--- parser for contexts where "comma means argument separation" and "comma
--- means evaluation separator".  I don't yet know how I feel about
--- the "whenever" (and "is"?) operator(s) being available in the former table.
-
--- XXX right now all binops are at equal precedence and left-associative;
--- that's wrong.
-texpr :: DeltaParsing m => m (Spanned Term)
-texpr = buildExpressionParser etable term <?> "Expression"
- where
-  etable = [ [ Prefix $ uf (spanned $ bsf $ symbol "new") ]
-           , [ Prefix $ uf (spanned $ bsf $ ident dynaPfxOperStyle)        ]
-           , [ Infix  (bf (spanned $ bsf $ ident dynaOperStyle)) AssocLeft ]
-           , [ Infix  (bf (spanned $ bsf $ dotOper)) AssocRight ]
-           , [ Infix  (bf (spanned $ bsf $ symbol "is")) AssocNone ]
-           ]
-
--- The dot operator is required to have not-a-space following (to avoid
+-- | The dot operator is required to have not-a-space following (to avoid
 -- confusion with the end-of-rule marker, which is taken to be "dot space"
 -- or "dot eof").
 --
@@ -252,6 +229,8 @@ texpr = buildExpressionParser etable term <?> "Expression"
 dotAny :: CharParsing m => m Char
 dotAny  = char '.' <* satisfy (not . isSpace)
 
+-- | A "dot operator" is a dot followed immediately by something that looks
+-- like a typical operator.
 dotOper :: (Monad m, TokenParsing m) => m [Char]
 dotOper = try (lookAhead dotAny *> identNL dynaDotOperStyle)
 
@@ -269,6 +248,40 @@ bf f = do
   (x:~spx)  <- f
   pure (\a@(_:~spa) b@(_:~spb) -> (TFunctor x [a,b]):~(spa <> spx <> spb))
 
+-- | The basic expression table
+--
+-- XXX right now all binops are at equal precedence and left-associative;
+-- that's wrong.
+--
+-- XXX I remember now why we didn't handle ',' as an operator: if it were,
+-- we'd have no way of distinguishing between @f(a,b)@ as
+--
+--   > TFunctor "f" [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _]
+--
+-- and
+--
+--   > TFunctor "f" [TFunctor "," [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] :~ _]
+--
+-- We can fix this, but it means that we should have a separate expression
+-- parser for contexts where "comma means argument separation" and "comma
+-- means evaluation separator".  I don't yet know how I feel about
+-- the "whenever" (and "is"?) operator(s) being available in the former table.
+termETable = [ [ Prefix $ uf (spanned $ bsf $ symbol "new") ]
+             , [ Prefix $ uf (spanned $ bsf $ ident dynaPfxOperStyle)        ]
+             , [ Infix  (bf (spanned $ bsf $ ident dynaOperStyle)) AssocLeft ]
+             , [ Infix  (bf (spanned $ bsf $ dotOper)) AssocRight ]
+                -- XXX "is" belongs only in the full expression parser, not
+                -- in the term table
+             , [ Infix  (bf (spanned $ bsf $ symbol "is")) AssocNone ]
+             ]
+
+-- fullETable = termETable ++
+--              [ [ Infix  (bf (spanned $ bsf $ symbol "is")) AssocNone ]
+--             , [ Infix  (bf (spanned $ bsf $ symbol ",")) AssocRight ]
+--             ]
+
+texpr :: DeltaParsing m => m (Spanned Term)
+texpr = buildExpressionParser termETable term <?> "Expression"
 
 dterm, dtexpr :: DeltaParsing m => m (Spanned Term)
 dterm  = unDL term
@@ -301,7 +314,7 @@ rule = choice [
                           <*> texpr)
 
                -- HEAD .
-             , Fact   <$> term
+             , (\h@(_ :~ s) -> Rule h ":-" [] $ (TFunctor "true" [] :~ s)) <$> term
              ]
        <* optional (char '.')
  where
diff --git a/src/Dyna/ParserHS/Selftest.hs b/src/Dyna/ParserHS/Selftest.hs
index 374f318..b74db57 100644
--- a/src/Dyna/ParserHS/Selftest.hs
+++ b/src/Dyna/ParserHS/Selftest.hs
@@ -168,10 +168,13 @@ proglines = unsafeParse (dlines <* eof)
 case_ruleFact :: Assertion
 case_ruleFact = e @=? (progline sr)
  where
-  e  = LRule (Fact (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 4 4) "goal.")
-                     :~ ts)
+  e  = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 4 4) sr)
+                   ":-"
+                   []
+                   (TFunctor "true" [] :~ Span (Columns 0 0) (Columns 4 4) sr)
+                   :~ ts)
          :~ ts
-  ts = Span (Columns 0 0) (Columns 5 5) "goal."
+  ts = Span (Columns 0 0) (Columns 5 5) sr
   sr = "goal."
 
 case_ruleSimple :: Assertion
diff --git a/src/Dyna/Term/TTerm.hs b/src/Dyna/Term/TTerm.hs
index 07350f0..b578425 100644
--- a/src/Dyna/Term/TTerm.hs
+++ b/src/Dyna/Term/TTerm.hs
@@ -41,6 +41,7 @@ data Annotation t = AnnType t
 data TermF a t = TFunctor !a ![t]
                | TAnnot   !(Annotation t) !t
                | TNumeric !(Either Integer Double)
+               | TString  !B.ByteString
  deriving (Eq,F.Foldable,Functor,Ord,Show,T.Traversable)
 
 type DFunct = B.ByteString
-- 
2.50.1