From 4f1ccc02e57f5029ca8e6ec969e789b1cb61bd7a Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Tue, 4 Dec 2012 15:51:25 -0500 Subject: [PATCH] Pretty up the frontend code a bit --- src/Dyna/Analysis/NormalizeParse.hs | 162 ++++++++++++-------- src/Dyna/Analysis/NormalizeParseSelftest.hs | 7 +- src/Dyna/ParserHS/Parser.hs | 83 +++++----- src/Dyna/ParserHS/Selftest.hs | 9 +- src/Dyna/Term/TTerm.hs | 1 + 5 files changed, 152 insertions(+), 110 deletions(-) diff --git a/src/Dyna/Analysis/NormalizeParse.hs b/src/Dyna/Analysis/NormalizeParse.hs index 57441cd..59a266b 100644 --- a/src/Dyna/Analysis/NormalizeParse.hs +++ b/src/Dyna/Analysis/NormalizeParse.hs @@ -38,13 +38,40 @@ -- @is(X,Y) :- X = *Y.@. Is that something we should be normalizing out -- here or should be waiting for some further unfolding optimization phase? +-- FIXME: "str" is the same a constant str. + +-- TODO: ANF Normalizer should return *flat terms* so that we have type-safety +-- can a lint checker can verify we have exhaustive pattern matching... etc. + +-- timv: should there ever be more than one side condition? shouldn't it be +-- a single result variable after normalization? I see that if I use comma +-- to combine my conditions I get mutliple variables but should side +-- condtions be combined with comma? I was under the impression that we +-- always want strong Boolean values (i.e. none of that three-values null +-- stuff). +-- +-- it might be nice if terms came in with a type that verified that they are +-- "flat term" -- they've been normalized. +-- +-- It would also be nice if spans were killed... maybe there is an argument +-- against this. +-- +-- ANF Rule, `result` always the name of a variable -- it would be nice for +-- its type were string in that case. Similarly, side conditions are always +-- variables. +-- +-- TODO: there might too much special handling of the comma operator... + + -- Header material {{{ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE NoMonomorphismRestriction #-} {-# LANGUAGE OverloadedStrings #-} -module Dyna.Analysis.NormalizeParse where +module Dyna.Analysis.NormalizeParse ( + ANFState, normTerm, normRule, runNormalize, printANF +) where import Control.Monad.Reader import Control.Monad.State @@ -52,7 +79,6 @@ import Control.Unification import qualified Data.ByteString.UTF8 as BU import qualified Data.ByteString as B import qualified Data.Map as M -import qualified Data.Set as S import Text.PrettyPrint.Free import qualified Text.Trifecta as T @@ -62,6 +88,9 @@ import Dyna.Term.TTerm import qualified Data.Char as C +------------------------------------------------------------------------}}} +-- Preliminaries {{{ + data SelfDispos = SDInherit | SDEval | SDQuote @@ -88,6 +117,7 @@ data ANFDict = AD , ad_self_dispos :: (DFunct,Int) -> SelfDispos } +mergeDispositions :: SelfDispos -> (ECSrc, ArgDispos) -> ArgDispos mergeDispositions = md where md SDInherit (_,d) = d @@ -136,9 +166,46 @@ newWarn msg loc = modify (\s -> s { as_warns = (msg,loc):(as_warns s) }) unspan :: T.Spanned P.Term -> DTerm unspan (P.TVar v T.:~ _) = UVar v unspan (P.TNumeric v T.:~ _) = UTerm $ TNumeric v +unspan (P.TString v T.:~ _) = UTerm $ TString v unspan (P.TFunctor a as T.:~ _) = UTerm $ TFunctor a $ map unspan as unspan (P.TAnnot a t T.:~ _) = UTerm $ TAnnot (fmap unspan a) (unspan t) +------------------------------------------------------------------------}}} +-- Disposition computations {{{ + +-- XXX These should be read from declarations +dynaFunctorArgDispositions :: (DFunct, Int) -> [ArgDispos] +dynaFunctorArgDispositions x = case x of + ("is", 2) -> [ADQuote,ADEval] + -- evaluate arithmetic / math + ("exp", 1) -> [ADEval] + ("log", 1) -> [ADEval] + -- logic + ("and", 2) -> [ADEval, ADEval] + ("or", 2) -> [ADEval, ADEval] + ("not", 1) -> [ADEval] + (name, arity) -> + -- If it starts with a nonalpha, it prefers to evaluate arguments + let d = if C.isAlphaNum $ head $ BU.toString name + then ADQuote + else ADEval + in take arity $ repeat $ d + +-- XXX These should be read from declarations +dynaFunctorSelfDispositions :: (DFunct,Int) -> SelfDispos +dynaFunctorSelfDispositions x = case x of + ("true",0) -> SDQuote + ("false",0) -> SDQuote + ("pair",2) -> SDQuote + (name, _) -> + -- If it starts with a nonalpha, it prefers to evaluate + let d = if C.isAlphaNum $ head $ BU.toString name + then SDInherit + else SDEval + in d + +------------------------------------------------------------------------}}} +-- Normalize a Term {{{ -- | Convert a syntactic term into ANF; while here, move to a -- Control.Unification term representation. @@ -163,7 +230,7 @@ normTerm_ :: (MonadState ANFState m, MonadReader ANFDict m) -- -- While here, replace bare underscores with unique names. -- XXX is this the right place for that? -normTerm_ c _ t@(P.TVar v) = do +normTerm_ c _ (P.TVar v) = do v' <- if v == "_" then nextVar "_$w" else return v case c of (ECExplicit,ADEval) -> newEval "_$v" @@ -177,6 +244,13 @@ normTerm_ c ss (P.TNumeric n) = do _ -> return () return $ UTerm $ TNumeric n +-- Strings too +normTerm_ c ss (P.TString s) = do + case c of + (ECExplicit,ADEval) -> newWarn "Ignoring request to evaluate string" ss + _ -> return () + return $ UTerm $ TString s + -- Quote makes the context explicitly a quoting one normTerm_ _ ss (P.TFunctor "&" [t T.:~ st]) = do normTerm_ (ECExplicit,ADQuote) (st:ss) t @@ -221,52 +295,22 @@ normTerm :: (MonadState ANFState m, MonadReader ANFDict m) normTerm c (t T.:~ s) = normTerm_ (ECFunctor,if c then ADEval else ADQuote) [s] t +------------------------------------------------------------------------}}} +-- Normalize a Rule {{{ + -- XXX normRule :: (MonadState ANFState m, MonadReader ANFDict m) => T.Spanned P.Rule -- ^ Term to digest -> m DRule -normRule (P.Fact t T.:~ _) = do - nt <- normTerm False t - return $ Rule nt ":-" [] (UTerm $ TFunctor "true" []) normRule (P.Rule h a es r T.:~ _) = do nh <- normTerm False h nr <- normTerm True r >>= newUnif "_$r" nes <- mapM (normTerm True) es return $ Rule nh a nes nr --- XXX -dynaFunctorArgDispositions :: (DFunct, Int) -> [ArgDispos] -dynaFunctorArgDispositions x = case x of - ("is", 2) -> [ADQuote,ADEval] - -- evaluate arithmetic / math - ("exp", 1) -> [ADEval] - ("log", 1) -> [ADEval] - -- logic - ("and", 2) -> [ADEval, ADEval] - ("or", 2) -> [ADEval, ADEval] - ("not", 1) -> [ADEval] - (name, arity) -> - -- If it starts with a nonalpha, it prefers to evaluate arguments - let d = if C.isAlphaNum $ head $ BU.toString name - then ADQuote - else ADEval - in take arity $ repeat $ d - --- XXX --- --- Functors which prefer not to be evaluated -dynaFunctorSelfDispositions :: (DFunct,Int) -> SelfDispos -dynaFunctorSelfDispositions x = case x of - ("true",0) -> SDQuote - ("false",0) -> SDQuote - ("pair",2) -> SDQuote - (name, arity) -> - -- If it starts with a nonalpha, it prefers to evaluate - let d = if C.isAlphaNum $ head $ BU.toString name - then SDInherit - else SDEval - in d +------------------------------------------------------------------------}}} +-- Run the normalizer {{{ -- | Run the normalization routine. -- @@ -276,34 +320,11 @@ runNormalize = flip runState (AS 0 M.empty M.empty M.empty []) . flip runReaderT (AD dynaFunctorArgDispositions dynaFunctorSelfDispositions) --- FIXME: "str" is the same a constant str. - --- TODO: ANF Normalizer should return *flat terms* so that we have type-safety --- can a lint checker can verify we have exhaustive pattern matching... etc. +------------------------------------------------------------------------}}} +-- Pretty Printer {{{ --- timv: should there ever be more than one side condition? shouldn't it be --- a single result variable after normalization? I see that if I use comma --- to combine my conditions I get mutliple variables but should side --- condtions be combined with comma? I was under the impression that we --- always want strong Boolean values (i.e. none of that three-values null --- stuff). --- --- it might be nice if terms came in with a type that verified that they are --- "flat term" -- they've been normalized. --- --- It would also be nice if spans were killed... maybe there is an argument --- against this. --- --- ANF Rule, `result` always the name of a variable -- it would be nice for --- its type were string in that case. Similarly, side conditions are always --- variables. --- --- TODO: there might too much special handling of the comma operator... --- - -valign = align.vcat - -pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) = +printANF :: (DRule, ANFState) -> Doc e +printANF ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) = parens $ (pretty a) <+> valign [ (p h) , parens $ text "side" <+> (valign $ map (text.show) e) @@ -312,9 +333,14 @@ pp ((Rule h a e result), AS {as_evals = evals, as_unifs = unifs}) = , parens $ text "result" <+> (p result) ] where - p (UTerm (TFunctor fn args)) = parens $ hcat $ punctuate (text " ") $ (pretty fn : (map p args)) - p (UTerm (TNumeric (Left x))) = text $ show x + valign = align.vcat + + p (UTerm (TFunctor fn args)) = parens $ hcat $ punctuate (text " ") + $ (pretty fn : (map p args)) + p (UTerm (TNumeric (Left x))) = text $ show x p (UTerm (TNumeric (Right x))) = text $ show x + p (UTerm (TString s)) = text $ show s + p (UTerm (TAnnot _ t)) = p t -- XXX p (UVar x) = pretty x - q x = valign $ map (\(x,y)-> parens $ pretty x <+> p y) $ M.toList x + q x = valign $ map (\(y,z)-> parens $ pretty y <+> p z) $ M.toList x diff --git a/src/Dyna/Analysis/NormalizeParseSelftest.hs b/src/Dyna/Analysis/NormalizeParseSelftest.hs index 36350f2..6d05ee8 100644 --- a/src/Dyna/Analysis/NormalizeParseSelftest.hs +++ b/src/Dyna/Analysis/NormalizeParseSelftest.hs @@ -33,9 +33,8 @@ testNormRule :: B.ByteString -> (DRule, ANFState) testNormRule = runNormalize . normRule . unsafeParse P.drule --- XXX fix periods, parser thinks it's an infix op and fails. e1 = testNormRule "f(X)." -e2 = testNormRule "f(X) := 1." -- does not work +e2 = testNormRule "f(X) := 1." t1 = testNormRule "f(X) max= g(X) + h(X,X)" t2 = testNormRule "f(X, g(I)) += (g(I, h(X)) + 10)^2" @@ -46,13 +45,13 @@ t4 = unsafeParse P.dlines e4 -- hideous monster rule e3 = "f(X,Y) += (g(X,\"str\",d) - h(X,X,Y) - c)^2 + f(Y,Z)/exp(3.0) whenever ?c, (d < 10), e(f(h(X)), g(X))" t3 = testNormRule e3 -p3 = pp $ t3 +p3 = printANF $ t3 normalizeFile file = do contents <- B.readFile file writeFile (file ++ ".anf") - (show $ vcat (map (\(P.LRule x T.:~ _) -> pp $ runNormalize $ normRule x) + (show $ vcat (map (\(P.LRule x T.:~ _) -> printANF $ runNormalize $ normRule x) (unsafeParse P.dlines contents)) <> text "\n") -- add newline at end of file... return () diff --git a/src/Dyna/ParserHS/Parser.hs b/src/Dyna/ParserHS/Parser.hs index cc448ce..24e32ee 100644 --- a/src/Dyna/ParserHS/Parser.hs +++ b/src/Dyna/ParserHS/Parser.hs @@ -15,8 +15,11 @@ -- -- * Doesn't handle shared subgoals ("whenever ... { ... }") -- --- * Don't end numerics with ., even if it's the end-of-rule marker; --- put a space first. +-- * Doesn't understand nullary star for gensym correctly +-- (it's a available in term context but not texpr context; +-- this depends on an upstream fix in Text.Parser.Expression. +-- But: I am not worried about it since we don't handle gensyms +-- anywhere else in the pipeline yet) -- Header material {{{ @@ -65,10 +68,7 @@ data Term = TFunctor !B.ByteString -- explicit about the head being a term (though that's not an expressivity -- concern -- just use the parenthesized texpr case) so that there is no -- risk of parsing ambiguity. --- --- XXX The span on Fact is a little silly -data Rule = Fact (Spanned Term) - | Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term) +data Rule = Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term) deriving (Eq,Show) -- XXX The span on LRule is a little silly @@ -193,6 +193,9 @@ atom = liftA BU.fromString stringLiteralSQ ------------------------------------------------------------------------}}} -- Terms and term expressions {{{ +nullaryStar :: DeltaParsing m => m (Spanned Term) +nullaryStar = spanned $ flip TFunctor [] <$> (bsf $ string "*") + term :: DeltaParsing m => m (Spanned Term) term = token $ choice [ parens texpr @@ -207,7 +210,7 @@ term = token $ choice , try $ spanned $ flip TFunctor [] <$> atom <* (notFollowedBy $ char '(') - , try $ spanned $ flip TFunctor [] <$> (bsf $ string "*") + , try $ nullaryStar , spanned $ parenfunc ] where @@ -218,33 +221,7 @@ term = token $ choice mkta ty te = TAnnot (AnnType ty) te --- XXX I remember now why we didn't handle ',' as an operator: if it were, --- we'd have no way of distinguishing between @f(a,b)@ as --- --- > TFunctor "f" [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] --- --- and --- --- > TFunctor "f" [TFunctor "," [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] :~ _] --- --- We can fix this, but it means that we should have a separate expression --- parser for contexts where "comma means argument separation" and "comma --- means evaluation separator". I don't yet know how I feel about --- the "whenever" (and "is"?) operator(s) being available in the former table. - --- XXX right now all binops are at equal precedence and left-associative; --- that's wrong. -texpr :: DeltaParsing m => m (Spanned Term) -texpr = buildExpressionParser etable term "Expression" - where - etable = [ [ Prefix $ uf (spanned $ bsf $ symbol "new") ] - , [ Prefix $ uf (spanned $ bsf $ ident dynaPfxOperStyle) ] - , [ Infix (bf (spanned $ bsf $ ident dynaOperStyle)) AssocLeft ] - , [ Infix (bf (spanned $ bsf $ dotOper)) AssocRight ] - , [ Infix (bf (spanned $ bsf $ symbol "is")) AssocNone ] - ] - --- The dot operator is required to have not-a-space following (to avoid +-- | The dot operator is required to have not-a-space following (to avoid -- confusion with the end-of-rule marker, which is taken to be "dot space" -- or "dot eof"). -- @@ -252,6 +229,8 @@ texpr = buildExpressionParser etable term "Expression" dotAny :: CharParsing m => m Char dotAny = char '.' <* satisfy (not . isSpace) +-- | A "dot operator" is a dot followed immediately by something that looks +-- like a typical operator. dotOper :: (Monad m, TokenParsing m) => m [Char] dotOper = try (lookAhead dotAny *> identNL dynaDotOperStyle) @@ -269,6 +248,40 @@ bf f = do (x:~spx) <- f pure (\a@(_:~spa) b@(_:~spb) -> (TFunctor x [a,b]):~(spa <> spx <> spb)) +-- | The basic expression table +-- +-- XXX right now all binops are at equal precedence and left-associative; +-- that's wrong. +-- +-- XXX I remember now why we didn't handle ',' as an operator: if it were, +-- we'd have no way of distinguishing between @f(a,b)@ as +-- +-- > TFunctor "f" [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] +-- +-- and +-- +-- > TFunctor "f" [TFunctor "," [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] :~ _] +-- +-- We can fix this, but it means that we should have a separate expression +-- parser for contexts where "comma means argument separation" and "comma +-- means evaluation separator". I don't yet know how I feel about +-- the "whenever" (and "is"?) operator(s) being available in the former table. +termETable = [ [ Prefix $ uf (spanned $ bsf $ symbol "new") ] + , [ Prefix $ uf (spanned $ bsf $ ident dynaPfxOperStyle) ] + , [ Infix (bf (spanned $ bsf $ ident dynaOperStyle)) AssocLeft ] + , [ Infix (bf (spanned $ bsf $ dotOper)) AssocRight ] + -- XXX "is" belongs only in the full expression parser, not + -- in the term table + , [ Infix (bf (spanned $ bsf $ symbol "is")) AssocNone ] + ] + +-- fullETable = termETable ++ +-- [ [ Infix (bf (spanned $ bsf $ symbol "is")) AssocNone ] +-- , [ Infix (bf (spanned $ bsf $ symbol ",")) AssocRight ] +-- ] + +texpr :: DeltaParsing m => m (Spanned Term) +texpr = buildExpressionParser termETable term "Expression" dterm, dtexpr :: DeltaParsing m => m (Spanned Term) dterm = unDL term @@ -301,7 +314,7 @@ rule = choice [ <*> texpr) -- HEAD . - , Fact <$> term + , (\h@(_ :~ s) -> Rule h ":-" [] $ (TFunctor "true" [] :~ s)) <$> term ] <* optional (char '.') where diff --git a/src/Dyna/ParserHS/Selftest.hs b/src/Dyna/ParserHS/Selftest.hs index 374f318..b74db57 100644 --- a/src/Dyna/ParserHS/Selftest.hs +++ b/src/Dyna/ParserHS/Selftest.hs @@ -168,10 +168,13 @@ proglines = unsafeParse (dlines <* eof) case_ruleFact :: Assertion case_ruleFact = e @=? (progline sr) where - e = LRule (Fact (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 4 4) "goal.") - :~ ts) + e = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 4 4) sr) + ":-" + [] + (TFunctor "true" [] :~ Span (Columns 0 0) (Columns 4 4) sr) + :~ ts) :~ ts - ts = Span (Columns 0 0) (Columns 5 5) "goal." + ts = Span (Columns 0 0) (Columns 5 5) sr sr = "goal." case_ruleSimple :: Assertion diff --git a/src/Dyna/Term/TTerm.hs b/src/Dyna/Term/TTerm.hs index 07350f0..b578425 100644 --- a/src/Dyna/Term/TTerm.hs +++ b/src/Dyna/Term/TTerm.hs @@ -41,6 +41,7 @@ data Annotation t = AnnType t data TermF a t = TFunctor !a ![t] | TAnnot !(Annotation t) !t | TNumeric !(Either Integer Double) + | TString !B.ByteString deriving (Eq,F.Foldable,Functor,Ord,Show,T.Traversable) type DFunct = B.ByteString -- 2.50.1