From af72a3a1cebf5fb0fd8d3732c0782a7c87c5b718 Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Fri, 5 Oct 2012 20:40:11 -0400 Subject: [PATCH] Make Dyna parser understand type annotations --- dyna.cabal | 6 +- src/Dyna/ParserHS/Parser.hs | 148 ++++++++++++++++++++-------- src/Dyna/ParserHS/ParserSelftest.hs | 70 ++++++++++--- 3 files changed, 170 insertions(+), 54 deletions(-) diff --git a/dyna.cabal b/dyna.cabal index de97133..aca5f49 100644 --- a/dyna.cabal +++ b/dyna.cabal @@ -31,6 +31,7 @@ Library Build-Depends: base >=4, bytestring >=0.9, + charset >=0.3, containers >=0.4, mtl >=2.1, parsers >=0.3, @@ -50,6 +51,7 @@ Executable drepl Hs-Source-Dirs: src Build-Depends: base >=4, bytestring >=0.9, + charset >=0.3, containers >=0.4, haskeline >=0.6, mtl >=2.1, @@ -58,7 +60,8 @@ Executable drepl semigroups >=0.8, trifecta >=0.90, unordered-containers>=0.2, - utf8-string >=0.3 + utf8-string >=0.3, + wl-pprint-extras >=3.0 Main-Is: Dyna/REPL.hs @@ -72,6 +75,7 @@ Test-suite dyna-selftests Build-Depends: base >=4, bytestring >=0.9, + charset >=0.3, containers >=0.4, HUnit >=1.2, mtl >=2.1, diff --git a/src/Dyna/ParserHS/Parser.hs b/src/Dyna/ParserHS/Parser.hs index 9566a49..e600be1 100644 --- a/src/Dyna/ParserHS/Parser.hs +++ b/src/Dyna/ParserHS/Parser.hs @@ -1,23 +1,25 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE NoMonomorphismRestriction #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE Rank2Types #-} - +--------------------------------------------------------------------------- +-- | A parser for some chunk of the Dyna language, using Trifecta +-- -- Based in part on -- https://github.com/ekmett/trifecta/blob/master/examples/RFC2616.hs -- as well as the trifecta code itself -- --- XXX no longer handles comments due to trifecta code upgrade --- -- TODO: -- We might want to use T.T.Literate, too, in the end. -- Doesn't understand dynabase literals ("{ ... }") -- Doesn't handle parenthesized aggregators -- Doesn't handle shared subgoals ("whenever ... { ... }") --- Doesn't understand "foo." style rules. + +-- Header material {{{ + +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE NoMonomorphismRestriction #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE Rank2Types #-} module Dyna.ParserHS.Parser ( - Term(..), dterm, dtexpr, + Term(..), Annotation(..), dterm, dtexpr, Rule(..), drule, Line(..), dline, dlines ) where @@ -27,6 +29,7 @@ import Control.Monad.Trans (MonadTrans,lift) import qualified Data.ByteString.UTF8 as BU import qualified Data.ByteString as B import Data.Char (isSpace) +import qualified Data.CharSet as CS import qualified Data.HashSet as H import Data.Semigroup ((<>)) import Data.Monoid (mempty) @@ -37,16 +40,64 @@ import Text.Trifecta import Dyna.XXX.Trifecta (identNL) +------------------------------------------------------------------------}}} +-- Parsed output definition {{{ + +data Annotation = AnnType !B.ByteString + deriving (Eq,Ord,Show) + data Term = TFunctor {-# UNPACK #-} !B.ByteString ![Spanned Term] + | TAnnot {-# UNPACK #-} !Annotation !(Spanned Term) | TVar {-# UNPACK #-} !B.ByteString -- | TDBLit XXX deriving (Eq,Ord,Show) +-- | Rules are not just terms because we want to make it very syntactically +-- explicit about the head being a term (though that's not an expressivity +-- concern -- just use the parenthesized texpr case) so that there is no +-- risk of parsing ambiguity. +-- +-- XXX The span on Fact is a little silly +data Rule = Fact (Spanned Term) + | Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term) + deriving (Eq,Ord,Show) + +-- XXX The span on LRule is a little silly +-- XXX Having one kind of Pragma is probably wrong +data Line = LRule (Spanned Rule) + | LPragma !(Spanned Term) + deriving (Eq,Ord,Show) + + +------------------------------------------------------------------------}}} +-- Utilities {{{ + +bsf :: Functor f => f String -> f B.ByteString +bsf = fmap BU.fromString + +------------------------------------------------------------------------}}} +-- Identifier Syles {{{ + +usualpunct :: CS.CharSet +usualpunct = CS.fromList "!#$%&*+/<=>?@\\^|-~:." + dynaDotOperStyle :: TokenParsing m => IdentifierStyle m dynaDotOperStyle = IdentifierStyle { styleName = "Dot Operator" , styleStart = char '.' - , styleLetter = oneOf "!#$%&*+/<=>?@\\^|-~:." + , styleLetter = oneOfSet $ usualpunct + , styleReserved = mempty + , styleHighlight = Operator + , styleReservedHighlight = ReservedOperator + } + + -- | Colon is not a permitted beginning to a prefix + -- operator, as it is a sigil for type annotations. +dynaPfxOperStyle :: TokenParsing m => IdentifierStyle m +dynaPfxOperStyle = IdentifierStyle + { styleName = "Prefix Operator" + , styleStart = oneOfSet $ usualpunct CS.\\ CS.fromList ".:" + , styleLetter = oneOfSet $ usualpunct , styleReserved = mempty , styleHighlight = Operator , styleReservedHighlight = ReservedOperator @@ -54,14 +105,24 @@ dynaDotOperStyle = IdentifierStyle dynaOperStyle :: TokenParsing m => IdentifierStyle m dynaOperStyle = IdentifierStyle - { styleName = "Operator" - , styleStart = oneOf "!#$%&*+/<=>?@\\^|-~:" - , styleLetter = oneOf "!#$%&*+/<=>?@\\^|-~:." + { styleName = "Infix Operator" + , styleStart = oneOfSet $ CS.delete '.' usualpunct + , styleLetter = oneOfSet $ usualpunct , styleReserved = mempty , styleHighlight = Operator , styleReservedHighlight = ReservedOperator } +dynaTypeStyle :: TokenParsing m => IdentifierStyle m +dynaTypeStyle = IdentifierStyle + { styleName = "Type Annotation" + , styleStart = char ':' + , styleLetter = (alphaNum <|> oneOf "_'") + , styleReserved = mempty + , styleHighlight = Operator + , styleReservedHighlight = ReservedOperator +} + dynaAtomStyle :: TokenParsing m => IdentifierStyle m dynaAtomStyle = IdentifierStyle { styleName = "Atom" @@ -82,6 +143,10 @@ dynaVarStyle = IdentifierStyle , styleReservedHighlight = ReservedIdentifier } + +------------------------------------------------------------------------}}} +-- Comment handling {{{ + dynaCommentStyle :: CommentStyle dynaCommentStyle = CommentStyle { commentStart = "{%" -- XXX? @@ -107,36 +172,38 @@ instance DeltaParsing m => DeltaParsing (DynaLanguage m) where slicedWith f (DL m) = DL $ slicedWith f m rend = lift rend restOfLine = lift restOfLine - -bsf = fmap BU.fromString +------------------------------------------------------------------------}}} +-- Atoms {{{ atom :: (Monad m, TokenParsing m) => m B.ByteString atom = liftA BU.fromString stringLiteral <|> (bsf $ ident dynaAtomStyle) --- sparen :: MonadParser m => m a -> m a --- sparen = between (char '(' *> spaces) (spaces <* char ')') +------------------------------------------------------------------------}}} +-- Terms and term expressions {{{ term :: DeltaParsing m => m (Spanned Term) term = token $ choice [ parens texpr - , spanned $ TVar <$> (bsf$ident dynaVarStyle) + , spanned $ TVar <$> (bsf $ ident dynaVarStyle) , try $ spanned $ flip TFunctor [] <$> atom <* (notFollowedBy $ char '(') + , try $ spanned $ mkta <$> (bsf $ ident dynaTypeStyle) <* spaces <*> term , spanned $ parenfunc - ] + ] where parenfunc = TFunctor <$> (highlight Identifier atom "Functor") <*> parens (texpr `sepBy` symbolic ',') + mkta ty te = TAnnot (AnnType ty) te -- XXX right now all binops are at equal precedence and left-associative; that's wrong. texpr :: DeltaParsing m => m (Spanned Term) texpr = buildExpressionParser etable term "Expression" where etable = [ [ Prefix $ uf (spanned $ bsf $ symbol "new") ] - , [ Prefix $ uf (spanned $ bsf $ ident dynaOperStyle) ] + , [ Prefix $ uf (spanned $ bsf $ ident dynaPfxOperStyle) ] , [ Infix (bf (spanned $ bsf $ ident dynaOperStyle)) AssocLeft ] - , [ Infix (bf (spanned $ bsf $ dotOper)) AssocRight ] + , [ Infix (bf (spanned $ bsf $ dotOper)) AssocRight ] , [ Infix (bf (spanned $ bsf $ symbol "is")) AssocNone ] ] @@ -153,48 +220,46 @@ texpr = buildExpressionParser etable term "Expression" (x:~spx) <- f pure (\a@(_:~spa) b@(_:~spb) -> (TFunctor x [a,b]):~(spa <> spx <> spb)) -hriss = highlight ReservedOperator . spanned . symbol dterm, dtexpr :: DeltaParsing m => m (Spanned Term) dterm = unDL term dtexpr = unDL texpr --- | Rules are not just terms because we want to make it very syntactically --- explicit about the head being a term (though that's not an expressivity --- concern -- just use the parenthesized texpr case) so that there is no --- risk of parsing ambiguity. --- --- XXX The span on Fact is a little silly -data Rule = Fact (Spanned Term) - | Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term) - deriving (Eq,Ord,Show) - --- XXX The span on LRule is a little silly --- XXX Having one kind of Pragma is probably wrong -data Line = LRule (Spanned Rule) - | LPragma !(Spanned Term) - deriving (Eq,Ord,Show) +------------------------------------------------------------------------}}} +-- Rules {{{ +-- | Grab the head (term!) and aggregation operator from a line that +-- we hope is a rule. +rulepfx :: DeltaParsing f => f ([Spanned Term] -> Spanned Term -> Rule) rulepfx = Rule <$> term <* spaces - <*> (bsf$ident dynaOperStyle "Aggregator") + <*> (bsf $ ident dynaOperStyle "Aggregator") rule :: DeltaParsing m => m Rule -rule = choice [(try (liftA flip rulepfx +rule = choice [ + -- HEAD OP= RESULT whenever EXPRS . + (try (liftA flip rulepfx <*> texpr - <* hriss "whenever")) + <* hrss "whenever")) <*> (texpr `sepBy1` symbolic ',') + -- HEAD OP= EXPRS, RESULT . , (try rulepfx) <*> many (try (texpr <* symbolic ',')) <*> texpr + -- HEAD . , Fact <$> term ] + where + hrss = highlight ReservedOperator . spanned . symbol drule :: DeltaParsing m => m (Spanned Rule) drule = spanned rule +------------------------------------------------------------------------}}} +-- Lines {{{ + progline :: DeltaParsing m => m (Spanned Line) progline = spanned $ choice [ LRule <$> drule , LPragma <$> (symbol ":-" @@ -203,8 +268,9 @@ progline = spanned $ choice [ LRule <$> drule ] dline :: DeltaParsing m => m (Spanned Line) --- dline = unDL (progline <* optional (char '.' <* (spaces <|> eof))) dline = unDL (progline <* optional (char '.') <* optional newline) dlines :: DeltaParsing m => m [Spanned Line] dlines = unDL (progline `sepEndBy` (char '.' <* spaces)) + +------------------------------------------------------------------------}}} diff --git a/src/Dyna/ParserHS/ParserSelftest.hs b/src/Dyna/ParserHS/ParserSelftest.hs index bf242ae..33e09fd 100644 --- a/src/Dyna/ParserHS/ParserSelftest.hs +++ b/src/Dyna/ParserHS/ParserSelftest.hs @@ -1,7 +1,6 @@ -{-# LANGUAGE TemplateHaskell #-} -{-# LANGUAGE Rank2Types #-} -{-# LANGUAGE OverloadedStrings #-} - +--------------------------------------------------------------------------- +-- | Parser self-test cases +-- -- TODO: -- Writing these is still too hard, Template Haskell and the REPL -- notwithstanding. @@ -9,13 +8,19 @@ -- Test.Framework.TH appears not to understand comments at the -- moment, and parses right through them. +-- Header material {{{ + +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE Rank2Types #-} +{-# LANGUAGE OverloadedStrings #-} + module Dyna.ParserHS.ParserSelftest where -import Control.Applicative ((<*)) +-- import Control.Applicative ((<*)) import Data.ByteString (ByteString) -import Data.Foldable (toList) -import Data.Monoid (mempty) -import qualified Data.Sequence as S +-- import Data.Foldable (toList) +-- import Data.Monoid (mempty) +-- import qualified Data.Sequence as S import Data.String import qualified Test.Framework as TF import Test.Framework.Providers.HUnit @@ -27,16 +32,21 @@ import Text.Trifecta.Delta import Dyna.ParserHS.Parser import Dyna.XXX.TrifectaTest +------------------------------------------------------------------------}}} +-- Terms and basic handling {{{ term :: ByteString -> Spanned Term term = unsafeParse dterm +case_basicAtom :: Assertion case_basicAtom = e @=? (term "foo") where e = TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 3 3) "foo" +case_basicAtomTWS :: Assertion case_basicAtomTWS = e @=? (term "foo ") where e = TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 4 4) "foo " +case_basicFunctor :: Assertion case_basicFunctor = e @=? (term sfb) where e = TFunctor "foo" @@ -47,6 +57,7 @@ case_basicFunctor = e @=? (term sfb) sfb :: (IsString s) => s sfb = "foo(bar)" +case_nestedFunctorsWithArgs :: Assertion case_nestedFunctorsWithArgs = e @=? (term st) where e = TFunctor "foo" @@ -64,6 +75,7 @@ case_nestedFunctorsWithArgs = e @=? (term st) st :: (IsString s) => s st = "foo(bar,X,bif(),baz(quux,Y))" +case_basicFunctorComment :: Assertion case_basicFunctorComment = e @=? (term sfb) where e = TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 8 8) sfb @@ -71,6 +83,7 @@ case_basicFunctorComment = e @=? (term sfb) sfb :: (IsString s) => s sfb = "foo %xxx" +case_basicFunctorNLComment :: Assertion case_basicFunctorNLComment = e @=? (term sfb) where e = TFunctor "foo" @@ -82,7 +95,7 @@ case_basicFunctorNLComment = e @=? (term sfb) sfb :: (IsString s) => s sfb = "foo(%xxx\n1,2\n)" - +case_basicFunctorTWS :: Assertion case_basicFunctorTWS = e @=? (term sfb) where e = TFunctor "foo" @@ -92,6 +105,7 @@ case_basicFunctorTWS = e @=? (term sfb) sfb :: (IsString s) => s sfb = "foo\n(bar )" +case_basicFunctorNL :: Assertion case_basicFunctorNL = e @=? (term sfb) where e = TFunctor "foo" @@ -101,6 +115,7 @@ case_basicFunctorNL = e @=? (term sfb) sfb :: (IsString s) => s sfb = "foo\n(bar )" +case_colonFunctor :: Assertion case_colonFunctor = e @=? (term pvv) where e = TFunctor "possible" @@ -110,18 +125,37 @@ case_colonFunctor = e @=? (term pvv) ] :~ Span (Columns 9 9) (Columns 16 16) pvv ] - :~ Span (Columns 0 0) (Columns 17 17) "possible(Var:Val)" + :~ Span (Columns 0 0) (Columns 17 17) pvv pvv = "possible(Var:Val)" +case_failIncompleteExpr :: Assertion case_failIncompleteExpr = checkParseFail dterm "foo +" "(interactive):1:5: error: expected: \"(\",\n end of input\nfoo + " +-- Annotations {{{ + +case_tyAnnot :: Assertion +case_tyAnnot = e @=? (term fintx) + where + e = TFunctor "f" [TAnnot (AnnType ":int") + (TVar "X" :~ Span (Columns 7 7) (Columns 8 8) fintx) + :~ Span (Columns 2 2) (Columns 8 8) fintx + ] + :~ Span (Columns 0 0) (Columns 9 9) fintx + fintx = "f(:int X)" + +------------------------------------------------------------------------}}} + +------------------------------------------------------------------------}}} +-- Rules and lines {{{ + progline :: ByteString -> Spanned Line progline = unsafeParse dline proglines :: ByteString -> [Spanned Line] proglines = unsafeParse dlines +case_ruleSimple :: Assertion case_ruleSimple = e @=? (progline sr) where e = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr) @@ -132,6 +166,7 @@ case_ruleSimple = e @=? (progline sr) :~ Span (Columns 0 0) (Columns 10 10) sr sr = "goal += 1 ." +case_ruleExpr :: Assertion case_ruleExpr = e @=? (progline sr) where e = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr) @@ -147,6 +182,7 @@ case_ruleExpr = e @=? (progline sr) :~ Span (Columns 0 0) (Columns 18 18) sr sr = "goal += foo + bar ." +case_ruleDotExpr :: Assertion case_ruleDotExpr = e @=? (progline sr) where e = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr) @@ -162,6 +198,7 @@ case_ruleDotExpr = e @=? (progline sr) :~ Span (Columns 0 0) (Columns 15 15) sr sr = "goal += foo.bar." +case_ruleComma :: Assertion case_ruleComma = e @=? (progline sr) where e = LRule (Rule (TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 4 4) sr) @@ -178,6 +215,7 @@ case_ruleComma = e @=? (progline sr) :~ Span (Columns 0 0) (Columns 24 24) sr sr = "foo += bar(X), baz(X), X." +case_ruleKeywordsComma :: Assertion case_ruleKeywordsComma = e @=? (progline sr) where e = LRule (Rule (TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 4 4) sr) @@ -202,7 +240,7 @@ case_ruleKeywordsComma = e @=? (progline sr) :~ Span (Columns 0 0) (Columns 41 41) sr sr = "foo = new X whenever X is baz(Y), Y is 3 ." --- XXX It takes a while to parse this one. Why? +case_rules :: Assertion case_rules = e @=? (proglines sr) where e = [ LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr) @@ -220,7 +258,7 @@ case_rules = e @=? (proglines sr) ] sr = "goal += 1. goal += 2." --- XXX It takes a while to parse this one. Why? +case_rulesDotExpr :: Assertion case_rulesDotExpr = e @=? (proglines sr) where e = [ LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr) @@ -243,12 +281,18 @@ case_rulesDotExpr = e @=? (proglines sr) ] sr = "goal += foo.bar. goal += 1." +------------------------------------------------------------------------}}} +-- Harness toplevel {{{ + selftest :: TF.Test selftest = $(testGroupGenerator) main :: IO () main = $(defaultMainGenerator) +------------------------------------------------------------------------}}} +-- Experimental debris (XXX) {{{ + {- runParser :: (Show a) => (forall r . Language (Parser r String) a) -> B.ByteString -> Result TermDoc a runParser p = parseByteString (dynafy p <* eof) M.mempty @@ -263,3 +307,5 @@ cs r e = case r of Success w s | S.null w -> assertEqual "XXX" e s _ -> assertBool "XXX" False -} + +------------------------------------------------------------------------}}} -- 2.50.1