-{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE NoMonomorphismRestriction #-}
-{-# LANGUAGE OverloadedStrings #-}
-{-# LANGUAGE Rank2Types #-}
-
+---------------------------------------------------------------------------
+-- | A parser for some chunk of the Dyna language, using Trifecta
+--
-- Based in part on
-- https://github.com/ekmett/trifecta/blob/master/examples/RFC2616.hs
-- as well as the trifecta code itself
--
--- XXX no longer handles comments due to trifecta code upgrade
---
-- TODO:
-- We might want to use T.T.Literate, too, in the end.
-- Doesn't understand dynabase literals ("{ ... }")
-- Doesn't handle parenthesized aggregators
-- Doesn't handle shared subgoals ("whenever ... { ... }")
--- Doesn't understand "foo." style rules.
+
+-- Header material {{{
+
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE NoMonomorphismRestriction #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE Rank2Types #-}
module Dyna.ParserHS.Parser (
- Term(..), dterm, dtexpr,
+ Term(..), Annotation(..), dterm, dtexpr,
Rule(..), drule, Line(..), dline, dlines
) where
import qualified Data.ByteString.UTF8 as BU
import qualified Data.ByteString as B
import Data.Char (isSpace)
+import qualified Data.CharSet as CS
import qualified Data.HashSet as H
import Data.Semigroup ((<>))
import Data.Monoid (mempty)
import Dyna.XXX.Trifecta (identNL)
+------------------------------------------------------------------------}}}
+-- Parsed output definition {{{
+
+data Annotation = AnnType !B.ByteString
+ deriving (Eq,Ord,Show)
+
data Term = TFunctor {-# UNPACK #-} !B.ByteString ![Spanned Term]
+ | TAnnot {-# UNPACK #-} !Annotation !(Spanned Term)
| TVar {-# UNPACK #-} !B.ByteString
-- | TDBLit XXX
deriving (Eq,Ord,Show)
+-- | Rules are not just terms because we want to make it very syntactically
+-- explicit about the head being a term (though that's not an expressivity
+-- concern -- just use the parenthesized texpr case) so that there is no
+-- risk of parsing ambiguity.
+--
+-- XXX The span on Fact is a little silly
+data Rule = Fact (Spanned Term)
+ | Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term)
+ deriving (Eq,Ord,Show)
+
+-- XXX The span on LRule is a little silly
+-- XXX Having one kind of Pragma is probably wrong
+data Line = LRule (Spanned Rule)
+ | LPragma !(Spanned Term)
+ deriving (Eq,Ord,Show)
+
+
+------------------------------------------------------------------------}}}
+-- Utilities {{{
+
+bsf :: Functor f => f String -> f B.ByteString
+bsf = fmap BU.fromString
+
+------------------------------------------------------------------------}}}
+-- Identifier Syles {{{
+
+usualpunct :: CS.CharSet
+usualpunct = CS.fromList "!#$%&*+/<=>?@\\^|-~:."
+
dynaDotOperStyle :: TokenParsing m => IdentifierStyle m
dynaDotOperStyle = IdentifierStyle
{ styleName = "Dot Operator"
, styleStart = char '.'
- , styleLetter = oneOf "!#$%&*+/<=>?@\\^|-~:."
+ , styleLetter = oneOfSet $ usualpunct
+ , styleReserved = mempty
+ , styleHighlight = Operator
+ , styleReservedHighlight = ReservedOperator
+ }
+
+ -- | Colon is not a permitted beginning to a prefix
+ -- operator, as it is a sigil for type annotations.
+dynaPfxOperStyle :: TokenParsing m => IdentifierStyle m
+dynaPfxOperStyle = IdentifierStyle
+ { styleName = "Prefix Operator"
+ , styleStart = oneOfSet $ usualpunct CS.\\ CS.fromList ".:"
+ , styleLetter = oneOfSet $ usualpunct
, styleReserved = mempty
, styleHighlight = Operator
, styleReservedHighlight = ReservedOperator
dynaOperStyle :: TokenParsing m => IdentifierStyle m
dynaOperStyle = IdentifierStyle
- { styleName = "Operator"
- , styleStart = oneOf "!#$%&*+/<=>?@\\^|-~:"
- , styleLetter = oneOf "!#$%&*+/<=>?@\\^|-~:."
+ { styleName = "Infix Operator"
+ , styleStart = oneOfSet $ CS.delete '.' usualpunct
+ , styleLetter = oneOfSet $ usualpunct
, styleReserved = mempty
, styleHighlight = Operator
, styleReservedHighlight = ReservedOperator
}
+dynaTypeStyle :: TokenParsing m => IdentifierStyle m
+dynaTypeStyle = IdentifierStyle
+ { styleName = "Type Annotation"
+ , styleStart = char ':'
+ , styleLetter = (alphaNum <|> oneOf "_'")
+ , styleReserved = mempty
+ , styleHighlight = Operator
+ , styleReservedHighlight = ReservedOperator
+}
+
dynaAtomStyle :: TokenParsing m => IdentifierStyle m
dynaAtomStyle = IdentifierStyle
{ styleName = "Atom"
, styleReservedHighlight = ReservedIdentifier
}
+
+------------------------------------------------------------------------}}}
+-- Comment handling {{{
+
dynaCommentStyle :: CommentStyle
dynaCommentStyle = CommentStyle
{ commentStart = "{%" -- XXX?
slicedWith f (DL m) = DL $ slicedWith f m
rend = lift rend
restOfLine = lift restOfLine
-
-bsf = fmap BU.fromString
+------------------------------------------------------------------------}}}
+-- Atoms {{{
atom :: (Monad m, TokenParsing m) => m B.ByteString
atom = liftA BU.fromString stringLiteral
<|> (bsf $ ident dynaAtomStyle)
--- sparen :: MonadParser m => m a -> m a
--- sparen = between (char '(' *> spaces) (spaces <* char ')')
+------------------------------------------------------------------------}}}
+-- Terms and term expressions {{{
term :: DeltaParsing m => m (Spanned Term)
term = token $ choice
[ parens texpr
- , spanned $ TVar <$> (bsf$ident dynaVarStyle)
+ , spanned $ TVar <$> (bsf $ ident dynaVarStyle)
, try $ spanned $ flip TFunctor [] <$> atom <* (notFollowedBy $ char '(')
+ , try $ spanned $ mkta <$> (bsf $ ident dynaTypeStyle) <* spaces <*> term
, spanned $ parenfunc
- ]
+ ]
where
parenfunc = TFunctor <$> (highlight Identifier atom <?> "Functor")
<*> parens (texpr `sepBy` symbolic ',')
+ mkta ty te = TAnnot (AnnType ty) te
-- XXX right now all binops are at equal precedence and left-associative; that's wrong.
texpr :: DeltaParsing m => m (Spanned Term)
texpr = buildExpressionParser etable term <?> "Expression"
where
etable = [ [ Prefix $ uf (spanned $ bsf $ symbol "new") ]
- , [ Prefix $ uf (spanned $ bsf $ ident dynaOperStyle) ]
+ , [ Prefix $ uf (spanned $ bsf $ ident dynaPfxOperStyle) ]
, [ Infix (bf (spanned $ bsf $ ident dynaOperStyle)) AssocLeft ]
- , [ Infix (bf (spanned $ bsf $ dotOper)) AssocRight ]
+ , [ Infix (bf (spanned $ bsf $ dotOper)) AssocRight ]
, [ Infix (bf (spanned $ bsf $ symbol "is")) AssocNone ]
]
(x:~spx) <- f
pure (\a@(_:~spa) b@(_:~spb) -> (TFunctor x [a,b]):~(spa <> spx <> spb))
-hriss = highlight ReservedOperator . spanned . symbol
dterm, dtexpr :: DeltaParsing m => m (Spanned Term)
dterm = unDL term
dtexpr = unDL texpr
--- | Rules are not just terms because we want to make it very syntactically
--- explicit about the head being a term (though that's not an expressivity
--- concern -- just use the parenthesized texpr case) so that there is no
--- risk of parsing ambiguity.
---
--- XXX The span on Fact is a little silly
-data Rule = Fact (Spanned Term)
- | Rule !(Spanned Term) !B.ByteString ![Spanned Term] !(Spanned Term)
- deriving (Eq,Ord,Show)
-
--- XXX The span on LRule is a little silly
--- XXX Having one kind of Pragma is probably wrong
-data Line = LRule (Spanned Rule)
- | LPragma !(Spanned Term)
- deriving (Eq,Ord,Show)
+------------------------------------------------------------------------}}}
+-- Rules {{{
+-- | Grab the head (term!) and aggregation operator from a line that
+-- we hope is a rule.
+rulepfx :: DeltaParsing f => f ([Spanned Term] -> Spanned Term -> Rule)
rulepfx = Rule <$> term
<* spaces
- <*> (bsf$ident dynaOperStyle <?> "Aggregator")
+ <*> (bsf $ ident dynaOperStyle <?> "Aggregator")
rule :: DeltaParsing m => m Rule
-rule = choice [(try (liftA flip rulepfx
+rule = choice [
+ -- HEAD OP= RESULT whenever EXPRS .
+ (try (liftA flip rulepfx
<*> texpr
- <* hriss "whenever"))
+ <* hrss "whenever"))
<*> (texpr `sepBy1` symbolic ',')
+ -- HEAD OP= EXPRS, RESULT .
, (try rulepfx)
<*> many (try (texpr <* symbolic ','))
<*> texpr
+ -- HEAD .
, Fact <$> term
]
+ where
+ hrss = highlight ReservedOperator . spanned . symbol
drule :: DeltaParsing m => m (Spanned Rule)
drule = spanned rule
+------------------------------------------------------------------------}}}
+-- Lines {{{
+
progline :: DeltaParsing m => m (Spanned Line)
progline = spanned $ choice [ LRule <$> drule
, LPragma <$> (symbol ":-"
]
dline :: DeltaParsing m => m (Spanned Line)
--- dline = unDL (progline <* optional (char '.' <* (spaces <|> eof)))
dline = unDL (progline <* optional (char '.') <* optional newline)
dlines :: DeltaParsing m => m [Spanned Line]
dlines = unDL (progline `sepEndBy` (char '.' <* spaces))
+
+------------------------------------------------------------------------}}}
-{-# LANGUAGE TemplateHaskell #-}
-{-# LANGUAGE Rank2Types #-}
-{-# LANGUAGE OverloadedStrings #-}
-
+---------------------------------------------------------------------------
+-- | Parser self-test cases
+--
-- TODO:
-- Writing these is still too hard, Template Haskell and the REPL
-- notwithstanding.
-- Test.Framework.TH appears not to understand comments at the
-- moment, and parses right through them.
+-- Header material {{{
+
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE Rank2Types #-}
+{-# LANGUAGE OverloadedStrings #-}
+
module Dyna.ParserHS.ParserSelftest where
-import Control.Applicative ((<*))
+-- import Control.Applicative ((<*))
import Data.ByteString (ByteString)
-import Data.Foldable (toList)
-import Data.Monoid (mempty)
-import qualified Data.Sequence as S
+-- import Data.Foldable (toList)
+-- import Data.Monoid (mempty)
+-- import qualified Data.Sequence as S
import Data.String
import qualified Test.Framework as TF
import Test.Framework.Providers.HUnit
import Dyna.ParserHS.Parser
import Dyna.XXX.TrifectaTest
+------------------------------------------------------------------------}}}
+-- Terms and basic handling {{{
term :: ByteString -> Spanned Term
term = unsafeParse dterm
+case_basicAtom :: Assertion
case_basicAtom = e @=? (term "foo")
where e = TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 3 3) "foo"
+case_basicAtomTWS :: Assertion
case_basicAtomTWS = e @=? (term "foo ")
where e = TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 4 4) "foo "
+case_basicFunctor :: Assertion
case_basicFunctor = e @=? (term sfb)
where
e = TFunctor "foo"
sfb :: (IsString s) => s
sfb = "foo(bar)"
+case_nestedFunctorsWithArgs :: Assertion
case_nestedFunctorsWithArgs = e @=? (term st)
where
e = TFunctor "foo"
st :: (IsString s) => s
st = "foo(bar,X,bif(),baz(quux,Y))"
+case_basicFunctorComment :: Assertion
case_basicFunctorComment = e @=? (term sfb)
where
e = TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 8 8) sfb
sfb :: (IsString s) => s
sfb = "foo %xxx"
+case_basicFunctorNLComment :: Assertion
case_basicFunctorNLComment = e @=? (term sfb)
where
e = TFunctor "foo"
sfb :: (IsString s) => s
sfb = "foo(%xxx\n1,2\n)"
-
+case_basicFunctorTWS :: Assertion
case_basicFunctorTWS = e @=? (term sfb)
where
e = TFunctor "foo"
sfb :: (IsString s) => s
sfb = "foo\n(bar )"
+case_basicFunctorNL :: Assertion
case_basicFunctorNL = e @=? (term sfb)
where
e = TFunctor "foo"
sfb :: (IsString s) => s
sfb = "foo\n(bar )"
+case_colonFunctor :: Assertion
case_colonFunctor = e @=? (term pvv)
where
e = TFunctor "possible"
]
:~ Span (Columns 9 9) (Columns 16 16) pvv
]
- :~ Span (Columns 0 0) (Columns 17 17) "possible(Var:Val)"
+ :~ Span (Columns 0 0) (Columns 17 17) pvv
pvv = "possible(Var:Val)"
+case_failIncompleteExpr :: Assertion
case_failIncompleteExpr = checkParseFail dterm "foo +"
"(interactive):1:5: error: expected: \"(\",\n end of input\nfoo +<EOF> "
+-- Annotations {{{
+
+case_tyAnnot :: Assertion
+case_tyAnnot = e @=? (term fintx)
+ where
+ e = TFunctor "f" [TAnnot (AnnType ":int")
+ (TVar "X" :~ Span (Columns 7 7) (Columns 8 8) fintx)
+ :~ Span (Columns 2 2) (Columns 8 8) fintx
+ ]
+ :~ Span (Columns 0 0) (Columns 9 9) fintx
+ fintx = "f(:int X)"
+
+------------------------------------------------------------------------}}}
+
+------------------------------------------------------------------------}}}
+-- Rules and lines {{{
+
progline :: ByteString -> Spanned Line
progline = unsafeParse dline
proglines :: ByteString -> [Spanned Line]
proglines = unsafeParse dlines
+case_ruleSimple :: Assertion
case_ruleSimple = e @=? (progline sr)
where
e = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr)
:~ Span (Columns 0 0) (Columns 10 10) sr
sr = "goal += 1 ."
+case_ruleExpr :: Assertion
case_ruleExpr = e @=? (progline sr)
where
e = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr)
:~ Span (Columns 0 0) (Columns 18 18) sr
sr = "goal += foo + bar ."
+case_ruleDotExpr :: Assertion
case_ruleDotExpr = e @=? (progline sr)
where
e = LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr)
:~ Span (Columns 0 0) (Columns 15 15) sr
sr = "goal += foo.bar."
+case_ruleComma :: Assertion
case_ruleComma = e @=? (progline sr)
where
e = LRule (Rule (TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 4 4) sr)
:~ Span (Columns 0 0) (Columns 24 24) sr
sr = "foo += bar(X), baz(X), X."
+case_ruleKeywordsComma :: Assertion
case_ruleKeywordsComma = e @=? (progline sr)
where
e = LRule (Rule (TFunctor "foo" [] :~ Span (Columns 0 0) (Columns 4 4) sr)
:~ Span (Columns 0 0) (Columns 41 41) sr
sr = "foo = new X whenever X is baz(Y), Y is 3 ."
--- XXX It takes a while to parse this one. Why?
+case_rules :: Assertion
case_rules = e @=? (proglines sr)
where
e = [ LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr)
]
sr = "goal += 1. goal += 2."
--- XXX It takes a while to parse this one. Why?
+case_rulesDotExpr :: Assertion
case_rulesDotExpr = e @=? (proglines sr)
where
e = [ LRule (Rule (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 5 5) sr)
]
sr = "goal += foo.bar. goal += 1."
+------------------------------------------------------------------------}}}
+-- Harness toplevel {{{
+
selftest :: TF.Test
selftest = $(testGroupGenerator)
main :: IO ()
main = $(defaultMainGenerator)
+------------------------------------------------------------------------}}}
+-- Experimental debris (XXX) {{{
+
{-
runParser :: (Show a) => (forall r . Language (Parser r String) a) -> B.ByteString -> Result TermDoc a
runParser p = parseByteString (dynafy p <* eof) M.mempty
Success w s | S.null w -> assertEqual "XXX" e s
_ -> assertBool "XXX" False
-}
+
+------------------------------------------------------------------------}}}