From 49640d6fdcb69c9f1f9fdda1b38c9eb482a98789 Mon Sep 17 00:00:00 2001 From: Nathaniel Wesley Filardo Date: Thu, 29 Nov 2012 02:10:31 -0500 Subject: [PATCH] Tweak the parser as suggested by Tim V --- src/Dyna/ParserHS/Parser.hs | 108 +++++++++++++++++++++++----------- src/Dyna/ParserHS/Selftest.hs | 68 ++++++++++++--------- src/Dyna/XXX/Trifecta.hs | 61 ++++++++++++++++++- 3 files changed, 173 insertions(+), 64 deletions(-) diff --git a/src/Dyna/ParserHS/Parser.hs b/src/Dyna/ParserHS/Parser.hs index 3956bf9..cc448ce 100644 --- a/src/Dyna/ParserHS/Parser.hs +++ b/src/Dyna/ParserHS/Parser.hs @@ -46,7 +46,7 @@ import Text.Parser.Token.Style import Text.Trifecta import Dyna.Term.TTerm (Annotation(..)) -import Dyna.XXX.Trifecta (identNL) +import Dyna.XXX.Trifecta (identNL,stringLiteralSQ) ------------------------------------------------------------------------}}} -- Parsed output definition {{{ @@ -56,6 +56,7 @@ data Term = TFunctor !B.ByteString | TAnnot !(Annotation (Spanned Term)) !(Spanned Term) | TNumeric !(Either Integer Double) + | TString !B.ByteString | TVar !B.ByteString deriving (Eq,Ord,Show) @@ -186,7 +187,7 @@ instance DeltaParsing m => DeltaParsing (DynaLanguage m) where -- Atoms {{{ atom :: (Monad m, TokenParsing m) => m B.ByteString -atom = liftA BU.fromString stringLiteral +atom = liftA BU.fromString stringLiteralSQ <|> (bsf $ ident dynaAtomStyle) ------------------------------------------------------------------------}}} @@ -199,6 +200,8 @@ term = token $ choice , spanned $ mkta <$> (colon *> term) <* whiteSpace <*> term + , try $ spanned $ TString <$> bsf stringLiteral + , try $ spanned $ TNumeric <$> naturalOrDouble , try $ spanned $ flip TFunctor [] <$> atom @@ -215,7 +218,22 @@ term = token $ choice mkta ty te = TAnnot (AnnType ty) te --- XXX right now all binops are at equal precedence and left-associative; that's wrong. +-- XXX I remember now why we didn't handle ',' as an operator: if it were, +-- we'd have no way of distinguishing between @f(a,b)@ as +-- +-- > TFunctor "f" [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] +-- +-- and +-- +-- > TFunctor "f" [TFunctor "," [TFunctor "a" [] :~ _, TFunctor "b" [] :~ _] :~ _] +-- +-- We can fix this, but it means that we should have a separate expression +-- parser for contexts where "comma means argument separation" and "comma +-- means evaluation separator". I don't yet know how I feel about +-- the "whenever" (and "is"?) operator(s) being available in the former table. + +-- XXX right now all binops are at equal precedence and left-associative; +-- that's wrong. texpr :: DeltaParsing m => m (Spanned Term) texpr = buildExpressionParser etable term "Expression" where @@ -226,18 +244,30 @@ texpr = buildExpressionParser etable term "Expression" , [ Infix (bf (spanned $ bsf $ symbol "is")) AssocNone ] ] - -- The dot operator is required to have not-a-space following (to avoid - -- confusion with the end-of-rule marker, which is taken to be "dot space" - -- or "dot eof"). - dotAny = char '.' <* satisfy (not . isSpace) - dotOper = try (lookAhead dotAny *> identNL dynaDotOperStyle) +-- The dot operator is required to have not-a-space following (to avoid +-- confusion with the end-of-rule marker, which is taken to be "dot space" +-- or "dot eof"). +-- +-- XXX is the use of isSpace here correct or do we want whiteSpace? +dotAny :: CharParsing m => m Char +dotAny = char '.' <* satisfy (not . isSpace) + +dotOper :: (Monad m, TokenParsing m) => m [Char] +dotOper = try (lookAhead dotAny *> identNL dynaDotOperStyle) - uf f = do - (x:~spx) <- f - pure (\a@(_:~sp) -> (TFunctor x [a]):~(spx <> sp)) - bf f = do - (x:~spx) <- f - pure (\a@(_:~spa) b@(_:~spb) -> (TFunctor x [a,b]):~(spa <> spx <> spb)) +uf :: (Monad m, Applicative m) + => m (Spanned B.ByteString) + -> m (Spanned Term -> Spanned Term) +uf f = do + (x:~spx) <- f + pure (\a@(_:~sp) -> (TFunctor x [a]):~(spx <> sp)) + +bf :: (Monad m, Applicative m) + => m (Spanned B.ByteString) + -> m (Spanned Term -> Spanned Term -> Spanned Term) +bf f = do + (x:~spx) <- f + pure (\a@(_:~spa) b@(_:~spb) -> (TFunctor x [a,b]):~(spa <> spx <> spb)) dterm, dtexpr :: DeltaParsing m => m (Spanned Term) @@ -252,26 +282,28 @@ dtexpr = unDL texpr rulepfx :: DeltaParsing f => f ([Spanned Term] -> Spanned Term -> Rule) rulepfx = Rule <$> term <* whiteSpace - -- XXX probably a better way to do this.. probably want aggregators have suffix = + -- XXX probably a better way to do this.. + -- probably want aggregators have suffix = <*> ((bsf $ some $ satisfy $ not . isSpace) "Aggregator") <* whiteSpace rule :: DeltaParsing m => m Rule rule = choice [ - -- HEAD OP= RESULTEXPR whenever EXPRS . + -- HEAD OP= RESULTEXPR whenever EXPRS . (try (liftA flip rulepfx - <*> texpr - <* hrss "whenever")) - <*> (texpr `sepBy1` symbolic ',') - - -- HEAD OP= EXPRS, RESULTEXPR . - , try (rulepfx - <*> many (try (texpr <* symbolic ',')) - <*> texpr) - - -- HEAD . - , Fact <$> term - ] + <*> texpr + <* hrss "whenever")) + <*> (texpr `sepBy1` symbolic ',') + + -- HEAD OP= EXPRS, RESULTEXPR . + , try (rulepfx + <*> many (try (texpr <* symbolic ',')) + <*> texpr) + + -- HEAD . + , Fact <$> term + ] + <* optional (char '.') where hrss = highlight ReservedOperator . spanned . symbol @@ -281,17 +313,23 @@ drule = unDL (spanned rule) ------------------------------------------------------------------------}}} -- Lines {{{ +dpragma :: DeltaParsing m => m (Spanned Term) +dpragma = symbol ":-" + *> whiteSpace + *> texpr + <* whiteSpace + <* optional (char '.') + progline :: DeltaParsing m => m (Spanned Line) -progline = do - whiteSpace - spanned (choice [ LRule <$> drule - , LPragma <$> (symbol ":-" *> whiteSpace *> texpr) - ]) +progline = whiteSpace + *> spanned (choice [ LRule <$> drule + , LPragma <$> dpragma + ]) dline :: DeltaParsing m => m (Spanned Line) -dline = unDL (progline <* optional (char '.') <* optional whiteSpace) +dline = unDL (progline <* optional whiteSpace) dlines :: DeltaParsing m => m [Spanned Line] -dlines = unDL (progline `sepEndBy` (char '.' <* whiteSpace)) +dlines = unDL (many (progline <* optional whiteSpace)) ------------------------------------------------------------------------}}} diff --git a/src/Dyna/ParserHS/Selftest.hs b/src/Dyna/ParserHS/Selftest.hs index 785927f..374f318 100644 --- a/src/Dyna/ParserHS/Selftest.hs +++ b/src/Dyna/ParserHS/Selftest.hs @@ -16,7 +16,7 @@ module Dyna.ParserHS.Selftest where --- import Control.Applicative ((<*)) +import Control.Applicative import Data.ByteString (ByteString) import qualified Data.ByteString as B -- import Data.Foldable (toList) @@ -160,17 +160,18 @@ case_tyAnnot = e @=? (term fintx) -- Rules and lines {{{ progline :: ByteString -> Spanned Line -progline = unsafeParse dline +progline = unsafeParse (dline <* eof) proglines :: ByteString -> [Spanned Line] -proglines = unsafeParse dlines +proglines = unsafeParse (dlines <* eof) case_ruleFact :: Assertion case_ruleFact = e @=? (progline sr) where e = LRule (Fact (TFunctor "goal" [] :~ Span (Columns 0 0) (Columns 4 4) "goal.") - :~ Span (Columns 0 0) (Columns 4 4) "goal.") - :~ Span (Columns 0 0) (Columns 4 4) "goal." + :~ ts) + :~ ts + ts = Span (Columns 0 0) (Columns 5 5) "goal." sr = "goal." case_ruleSimple :: Assertion @@ -180,8 +181,9 @@ case_ruleSimple = e @=? (progline sr) "+=" [] (TNumeric (Left 1) :~ Span (Columns 8 8) (Columns 9 9) sr) - :~ Span (Columns 0 0) (Columns 9 9) sr) - :~ Span (Columns 0 0) (Columns 9 9) sr + :~ ts) + :~ ts + ts = Span (Columns 0 0) (Columns 10 10) sr sr = "goal += 1." case_ruleExpr :: Assertion @@ -196,8 +198,9 @@ case_ruleExpr = e @=? (progline sr) ] :~ Span (Columns 8 8) (Columns 18 18) sr ) - :~ Span (Columns 0 0) (Columns 18 18) sr) - :~ Span (Columns 0 0) (Columns 18 18) sr + :~ ts) + :~ ts + ts = Span (Columns 0 0) (Columns 19 19) sr sr = "goal += foo + bar ." case_ruleDotExpr :: Assertion @@ -212,8 +215,9 @@ case_ruleDotExpr = e @=? (progline sr) ] :~ Span (Columns 8 8) (Columns 15 15) sr ) - :~ Span (Columns 0 0) (Columns 15 15) sr) - :~ Span (Columns 0 0) (Columns 15 15) sr + :~ ts) + :~ ts + ts = Span (Columns 0 0) (Columns 16 16) sr sr = "goal += foo.bar." case_ruleComma :: Assertion @@ -229,8 +233,9 @@ case_ruleComma = e @=? (progline sr) :~ Span (Columns 15 15) (Columns 21 21) sr ] (TVar "X" :~ Span (Columns 23 23) (Columns 24 24) sr) - :~ Span (Columns 0 0) (Columns 24 24) sr) - :~ Span (Columns 0 0) (Columns 24 24) sr + :~ ts) + :~ ts + ts = Span (Columns 0 0) (Columns 25 25) sr sr = "foo += bar(X), baz(X), X." case_ruleKeywordsComma :: Assertion @@ -254,8 +259,9 @@ case_ruleKeywordsComma = e @=? (progline sr) (TFunctor "new" [TVar "X" :~ Span (Columns 10 10) (Columns 12 12) sr] :~ Span (Columns 6 6) (Columns 12 12) sr) - :~ Span (Columns 0 0) (Columns 41 41) sr) - :~ Span (Columns 0 0) (Columns 41 41) sr + :~ ts) + :~ ts + ts = Span (Columns 0 0) (Columns 42 42) sr sr = "foo = new X whenever X is baz(Y), Y is 3 ." case_rules :: Assertion @@ -265,15 +271,17 @@ case_rules = e @=? (proglines sr) "+=" [] (TNumeric (Left 1) :~ Span (Columns 8 8) (Columns 10 10) sr) - :~ Span (Columns 0 0) (Columns 10 10) sr) - :~ Span (Columns 0 0) (Columns 10 10) sr + :~ s1) + :~ s1 , LRule (Rule (TFunctor "goal" [] :~ Span (Columns 12 12) (Columns 17 17) sr) "+=" [] (TNumeric (Left 2) :~ Span (Columns 20 20) (Columns 22 22) sr) - :~ Span (Columns 12 12) (Columns 22 22) sr) - :~ Span (Columns 12 12) (Columns 22 22) sr + :~ s2) + :~ s2 ] + s1 = Span (Columns 0 0) (Columns 11 11) sr + s2 = Span (Columns 12 12) (Columns 23 23) sr sr = "goal += 1 . goal += 2 ." case_rulesWhitespace :: Assertion @@ -283,19 +291,21 @@ case_rulesWhitespace = e @=? (proglines sr) "+=" [] (TNumeric (Left 1) :~ Span (Lines 1 4 19 4) (Lines 1 6 21 6) l1) - :~ Span (Columns 2 2) (Lines 1 6 21 6) l0) - :~ Span (Columns 2 2) (Lines 1 6 21 6) l0 + :~ s1) + :~ s1 , LRule (Rule (TFunctor "goal" [] :~ Span (Lines 3 1 31 1) (Lines 3 6 36 6) l3) "+=" [] (TNumeric (Left 2) :~ Span (Lines 3 9 39 9) (Lines 3 11 41 11) l3) - :~ Span (Lines 3 1 31 1) (Lines 3 11 41 11) l3) - :~ Span (Lines 3 1 31 1) (Lines 3 11 41 11) " goal += 2 .\n" + :~ s2) + :~ s2 ] l0 = " goal%comment\n" l1 = " += 1 .\n" l2 = "%test \n" - l3 = " goal += 2 .\n" + l3 = " goal += 2 . " + s1 = Span (Columns 2 2) (Lines 1 7 22 7) l0 + s2 = Span (Lines 3 1 31 1) (Lines 3 12 42 12) l3 sr = B.concat [l0,l1,l2,l3] @@ -311,15 +321,17 @@ case_rulesDotExpr = e @=? (proglines sr) ] :~ Span (Columns 8 8) (Columns 15 15) sr ) - :~ Span (Columns 0 0) (Columns 15 15) sr) - :~ Span (Columns 0 0) (Columns 15 15) sr + :~ s1) + :~ s1 , LRule (Rule (TFunctor "goal" [] :~ Span (Columns 17 17) (Columns 22 22) sr) "+=" [] (TNumeric (Left 1) :~ Span (Columns 25 25) (Columns 27 27) sr) - :~ Span (Columns 17 17) (Columns 27 27) sr) - :~ Span (Columns 17 17) (Columns 27 27) sr + :~ s2) + :~ s2 ] + s1 = Span (Columns 0 0) (Columns 16 16) sr + s2 = Span (Columns 17 17) (Columns 28 28) sr sr = "goal += foo.bar. goal += 1 ." ------------------------------------------------------------------------}}} diff --git a/src/Dyna/XXX/Trifecta.hs b/src/Dyna/XXX/Trifecta.hs index e1deb82..dd5a517 100644 --- a/src/Dyna/XXX/Trifecta.hs +++ b/src/Dyna/XXX/Trifecta.hs @@ -2,15 +2,18 @@ -- XXX contribute back to trifecta module Dyna.XXX.Trifecta ( - identNL, pureSpanned, triInteract + identNL, pureSpanned, stringLiteralSQ, triInteract ) where import Control.Applicative import Control.Monad (when) import qualified Data.ByteString.UTF8 as BU +import Data.Char +import Data.List (foldl') import Data.Monoid (mempty) import Data.HashSet as HashSet (member) import qualified Data.Semigroup.Reducer as R +import Text.Parser.Token.Highlight import Text.Trifecta import Text.Trifecta.Delta @@ -26,6 +29,62 @@ identNL s = try $ do when (HashSet.member name (styleReserved s)) $ unexpected $ "reserved " ++ styleName s ++ " " ++ show name return name +-- | Just like stringLiteral but with single quotes. +-- +-- belongs in Text.Parser.Token +stringLiteralSQ :: TokenParsing m => m String +stringLiteralSQ = token (highlight StringLiteral lit) where + lit = Prelude.foldr (maybe id (:)) "" + <$> between (char '\'') (char '\'' "end of string") (many stringChar) + "string" + stringChar = Just <$> stringLetter + <|> stringEscape + "string character" + stringLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026')) + -- XXX That is, charLetter + + stringEscape = highlight EscapeCode $ char '\\' *> esc where + esc = Nothing <$ escapeGap + <|> Nothing <$ escapeEmpty + <|> Just <$> escapeCode + escapeEmpty = char '&' + escapeGap = skipSome space *> (char '\\' "end of string gap") +{-# INLINE stringLiteralSQ #-} + +-- XXX Duplicated from Text.Parser.Token +escapeCode :: TokenParsing m => m Char +escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) "escape code" + where + charControl = (\c -> toEnum (fromEnum c - fromEnum 'A')) <$> (char '^' *> upper) + charNum = toEnum . fromInteger <$> num where + num = decimal + <|> (char 'o' *> number 8 octDigit) + <|> (char 'x' *> number 16 hexDigit) + charEsc = choice $ parseEsc <$> escMap + parseEsc (c,code) = code <$ char c + escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'") + charAscii = choice $ parseAscii <$> asciiMap + parseAscii (asc,code) = try $ code <$ string asc + asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2) + ascii2codes, ascii3codes :: [String] + ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO" + , "SI","EM","FS","GS","RS","US","SP"] + ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK" + ,"BEL","DLE","DC1","DC2","DC3","DC4","NAK" + ,"SYN","ETB","CAN","SUB","ESC","DEL"] + ascii2, ascii3 :: [Char] + ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI' + ,'\EM','\FS','\GS','\RS','\US','\SP'] + ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK' + ,'\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK' + ,'\SYN','\ETB','\CAN','\SUB','\ESC','\DEL'] + +-- XXX Duplicated from Text.Parser.Token +number :: TokenParsing m => Integer -> m Char -> m Integer +number base baseDigit = + foldl' (\x d -> base*x + toInteger (digitToInt d)) 0 <$> some baseDigit + + -- | Just like "pure" but right here in the parsing state -- -- belongs in Text.Trifecta.Diagnostic.Rendering.Span -- 2.50.1