From: Nathaniel Wesley Filardo Date: Sat, 15 Jun 2013 02:15:11 +0000 (-0400) Subject: Fix bugs in parser and add tests X-Git-Url: https://hydra-www.ietfng.org/gitweb/?a=commitdiff_plain;h=133a4f39e3aa512210498bf655b6e57d28c81a3d;p=dyna2 Fix bugs in parser and add tests Notably, this fixes the parser backtracking all the way to the start when it encounters an operator it doesn't know about. While here, token-ize constituent parsers and move parser export types to their own module. --- diff --git a/src/Dyna/Analysis/ANF.hs b/src/Dyna/Analysis/ANF.hs index e87ba34..96c2a7d 100644 --- a/src/Dyna/Analysis/ANF.hs +++ b/src/Dyna/Analysis/ANF.hs @@ -80,7 +80,7 @@ import qualified Data.Set as S -- import qualified Debug.Trace as XT import Dyna.Main.Defns import Dyna.Main.Exception -import qualified Dyna.ParserHS.Parser as P +import qualified Dyna.ParserHS.Types as P import Dyna.Term.TTerm import Dyna.Term.Normalized import Dyna.Term.SurfaceSyntax diff --git a/src/Dyna/Backend/Python/Backend.hs b/src/Dyna/Backend/Python/Backend.hs index d40d6d6..dabe150 100644 --- a/src/Dyna/Backend/Python/Backend.hs +++ b/src/Dyna/Backend/Python/Backend.hs @@ -34,6 +34,7 @@ import Dyna.Analysis.Mode import Dyna.Analysis.RuleMode import Dyna.Backend.BackendDefn import Dyna.Main.Exception +import qualified Dyna.ParserHS.Types as P import qualified Dyna.ParserHS.Parser as P import Dyna.Term.TTerm import Dyna.XXX.PPrint diff --git a/src/Dyna/ParserHS/OneshotDriver.hs b/src/Dyna/ParserHS/OneshotDriver.hs index 4020541..3ce581d 100644 --- a/src/Dyna/ParserHS/OneshotDriver.hs +++ b/src/Dyna/ParserHS/OneshotDriver.hs @@ -23,11 +23,13 @@ import Control.Monad.State import qualified Data.ByteString as B import qualified Data.ByteString.UTF8 as BU import qualified Data.Map as M +import Data.Maybe import qualified Data.Set as S import Data.Monoid (mempty) import Dyna.Main.Defns import Dyna.Main.Exception import Dyna.ParserHS.Parser +import Dyna.ParserHS.Types import Dyna.Term.SurfaceSyntax import Dyna.Term.TTerm import Dyna.XXX.Trifecta (prettySpanLoc) @@ -188,28 +190,33 @@ pragmasFromPCS (PCS dt_mk dt_over _ nextRule :: (DeltaParsing m, LookAheadParsing m, MonadState PCS m) => Maybe (S.Set String) - -> m (Spanned Rule) + -> m (Maybe (Spanned Rule)) nextRule aggs = go where go = do (l :~ s) <- gets (mkdlc aggs) >>= parse case l of - LPragma p -> pcsProcPragma (p :~ s) >> go - LRule r -> return r + PLPragma p -> pcsProcPragma (p :~ s) >> return Nothing + PLRule r -> return (Just r) oneshotDynaParser :: (DeltaParsing m, LookAheadParsing m) => Maybe (S.Set String) -> m ParsedDynaProgram oneshotDynaParser aggs = (postProcess =<<) $ flip runStateT defPCS - $ many (try $ do - r <- nextRule aggs - rix <- pcs_ruleix <<%= (+1) - dt <- use pcs_dt_cache - return $ (rix, dt, r)) - <* optional (dynaWhiteSpace (someSpace)) + $ optional (dynaWhiteSpace (someSpace)) + *> many (try $ do + mr <- nextRule aggs + case mr of + Nothing -> return Nothing + (Just r) -> do + rix <- pcs_ruleix <<%= (+1) + dt <- use pcs_dt_cache + return $ Just (rix, dt, r)) where - postProcess (rs,pcs) = return $ PDP rs (pcs ^. pcs_iagg_map) (pragmasFromPCS pcs) - + postProcess (rs,pcs) = return $ + PDP (catMaybes rs) + (pcs ^. pcs_iagg_map) + (pragmasFromPCS pcs) ------------------------------------------------------------------------}}} diff --git a/src/Dyna/ParserHS/Parser.hs b/src/Dyna/ParserHS/Parser.hs index 1cd0421..f8c4e63 100644 --- a/src/Dyna/ParserHS/Parser.hs +++ b/src/Dyna/ParserHS/Parser.hs @@ -25,9 +25,8 @@ -- this depends on an upstream fix in Text.Parser.Expression. -- But: I am not worried about it since we don't handle gensyms -- anywhere else in the pipeline yet) --- --- Header material {{{ -{-# LANGUAGE DeriveDataTypeable #-} + +-- Header material {{{ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} @@ -40,14 +39,9 @@ module Dyna.ParserHS.Parser ( -- * Parser configuration inputs EOT, mkEOT, DLCfg(..), - -- * Parser output types - NameWithArgs(..), - -- ** Surface langauge - Term(..), Rule(..), dynaWhiteSpace, genericAggregators, + dynaWhiteSpace, genericAggregators, -- ** Pragmas - ParsedInst(..), ParsedModeInst, Pragma(..), renderPragma, - -- ** Line - Line(..), + renderPragma, -- * Action parse, -- * Test harness hooks @@ -63,7 +57,6 @@ import Control.Monad.Reader import qualified Data.ByteString.UTF8 as BU import qualified Data.ByteString as B import qualified Data.CharSet as CS -import qualified Data.Data as D import qualified Data.HashSet as H import qualified Data.Map as M import Data.Semigroup ((<>)) @@ -71,7 +64,7 @@ import Data.Monoid (mempty) import Dyna.Analysis.Mode.Inst import qualified Dyna.Analysis.Mode.InstPretty as IP import Dyna.Analysis.Mode.Uniq -import Dyna.Main.Defns +import Dyna.ParserHS.Types import Dyna.Term.TTerm (Annotation(..), TBase(..), DFunct) import Dyna.Term.SurfaceSyntax @@ -85,84 +78,6 @@ import Text.Parser.Token.Style import qualified Text.PrettyPrint.Free as PP import Text.Trifecta -------------------------------------------------------------------------}}} --- Parsed output definitions {{{ - -data Term = TFunctor B.ByteString - [Spanned Term] - | TAnnot (Annotation (Spanned Term)) - (Spanned Term) - | TVar B.ByteString - | TBase TBase - deriving (D.Data,D.Typeable,Eq,Ord,Show) - --- | Rules are not just terms because we want to make it very syntactically --- explicit about the head being a term (though that's not an expressivity --- concern -- just use the parenthesized texpr case) so that there is no --- risk of parsing ambiguity. -data Rule = Rule (Spanned Term) B.ByteString (Spanned Term) - deriving (Eq,Show) - -data NameWithArgs = PNWA B.ByteString [B.ByteString] - deriving (Eq,Show) - --- | Pragmas that are recognized by the parser -data Pragma = PDispos SelfDispos B.ByteString [ArgDispos] - -- ^ Assert the evaluation disposition of a functor - - | PDisposDefl String - -- ^ Specify the default disposition handlers - -- for subsequent context. - -- - -- Note that the override defintions are - -- preserved across this operation! - -- (XXX is that what we want?) - - | PIAggr B.ByteString Int B.ByteString - -- ^ Assert the aggregator for a functor/arity. - - | PInst NameWithArgs - ParsedInst - -- ^ Declare an instantiation state: name and body - - | PMode NameWithArgs - ParsedModeInst - ParsedModeInst - -- ^ Declare a mode: name, input, and output - - | POperAdd Fixity Integer B.ByteString - -- ^ Add an operator - - | POperDel B.ByteString - -- ^ Remove an operator - - -- | PQMode DFunctAr - -- ^ A query mode declaration - - | PRuleIx RuleIx - -- ^ Set the rule index. - -- - -- XXX This is a bit of a hack to allow external drivers to - -- feed rules incrementally; those drivers should treat the - -- rule index as an opaque token rather than something to be - -- interpreted. Eventually this will go away, when our - -- REPLs have captive compilers. - - {- --- | PMisc Term - -- ^ Fall-back parser for :- lines. -} - deriving (Eq,Show) - --- | The type of a parsed inst declaration -data ParsedInst = PIVar !B.ByteString - | PIInst !(InstF DFunct ParsedInst) - deriving (Eq,Show) - -type ParsedModeInst = Either NameWithArgs ParsedInst - -data Line = LRule (Spanned Rule) - | LPragma Pragma - deriving (Show) - ------------------------------------------------------------------------}}} -- Parser input definitions {{{ @@ -381,10 +296,10 @@ var = bsf $ ident dynaVarStyle ------------------------------------------------------------------------}}} -- Atoms {{{ -parseAtom :: (Monad m, TokenParsing m) => m B.ByteString +parseAtom :: (Monad m, TokenParsing m) => m DFunct parseAtom = (liftA BU.fromString stringLiteralSQ <|> name) "Atom" -parseFunctor :: (Monad m, TokenParsing m) => m B.ByteString +parseFunctor :: (Monad m, TokenParsing m) => m DFunct parseFunctor = highlight Identifier parseAtom "Functor" ------------------------------------------------------------------------}}} @@ -400,7 +315,7 @@ term = token $ choice [ parens tfexpr , spanned $ TVar <$> var - , spanned $ mkta <$> (colon *> term) <* whiteSpace <*> term + , spanned $ mkta <$> (colon *> term) <*> term , try $ spanned $ TBase . TString <$> bsf stringLiteral @@ -506,15 +421,13 @@ genericAggregators = token rule :: (DeltaParsing m, LookAheadParsing m, MonadReader DLCfg m) => m Rule -rule = do - _ <- whiteSpace +rule = token $ do h@(_ :~ hs) <- term choice [ do _ <- try (char '.' <* lookAhead whiteSpace) return (Rule h "|=" (TFunctor "true" [] :~ hs)) , do - aggr <- join $ asks dlc_aggrs - _ <- whiteSpace + aggr <- token $ join $ asks dlc_aggrs body <- tfexpr _ <- char '.' return (Rule h aggr body) @@ -582,7 +495,7 @@ parseUniq = choice [ symbol "clobbered" *> pure UClobbered pragmaBody :: (DeltaParsing m, LookAheadParsing m, MonadReader DLCfg m) => m Pragma -pragmaBody = choice +pragmaBody = token $ choice [ symbol "dispos_def" *> parseDisposDefl -- set default dispositions , symbol "dispos" *> parseDisposition -- in-place dispositions @@ -739,24 +652,22 @@ renderPragma = PP.enclose ":-" PP.dot . renderPragma_ pragma :: (DeltaParsing m, LookAheadParsing m, MonadReader DLCfg m) => m Pragma -pragma = symbol ":-" - *> whiteSpace - *> (pragmaBody - -- <|> fmap PMisc (unSpan <$> tfexpr "Other pragma") - ) - <* whiteSpace - <* {- optional -} (char '.') +pragma = token $ + symbol ":-" + *> (pragmaBody + -- <|> fmap PMisc (unSpan <$> tfexpr "Other pragma") + ) + <* {- optional -} (char '.') ------------------------------------------------------------------------}}} -- Lines {{{ dline :: (MonadReader DLCfg m, DeltaParsing m, LookAheadParsing m) - => m (Spanned Line) -dline = whiteSpace - *> spanned (choice [ LPragma <$> pragma - , LRule <$> spanned rule - ]) + => m (Spanned PLine) +dline = spanned (choice [ PLPragma <$> pragma + , PLRule <$> spanned rule + ]) configureParser :: (DeltaParsing m, LookAheadParsing m) => DynaLanguage m a @@ -765,7 +676,7 @@ configureParser :: (DeltaParsing m, LookAheadParsing m) configureParser p c = runReaderT (unDL p) c -- | The grand Dyna parser. -parse :: (DeltaParsing m, LookAheadParsing m) => DLCfg -> m (Spanned Line) +parse :: (DeltaParsing m, LookAheadParsing m) => DLCfg -> m (Spanned PLine) parse = configureParser dline ------------------------------------------------------------------------}}} diff --git a/src/Dyna/ParserHS/Selftest.hs b/src/Dyna/ParserHS/Selftest.hs index 6ee47e4..55e6fb1 100644 --- a/src/Dyna/ParserHS/Selftest.hs +++ b/src/Dyna/ParserHS/Selftest.hs @@ -21,12 +21,13 @@ import Data.ByteString (ByteString) import qualified Data.ByteString as B import qualified Data.ByteString.UTF8 as BU -- import Data.Foldable (toList) --- import Data.Monoid (mempty) +import Data.Monoid (mempty) -- import qualified Data.Sequence as S import Data.String import Dyna.Main.Defns import Dyna.ParserHS.Parser import Dyna.ParserHS.OneshotDriver +import Dyna.ParserHS.Types import Dyna.Term.SurfaceSyntax import Dyna.Term.TTerm (Annotation(..), TBase(..)) import Dyna.XXX.TrifectaTest @@ -164,7 +165,7 @@ case_colonFunctor = e @=? (term pvv) case_failIncompleteExpr :: Assertion case_failIncompleteExpr = checkParseFail (testTerm defDLC) "foo +" - "(interactive):1:5: error: expected: \"(\",\nend of input\nfoo + \n ^ " + (\s -> take 18 s @=? "(interactive):1:5:") ------------------------------------------------------------------------}}} -- Annotations {{{ @@ -193,7 +194,10 @@ test_aggregators = hUnitTestToTests $ TestList okAggrs , TestLabel "generic invalid" $ TestList $ map (\x -> TestLabel (BU.toString x) $ TestCase - $ checkParseFail_ testGenericAggr x) + $ checkParseFail + testGenericAggr + x + (\_ -> return ())) [".", ". ", "+=3", "+3=", "+=a", "+a=" ] , TestLabel "custom accept" $ let r = unsafeParse (testRule cdlc) r1 @@ -201,7 +205,9 @@ test_aggregators = hUnitTestToTests $ TestList "+=" (TFunctor "b" [] :~ Span (Columns 5 5) (Columns 6 6) r1) , TestLabel "custom reject" $ TestCase - $ checkParseFail_ (testRule cdlc) "a *= b." + $ checkParseFail (testRule cdlc) + "a *= b." + (\_ -> return ()) ] where r1 = "a += b." @@ -214,10 +220,10 @@ test_aggregators = hUnitTestToTests $ TestList -- Rules {{{ progrule :: ByteString -> Spanned Rule -progrule = unsafeParse (spanned (testRule defDLC <* eof)) +progrule = unsafeParse (whiteSpace *> spanned (testRule defDLC <* eof)) progrules :: ByteString -> [Spanned Rule] -progrules = unsafeParse (many (spanned (testRule defDLC)) <* eof) +progrules = unsafeParse (whiteSpace *> many (spanned (testRule defDLC)) <* eof) oneshotRules :: ByteString -> [(RuleIx, Spanned Rule)] oneshotRules = xlate . unsafeParse (oneshotDynaParser Nothing) @@ -362,12 +368,12 @@ case_rules = e @=? (progrules sr) (_tNumeric (Left 2) :~ Span (Columns 22 22) (Columns 24 24) sr) :~ s2 ] - s1 = Span (Columns 0 0) (Columns 11 11) sr - s2 = Span (Columns 11 11) (Columns 25 25) sr + s1 = Span (Columns 0 0) (Columns 12 12) sr + s2 = Span (Columns 12 12) (Columns 25 25) sr sr = "goal += 1 . laog min= 2 ." -case_rules_ruleix_pragmas :: Assertion -case_rules_ruleix_pragmas = e @=? (oneshotRules sr) +case_rules_with_ruleix_pragmas :: Assertion +case_rules_with_ruleix_pragmas = e @=? (oneshotRules sr) where e = [ ( 5 , Rule @@ -385,11 +391,13 @@ case_rules_ruleix_pragmas = e @=? (oneshotRules sr) ) ] - s1 = Span (Columns 13 13) (Columns 23 23) sr + s1 = Span (Columns 13 13) (Columns 24 24) sr s2 = Span (Columns 24 24) (Columns 36 36) sr sr = ":- ruleix 5. goal += 1. laog min= 2." - - + +case_just_ruleix_pragma :: Assertion +case_just_ruleix_pragma = [] @=? (oneshotRules ":-ruleix 5.") + case_rulesWhitespace :: Assertion case_rulesWhitespace = e @=? (progrules sr) where @@ -408,8 +416,8 @@ case_rulesWhitespace = e @=? (progrules sr) l1 = " += 1 .\n" l2 = "%test \n" l3 = " goal += 2 ." - s1 = Span (Columns 0 0) (Lines 1 7 22 7) l0 - s2 = Span (Lines 1 7 22 7) (Lines 3 12 42 12) l1 + s1 = Span (Columns 2 2) (Lines 3 1 31 1) l0 + s2 = Span (Lines 3 1 31 1) (Lines 3 12 42 12) l3 sr = B.concat [l0,l1,l2,l3] case_rulesDotExpr :: Assertion @@ -430,10 +438,18 @@ case_rulesDotExpr = e @=? (progrules sr) (_tNumeric (Left 1) :~ Span (Columns 25 25) (Columns 27 27) sr) :~ s2 ] - s1 = Span (Columns 0 0) (Columns 16 16) sr - s2 = Span (Columns 16 16) (Columns 28 28) sr + s1 = Span (Columns 0 0) (Columns 17 17) sr + s2 = Span (Columns 17 17) (Columns 28 28) sr sr = "goal += foo.bar. goal += 1 ." +case_rule_with_unknown_operator :: Assertion +case_rule_with_unknown_operator = + checkParseFail (testRule dlc) + "goal += 1 ### 2." + (\s -> take 19 s @=? "(interactive):1:11:") + where + dlc = DLC (mkEOT mempty False) genericAggregators + ------------------------------------------------------------------------}}} -- Pragmas {{{ diff --git a/src/Dyna/ParserHS/Types.hs b/src/Dyna/ParserHS/Types.hs new file mode 100644 index 0000000..8ad129c --- /dev/null +++ b/src/Dyna/ParserHS/Types.hs @@ -0,0 +1,105 @@ +--------------------------------------------------------------------------- +-- | The types which constitute the output of the parser + +-- Header material {{{ +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -Wall #-} + +module Dyna.ParserHS.Types ( + -- * Parser output types + NameWithArgs(..), + -- ** Surface langauge + Term(..), Rule(..), + -- ** Pragmas + ParsedInst(..), ParsedModeInst, Pragma(..), + -- ** Lines + PLine(..), +) where + + +import qualified Data.ByteString as B +import qualified Data.Data as D +import Dyna.Analysis.Mode.Inst +import Dyna.Main.Defns +import Dyna.Term.TTerm (Annotation(..), TBase(..), + DFunct) +import Dyna.Term.SurfaceSyntax +import Text.Trifecta + +------------------------------------------------------------------------}}} +-- Parsed output definitions {{{ + +data Term = TFunctor B.ByteString + [Spanned Term] + | TAnnot (Annotation (Spanned Term)) + (Spanned Term) + | TVar B.ByteString + | TBase TBase + deriving (D.Data,D.Typeable,Eq,Ord,Show) + +-- | Rules are not just terms because we want to make it very syntactically +-- explicit about the head being a term (though that's not an expressivity +-- concern -- just use the parenthesized texpr case) so that there is no +-- risk of parsing ambiguity. +data Rule = Rule (Spanned Term) B.ByteString (Spanned Term) + deriving (Eq,Show) + +data NameWithArgs = PNWA B.ByteString [B.ByteString] + deriving (Eq,Show) + +-- | Pragmas that are recognized by the parser +data Pragma = PDispos SelfDispos B.ByteString [ArgDispos] + -- ^ Assert the evaluation disposition of a functor + + | PDisposDefl String + -- ^ Specify the default disposition handlers + -- for subsequent context. + -- + -- Note that the override defintions are + -- preserved across this operation! + -- (XXX is that what we want?) + + | PIAggr B.ByteString Int B.ByteString + -- ^ Assert the aggregator for a functor/arity. + + | PInst NameWithArgs + ParsedInst + -- ^ Declare an instantiation state: name and body + + | PMode NameWithArgs + ParsedModeInst + ParsedModeInst + -- ^ Declare a mode: name, input, and output + + | POperAdd Fixity Integer B.ByteString + -- ^ Add an operator + + | POperDel B.ByteString + -- ^ Remove an operator + + | PRuleIx RuleIx + -- ^ Set the rule index. + -- + -- XXX This is a bit of a hack to allow external drivers to + -- feed rules incrementally; those drivers should treat the + -- rule index as an opaque token rather than something to be + -- interpreted. Eventually this will go away, when our + -- REPLs have captive compilers. + + {- --- | PMisc Term + -- ^ Fall-back parser for :- lines. -} + deriving (Eq,Show) + +-- | The type of a parsed inst declaration +data ParsedInst = PIVar !B.ByteString + | PIInst !(InstF DFunct ParsedInst) + deriving (Eq,Show) + +type ParsedModeInst = Either NameWithArgs ParsedInst + +data PLine = PLRule (Spanned Rule) + | PLPragma Pragma + deriving (Show) + +------------------------------------------------------------------------}}} diff --git a/src/Dyna/XXX/TrifectaTest.hs b/src/Dyna/XXX/TrifectaTest.hs index 13eeb2b..395984e 100644 --- a/src/Dyna/XXX/TrifectaTest.hs +++ b/src/Dyna/XXX/TrifectaTest.hs @@ -1,5 +1,5 @@ module Dyna.XXX.TrifectaTest( - unsafeFS, unsafeFF, unsafeFF_, unsafeParse, checkParseFail, checkParseFail_ + unsafeFS, unsafeFF, unsafeParse, checkParseFail, ) where import Control.Applicative @@ -13,10 +13,10 @@ unsafeFS :: Result t -> t unsafeFS (Success a) = a unsafeFS (Failure td) = error $ "Errors: " ++ show td -unsafeFF :: String -> Result t -> Assertion +unsafeFF :: (String -> Assertion) -> Result t -> Assertion unsafeFF _ (Success _) = assertFailure "Unexpected success" -unsafeFF e (Failure td) = e @=? flip PPA.displayS "" - (filterSD $ PPA.renderCompact td) +unsafeFF a (Failure td) = a $ flip PPA.displayS "" + (filterSD $ PPA.renderCompact td) where -- strip out any ANSI BS filterSD PPA.SEmpty = PPA.SEmpty @@ -25,22 +25,13 @@ unsafeFF e (Failure td) = e @=? flip PPA.displayS "" filterSD (PPA.SLine i x) = PPA.SLine i (filterSD x) filterSD (PPA.SSGR _ x) = filterSD x -unsafeFF_ :: Result t -> Assertion -unsafeFF_ (Success _) = assertFailure "Unexpected success" -unsafeFF_ (Failure _) = return () - unsafeParse :: (Parser a) -> ByteString -> a unsafeParse p = unsafeFS . parseByteString (p <* eof) mempty checkParseFail :: (Show a) => Parser a -> ByteString - -> String + -> (String -> Assertion) -> Assertion checkParseFail p i e = unsafeFF e $ parseByteString (p <* eof) mempty i -checkParseFail_ :: Parser a - -> ByteString - -> Assertion -checkParseFail_ p i = unsafeFF_ $ parseByteString (p <* eof) mempty i -