From da3702357dc9a310b460c82411fe5b8c870416d5 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 25 Jul 2012 10:45:45 -0700 Subject: Put smart, strict in separate options field in state. This is the beginning of a larger transition that will make Options, not ParserState, the parameter of the read functions. (Options will also be used in writers, in place of WriterOptions.) Next step is to remove strict, replacing it with granular tests for different extensions. --- src/Tests/Readers/Markdown.hs | 4 +++- src/Text/Pandoc.hs | 3 +++ src/Text/Pandoc/Options.hs | 18 ++++++++++++++++-- src/Text/Pandoc/Parsing.hs | 15 +++++++-------- src/Text/Pandoc/Readers/HTML.hs | 5 +++-- src/Text/Pandoc/Readers/Markdown.hs | 21 +++++++++++---------- src/Text/Pandoc/Readers/Textile.hs | 6 +++++- src/pandoc.hs | 7 ++++--- 8 files changed, 52 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/Tests/Readers/Markdown.hs b/src/Tests/Readers/Markdown.hs index 5ad974adf..7fbeb6350 100644 --- a/src/Tests/Readers/Markdown.hs +++ b/src/Tests/Readers/Markdown.hs @@ -13,7 +13,9 @@ markdown :: String -> Pandoc markdown = readMarkdown defaultParserState{ stateStandalone = True } markdownSmart :: String -> Pandoc -markdownSmart = readMarkdown defaultParserState{ stateSmart = True } +markdownSmart = readMarkdown defaultParserState{ stateOptions = + let oldOpts = stateOptions defaultParserState in + oldOpts { optionSmart = True } } infix 4 =: (=:) :: ToString c diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index ca6d00de6..193755dff 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -60,6 +60,8 @@ module Text.Pandoc module Text.Pandoc.Definition -- * Generics , module Text.Pandoc.Generic + -- * Options + , module Text.Pandoc.Options -- * Lists of readers and writers , readers , writers @@ -151,6 +153,7 @@ import Text.Pandoc.Writers.AsciiDoc import Text.Pandoc.Templates import Text.Pandoc.Parsing import Text.Pandoc.Shared +import Text.Pandoc.Options import Data.ByteString.Lazy (ByteString) import Data.Version (showVersion) import Text.JSON.Generic diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index fcb46ee0b..d5bd11ba5 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -29,10 +29,13 @@ Data structures and functions for representing parser and writer options. -} module Text.Pandoc.Options ( Extension(..) + , Options(..) ) where import Data.Set (Set) +import qualified Data.Set as Set +import Data.Default --- | Individually selectable markdown syntax extensions. +-- | Individually selectable syntax extensions. data Extension = Footnotes | TeX_math | Delimited_code_blocks @@ -45,6 +48,17 @@ data Extension = Footnotes | Blank_before_blockquote | Blank_before_header | Significant_bullets - deriving (Show, Read, Enum, Eq, Bounded) + deriving (Show, Read, Enum, Eq, Ord, Bounded) +data Options = Options{ + optionExtensions :: Set Extension + , optionSmart :: Bool + , optionStrict :: Bool -- FOR TRANSITION ONLY + } deriving (Show, Read) +instance Default Options + where def = Options{ + optionExtensions = Set.fromList [minBound..maxBound] + , optionSmart = False + , optionStrict = False + } diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 145ad64c5..2d0fef7c3 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -129,6 +129,7 @@ module Text.Pandoc.Parsing ( (>>~), where import Text.Pandoc.Definition +import Text.Pandoc.Options import Text.Pandoc.Generic import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Parsec @@ -391,7 +392,7 @@ nullBlock = anyChar >> return Null failIfStrict :: Parsec [a] ParserState () failIfStrict = do state <- getState - if stateStrict state then fail "strict mode" else return () + if optionStrict (stateOptions state) then fail "strict mode" else return () -- | Fail unless we're in literate haskell mode. failUnlessLHS :: Parsec [tok] ParserState () @@ -688,7 +689,8 @@ testStringWith parser str = UTF8.putStrLn $ show $ -- | Parsing options. data ParserState = ParserState - { stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? + { stateOptions :: Options, -- ^ User options + stateParseRaw :: Bool, -- ^ Parse raw HTML and LaTeX? stateParserContext :: ParserContext, -- ^ Inside list? stateQuoteContext :: QuoteContext, -- ^ Inside quoted environment? stateMaxNestingLevel :: Int, -- ^ Max # of nested Strong/Emph @@ -701,8 +703,6 @@ data ParserState = ParserState stateTitle :: [Inline], -- ^ Title of document stateAuthors :: [[Inline]], -- ^ Authors of document stateDate :: [Inline], -- ^ Date of document - stateStrict :: Bool, -- ^ Use strict markdown syntax? - stateSmart :: Bool, -- ^ Use smart typography? stateOldDashes :: Bool, -- ^ Use pandoc <= 1.8.2.1 behavior -- in parsing dashes; -- is em-dash; -- before numeral is en-dash @@ -724,7 +724,8 @@ instance Default ParserState where defaultParserState :: ParserState defaultParserState = - ParserState { stateParseRaw = False, + ParserState { stateOptions = def, + stateParseRaw = False, stateParserContext = NullState, stateQuoteContext = NoQuote, stateMaxNestingLevel = 6, @@ -737,8 +738,6 @@ defaultParserState = stateTitle = [], stateAuthors = [], stateDate = [], - stateStrict = False, - stateSmart = False, stateOldDashes = False, stateLiterateHaskell = False, stateColumns = 80, @@ -796,7 +795,7 @@ lookupKeySrc table key = case M.lookup key table of -- | Fail unless we're in "smart typography" mode. failUnlessSmart :: Parsec [tok] ParserState () -failUnlessSmart = getState >>= guard . stateSmart +failUnlessSmart = getState >>= guard . optionSmart . stateOptions smartPunctuation :: Parsec [Char] ParserState Inline -> Parsec [Char] ParserState Inline diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 87ce6277d..9510f3a30 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -41,6 +41,7 @@ import Text.HTML.TagSoup.Match import Text.Pandoc.Definition import Text.Pandoc.Builder (text, toList) import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Data.Maybe ( fromMaybe, isJust ) import Data.List ( intercalate ) @@ -125,7 +126,7 @@ pOrderedList :: TagParser [Block] pOrderedList = try $ do TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" []) st <- getState - let (start, style) = if stateStrict st + let (start, style) = if optionStrict (stateOptions st) then (1, DefaultStyle) else (sta', sty') where sta = fromMaybe "1" $ @@ -280,7 +281,7 @@ pCodeBlock = try $ do let attribsClasses = words $ fromMaybe "" $ lookup "class" attr let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr st <- getState - let attribs = if stateStrict st + let attribs = if optionStrict (stateOptions st) then ("",[],[]) else (attribsId, attribsClasses, attribsKV) return [CodeBlock attribs result] diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index fc3afeac9..d668bb2ab 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -37,6 +37,7 @@ import Data.Char ( isAlphaNum ) import Data.Maybe import Text.Pandoc.Definition import Text.Pandoc.Generic +import Text.Pandoc.Options import Text.Pandoc.Shared import Text.Pandoc.Parsing import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) @@ -181,7 +182,7 @@ parseMarkdown = do -- docMinusKeys is the raw document with blanks where the keys/notes were... st <- getState let firstPassParser = referenceKey - <|> (if stateStrict st then mzero else noteBlock) + <|> (if optionStrict (stateOptions st) then mzero else noteBlock) <|> liftM snd (withRaw codeBlockDelimited) <|> lineClump docMinusKeys <- liftM concat $ manyTill firstPassParser eof @@ -292,7 +293,7 @@ parseBlocks = manyTill block eof block :: Parser [Char] ParserState Block block = do st <- getState - choice (if stateStrict st + choice (if optionStrict (stateOptions st) then [ header , codeBlockIndented , blockQuote @@ -533,7 +534,7 @@ anyOrderedListStart = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number state <- getState - if stateStrict state + if optionStrict (stateOptions state) then do many1 digit char '.' spaceChar @@ -694,7 +695,7 @@ para = try $ do option (Plain result) $ try $ do newline blanklines <|> - (getState >>= guard . stateStrict >> + (getState >>= guard . optionStrict . stateOptions >> lookAhead (blockQuote <|> header) >> return "") return $ Para result @@ -1008,7 +1009,7 @@ escapedChar' :: Parser [Char] ParserState Char escapedChar' = try $ do char '\\' state <- getState - if stateStrict state + if optionStrict (stateOptions state) then oneOf "\\`*_{}[]()>#+-.!~" else satisfy (not . isAlphaNum) @@ -1023,7 +1024,7 @@ escapedChar = do ltSign :: Parser [Char] ParserState Inline ltSign = do st <- getState - if stateStrict st + if optionStrict (stateOptions st) then char '<' else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html return $ Str ['<'] @@ -1159,7 +1160,7 @@ nonEndline = satisfy (/='\n') str :: Parser [Char] ParserState Inline str = do - smart <- stateSmart `fmap` getState + smart <- (optionSmart . stateOptions) `fmap` getState a <- alphaNum as <- many $ alphaNum <|> (try $ char '_' >>~ lookAhead alphaNum) @@ -1200,7 +1201,7 @@ endline = try $ do newline notFollowedBy blankline st <- getState - when (stateStrict st) $ do + when (optionStrict (stateOptions st)) $ do notFollowedBy emailBlockQuoteStart notFollowedBy (char '#') -- atx header -- parse potential list-starts differently if in a list: @@ -1282,7 +1283,7 @@ autoLink = try $ do (orig, src) <- uri <|> emailAddress char '>' st <- getState - return $ if stateStrict st + return $ if optionStrict (stateOptions st) then Link [Str orig] (src, "") else Link [Code ("",["url"],[]) orig] (src, "") @@ -1343,7 +1344,7 @@ inBrackets parser = do rawHtmlInline :: Parser [Char] ParserState Inline rawHtmlInline = do st <- getState - (_,result) <- if stateStrict st + (_,result) <- if optionStrict (stateOptions st) then htmlTag (not . isTextTag) else htmlTag isInlineTag return $ RawInline "html" result diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 71ba26c8c..5373672b0 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -57,6 +57,7 @@ module Text.Pandoc.Readers.Textile ( readTextile) where import Text.Pandoc.Definition import Text.Pandoc.Shared +import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML ( htmlTag, isInlineTag, isBlockTag ) import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock ) @@ -77,7 +78,10 @@ readTextile state s = parseTextile :: Parser [Char] ParserState Pandoc parseTextile = do -- textile allows raw HTML and does smart punctuation by default - updateState (\state -> state { stateParseRaw = True, stateSmart = True }) + oldOpts <- stateOptions `fmap` getState + updateState $ \state -> state { stateParseRaw = True + , stateOptions = oldOpts{ optionSmart = True } + } many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes diff --git a/src/pandoc.hs b/src/pandoc.hs index 0fd9f37a4..c879a874c 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -942,11 +942,12 @@ main = do lhsExtension sources, stateStandalone = standalone', stateCitations = map CSL.refId refs, - stateSmart = smart || (texLigatures && - (laTeXOutput || writerName' == "context")), stateOldDashes = oldDashes, stateColumns = columns, - stateStrict = strict, + stateOptions = def{ optionStrict = strict + , optionSmart = smart || + (texLigatures && + (laTeXOutput || writerName' == "context")) }, stateIndentedCodeClasses = codeBlockClasses, stateApplyMacros = not laTeXOutput } -- cgit v1.2.3