From d5660275a38a58334372326a79a9ce0153fede43 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 17 Nov 2013 08:45:21 -0800 Subject: Parsing: Generalized type of registerHeader, using new typeclasses. New type classes HasReadeOptions, HasIdentifierList, HasHeaderMap. These allow certain common functions to be reused even in parsers that use custom state (instead of ParserState), such as the MediaWiki reader. Minor API bump. --- src/Text/Pandoc/Parsing.hs | 54 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 12 deletions(-) (limited to 'src/Text/Pandoc/Parsing.hs') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 701b2ef84..9687d7712 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, TypeSynonymInstances, + FlexibleInstances#-} {- Copyright (C) 2006-2010 John MacFarlane @@ -65,6 +66,9 @@ module Text.Pandoc.Parsing ( (>>~), guardEnabled, guardDisabled, ParserState (..), + HasReaderOptions (..), + HasHeaderMap (..), + HasIdentifierList (..), defaultParserState, HeaderType (..), ParserContext (..), @@ -826,6 +830,34 @@ instance HasMeta ParserState where deleteMeta field st = st{ stateMeta = deleteMeta field $ stateMeta st } +class Monad m => HasReaderOptions m where + askReaderOption :: (ReaderOptions -> b) -> m b + +class Monad m => HasHeaderMap m where + getHeaderMap :: m (M.Map Inlines String) + putHeaderMap :: M.Map Inlines String -> m () + modifyHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) -> m () + -- default + modifyHeaderMap f = getHeaderMap >>= putHeaderMap . f + +class Monad m => HasIdentifierList m where + getIdentifierList :: m [String] + putIdentifierList :: [String] -> m () + modifyIdentifierList :: ([String] -> [String]) -> m () + -- default + modifyIdentifierList f = getIdentifierList >>= putIdentifierList . f + +instance HasReaderOptions (Parser s ParserState) where + askReaderOption = getOption + +instance HasHeaderMap (Parser s ParserState) where + getHeaderMap = fmap stateHeaders getState + putHeaderMap hm = updateState $ \st -> st{ stateHeaders = hm } + +instance HasIdentifierList (Parser s ParserState) where + getIdentifierList = fmap stateIdentifiers getState + putIdentifierList l = updateState $ \st -> st{ stateIdentifiers = l } + defaultParserState :: ParserState defaultParserState = ParserState { stateOptions = def, @@ -895,10 +927,11 @@ type SubstTable = M.Map Key Inlines -- and the auto_identifers extension is set, generate a new -- unique identifier, and update the list of identifiers -- in state. -registerHeader :: Attr -> Inlines -> Parser s ParserState Attr +registerHeader :: (HasReaderOptions m, HasHeaderMap m, HasIdentifierList m) + => Attr -> Inlines -> m Attr registerHeader (ident,classes,kvs) header' = do - ids <- stateIdentifiers `fmap` getState - exts <- getOption readerExtensions + ids <- getIdentifierList + exts <- askReaderOption readerExtensions let insert' = M.insertWith (\_new old -> old) if null ident && Ext_auto_identifiers `Set.member` exts then do @@ -906,16 +939,13 @@ registerHeader (ident,classes,kvs) header' = do let id'' = if Ext_ascii_identifiers `Set.member` exts then catMaybes $ map toAsciiChar id' else id' - updateState $ \st -> st{ - stateIdentifiers = if id' == id'' - then id' : ids - else id' : id'' : ids, - stateHeaders = insert' header' id' $ stateHeaders st } + putIdentifierList $ if id' == id'' + then id' : ids + else id' : id'' : ids + modifyHeaderMap $ insert' header' id' return (id'',classes,kvs) else do - unless (null ident) $ - updateState $ \st -> st{ - stateHeaders = insert' header' ident $ stateHeaders st } + unless (null ident) $ modifyHeaderMap $ insert' header' ident return (ident,classes,kvs) -- | Fail unless we're in "smart typography" mode. -- cgit v1.2.3 From def05d3504bfc90c31c1621438f7a51c95079ad2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 6 Dec 2013 16:43:59 -0800 Subject: HTML reader: Parse LaTeX math if appropriate options are set. * Moved inlineMath, displayMath from Markdown reader to Parsing. * Export them from Parsing. (API change.) * Generalize their types. --- src/Text/Pandoc/Parsing.hs | 35 +++++++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/HTML.hs | 9 ++++++++- src/Text/Pandoc/Readers/LaTeX.hs | 3 ++- src/Text/Pandoc/Readers/Markdown.hs | 33 --------------------------------- 4 files changed, 45 insertions(+), 35 deletions(-) (limited to 'src/Text/Pandoc/Parsing.hs') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 9687d7712..e15854333 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -48,6 +48,8 @@ module Text.Pandoc.Parsing ( (>>~), romanNumeral, emailAddress, uri, + mathInline, + mathDisplay, withHorizDisplacement, withRaw, escaped, @@ -455,6 +457,39 @@ uri = try $ do let uri' = scheme ++ ":" ++ fromEntities str' return (uri', escapeURI uri') +mathInlineWith :: String -> String -> Parser [Char] st String +mathInlineWith op cl = try $ do + string op + notFollowedBy space + words' <- many1Till (count 1 (noneOf "\n\\") + <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) + <|> count 1 newline <* notFollowedBy' blankline + *> return " ") + (try $ string cl) + notFollowedBy digit -- to prevent capture of $5 + return $ concat words' + +mathDisplayWith :: String -> String -> Parser [Char] st String +mathDisplayWith op cl = try $ do + string op + many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) + +mathDisplay :: Parser [Char] ParserState String +mathDisplay = + (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathDisplayWith "\\[" "\\]") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathDisplayWith "\\\\[" "\\\\]") + +mathInline :: Parser [Char] ParserState String +mathInline = + (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") + <|> (guardEnabled Ext_tex_math_single_backslash >> + mathInlineWith "\\(" "\\)") + <|> (guardEnabled Ext_tex_math_double_backslash >> + mathInlineWith "\\\\(" "\\\\)") + -- | Applies a parser, returns tuple of its results and its horizontal -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index d691c9878..e758f712f 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -467,7 +467,13 @@ pBlank = try $ do pTagContents :: Parser [Char] ParserState Inline pTagContents = - pStr <|> pSpace <|> smartPunctuation pTagContents <|> pSymbol <|> pBad + Math InlineMath `fmap` mathInline + <|> Math DisplayMath `fmap` mathDisplay + <|> pStr + <|> pSpace + <|> smartPunctuation pTagContents + <|> pSymbol + <|> pBad pStr :: Parser [Char] ParserState Inline pStr = do @@ -482,6 +488,7 @@ isSpecial '"' = True isSpecial '\'' = True isSpecial '.' = True isSpecial '-' = True +isSpecial '$' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 75e29ebb9..509cb5d74 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,7 +38,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Walk import Text.Pandoc.Shared import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding ((<|>), many, optional, space) +import Text.Pandoc.Parsing hiding ((<|>), many, optional, space, + mathDisplay, mathInline) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Char ( chr, ord ) import Control.Monad diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4cb75d86c..11168bc09 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1408,39 +1408,6 @@ math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . B.math <$> (mathInline >>= applyMacros')) -mathDisplay :: MarkdownParser String -mathDisplay = - (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathDisplayWith "\\[" "\\]") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathDisplayWith "\\\\[" "\\\\]") - -mathDisplayWith :: String -> String -> MarkdownParser String -mathDisplayWith op cl = try $ do - string op - many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) - -mathInline :: MarkdownParser String -mathInline = - (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") - <|> (guardEnabled Ext_tex_math_single_backslash >> - mathInlineWith "\\(" "\\)") - <|> (guardEnabled Ext_tex_math_double_backslash >> - mathInlineWith "\\\\(" "\\\\)") - -mathInlineWith :: String -> String -> MarkdownParser String -mathInlineWith op cl = try $ do - string op - notFollowedBy space - words' <- many1Till (count 1 (noneOf "\n\\") - <|> (char '\\' >> anyChar >>= \c -> return ['\\',c]) - <|> count 1 newline <* notFollowedBy' blankline - *> return " ") - (try $ string cl) - notFollowedBy digit -- to prevent capture of $5 - return $ concat words' - -- Parses material enclosed in *s, **s, _s, or __s. -- Designed to avoid backtracking. enclosure :: Char -- cgit v1.2.3 From 0c5e7cf8cb4fe6959d7e89880e8925afe6625414 Mon Sep 17 00:00:00 2001 From: Henry de Valence Date: Thu, 19 Dec 2013 20:19:24 -0500 Subject: HLint: use `elem` and `notElem` Replaces long conditional chains with calls to `elem` and `notElem`. --- pandoc.hs | 2 +- src/Text/Pandoc/Parsing.hs | 4 ++-- src/Text/Pandoc/Readers/LaTeX.hs | 5 ++--- src/Text/Pandoc/Readers/Markdown.hs | 8 ++++---- src/Text/Pandoc/Shared.hs | 12 ++++-------- src/Text/Pandoc/Writers/Markdown.hs | 11 ++++++----- src/Text/Pandoc/Writers/Org.hs | 2 +- 7 files changed, 20 insertions(+), 24 deletions(-) (limited to 'src/Text/Pandoc/Parsing.hs') diff --git a/pandoc.hs b/pandoc.hs index cada3347d..ccd3e57fb 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -656,7 +656,7 @@ options = (ReqArg (\arg opt -> do let b = takeBaseName arg - if (b == "pdflatex" || b == "lualatex" || b == "xelatex") + if b `elem` ["pdflatex", "lualatex", "xelatex"] then return opt { optLaTeXEngine = arg } else err 45 "latex-engine must be pdflatex, lualatex, or xelatex.") "PROGRAM") diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index e15854333..2f21e1253 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -271,7 +271,7 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. nonspaceChar :: Parser [Char] st Char -nonspaceChar = satisfy $ \x -> x /= '\t' && x /= '\n' && x /= ' ' && x /= '\r' +nonspaceChar = satisfy $ flip notElem ['\t', '\n', ' ', '\r'] -- | Skips zero or more spaces or tabs. skipSpaces :: Parser [Char] st () @@ -1062,7 +1062,7 @@ doubleQuoteStart :: Parser [Char] ParserState () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" - notFollowedBy (satisfy (\c -> c == ' ' || c == '\t' || c == '\n')) + notFollowedBy . satisfy $ flip elem [' ', '\t', '\n'] doubleQuoteEnd :: Parser [Char] st () doubleQuoteEnd = do diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 0020a8f26..51271edc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -874,9 +874,8 @@ verbatimEnv = do (_,r) <- withRaw $ do controlSeq "begin" name <- braced - guard $ name == "verbatim" || name == "Verbatim" || - name == "lstlisting" || name == "minted" || - name == "alltt" + guard $ name `elem` ["verbatim", "Verbatim", "lstlisting", + "minted", "alltt"] verbEnv name rest <- getInput return (r,rest) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b2e88d47e..f483ab059 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -789,8 +789,8 @@ listItem start = try $ do orderedList :: MarkdownParser (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart - unless ((style == DefaultStyle || style == Decimal || style == Example) && - (delim == DefaultDelim || delim == Period)) $ + unless (style `elem` [DefaultStyle, Decimal, Example] && + delim `elem` [DefaultDelim, Period]) $ guardEnabled Ext_fancy_lists when (style == Example) $ guardEnabled Ext_example_lists items <- fmap sequence $ many1 $ listItem @@ -925,8 +925,8 @@ strictHtmlBlock = htmlInBalanced (not . isInlineTag) rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do - (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> - t == "pre" || t == "style" || t == "script") + (TagOpen tag _, open) <- htmlTag (tagOpen (flip elem + ["pre", "style", "script"]) (const True)) contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index cd2f7e24d..3446f4343 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -564,14 +564,10 @@ makeMeta title authors date = -- | Render HTML tags. renderTags' :: [Tag String] -> String renderTags' = renderTagsOptions - renderOptions{ optMinimize = \x -> - let y = map toLower x - in y == "hr" || y == "br" || - y == "img" || y == "meta" || - y == "link" - , optRawTag = \x -> - let y = map toLower x - in y == "script" || y == "style" } + renderOptions{ optMinimize = matchTags ["hr", "br", "img", + "meta", "link"] + , optRawTag = matchTags ["script", "style"] } + where matchTags = \tags -> flip elem tags . map toLower -- -- File handling diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index c0b189b75..278e5cc9d 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -338,7 +338,7 @@ blockToMarkdown opts (RawBlock f str) else return $ if isEnabled Ext_markdown_attribute opts then text (addMarkdownAttribute str) <> text "\n" else text str <> text "\n" - | f == "latex" || f == "tex" || f == "markdown" = do + | f `elem` ["latex", "tex", "markdown"] = do st <- get if stPlain st then return empty @@ -628,10 +628,11 @@ getReference label (src, tit) = do Nothing -> do let label' = case find ((== label) . fst) (stRefs st) of Just _ -> -- label is used; generate numerical label - case find (\n -> not (any (== [Str (show n)]) - (map fst (stRefs st)))) [1..(10000 :: Integer)] of - Just x -> [Str (show x)] - Nothing -> error "no unique label" + case find (\n -> notElem [Str (show n)] + (map fst (stRefs st))) + [1..(10000 :: Integer)] of + Just x -> [Str (show x)] + Nothing -> error "no unique label" Nothing -> label modify (\s -> s{ stRefs = (label', (src,tit)) : stRefs st }) return label' diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 51083f52b..d318c5f6a 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -129,7 +129,7 @@ blockToOrg (Para inlines) = do blockToOrg (RawBlock "html" str) = return $ blankline $$ "#+BEGIN_HTML" $$ nest 2 (text str) $$ "#+END_HTML" $$ blankline -blockToOrg (RawBlock f str) | f == "org" || f == "latex" || f == "tex" = +blockToOrg (RawBlock f str) | f `elem` ["org", "latex", "tex"] = return $ text str blockToOrg (RawBlock _ _) = return empty blockToOrg HorizontalRule = return $ blankline $$ "--------------" $$ blankline -- cgit v1.2.3