From 814ac51d3228eeb3bbcbf78a8a88a43cd11d23dd Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 19 Jun 2017 22:04:01 +0200 Subject: Separated tracing from logging. Formerly tracing was just log messages with a DEBUG log level. We now make these things independent. Tracing can be turned on or off in PandocMonad using `setTrace`; it is independent of logging. * Removed `DEBUG` from `Verbosity`. * Removed `ParserTrace` from `LogMessage`. * Added `trace`, `setTrace` to `PandocMonad`. --- src/Text/Pandoc/Readers/MediaWiki.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc/Readers/MediaWiki.hs') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index a3ff60c14..e371ff152 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -52,7 +52,7 @@ import qualified Data.Set as Set import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class (PandocMonad, report) +import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options @@ -205,7 +205,6 @@ parseMediaWiki = do block :: PandocMonad m => MWParser m Blocks block = do - pos <- getPosition res <- mempty <$ skipMany1 blankline <|> table <|> header @@ -218,7 +217,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - report $ ParsingTrace (take 60 $ show $ B.toList res) pos + trace (take 60 $ show $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks -- cgit v1.2.3 From 2363e6a15bdde1c206d65461bd2e21f773dbc808 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 20 Jun 2017 21:52:13 +0200 Subject: Move CR filtering from tabFilter to the readers. The readers previously assumed that CRs had been filtered from the input. Now we strip the CRs in the readers themselves, before parsing. (The point of this is just to simplify the parsers.) Shared now exports a new function `crFilter`. [API change] And `tabFilter` no longer filters CRs. --- src/Text/Pandoc.hs | 4 - src/Text/Pandoc/App.hs | 4 +- src/Text/Pandoc/Readers/DocBook.hs | 5 +- src/Text/Pandoc/Readers/HTML.hs | 4 +- src/Text/Pandoc/Readers/Haddock.hs | 4 +- src/Text/Pandoc/Readers/LaTeX.hs | 3 +- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Readers/MediaWiki.hs | 5 +- src/Text/Pandoc/Readers/Muse.hs | 3 +- src/Text/Pandoc/Readers/OPML.hs | 4 +- src/Text/Pandoc/Readers/Org.hs | 3 +- src/Text/Pandoc/Readers/RST.hs | 2 +- src/Text/Pandoc/Readers/TWiki.hs | 3 +- src/Text/Pandoc/Readers/Textile.hs | 4 +- src/Text/Pandoc/Readers/Txt2Tags.hs | 6 +- src/Text/Pandoc/Readers/Vimwiki.hs | 151 ++++++++++++++++++----------------- src/Text/Pandoc/Shared.hs | 12 ++- 17 files changed, 115 insertions(+), 104 deletions(-) (limited to 'src/Text/Pandoc/Readers/MediaWiki.hs') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index b8dba860a..9fa5f098d 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -52,10 +52,6 @@ inline links: > main = do > T.getContents >>= mdToRST >>= T.putStrLn -Note: all of the readers assume that the input text has @'\n'@ -line endings. So if you get your input text from a web form, -you should remove @'\r'@ characters using @filter (/='\r')@. - -} module Text.Pandoc diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 12429b51d..7e9cfdd95 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -381,8 +381,8 @@ convertWithOpts opts = do | otherwise -> [] let convertTabs = tabFilter (if optPreserveTabs opts || readerName == "t2t" - then 0 - else optTabStop opts) + then 0 + else optTabStop opts) readSources :: [FilePath] -> PandocIO Text readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 6108aae7f..c1e4d742c 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,6 +1,6 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Data.Char (toUpper) -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, crFilter) import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder @@ -526,7 +526,8 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ T.unpack inp + let tree = normalizeTree . parseXML . handleInstructions + $ T.unpack $ crFilter inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e203298b8..301afa207 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -45,7 +45,7 @@ import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..)) import Text.Pandoc.Shared ( extractSpaces, addMetaField - , escapeURI, safeRead ) + , escapeURI, safeRead, crFilter ) import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled, Extension (Ext_epub_html_exts, Ext_raw_html, Ext_native_divs, Ext_native_spans)) @@ -82,7 +82,7 @@ readHtml :: PandocMonad m readHtml opts inp = do let tags = stripPrefixes . canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } - inp + (crFilter inp) parseDoc = do blocks <- (fixPlains False) . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index b22b71b96..a09ed8be9 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -27,7 +27,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (splitBy, trim) +import Text.Pandoc.Shared (splitBy, trim, crFilter) -- | Parse Haddock markup and return a 'Pandoc' document. @@ -35,7 +35,7 @@ readHaddock :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readHaddock opts s = case readHaddockEither opts (unpack s) of +readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 1ac872933..090dc5fdb 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -63,7 +63,8 @@ readLaTeX :: PandocMonad m -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readLaTeX opts ltx = do - parsed <- readWithM parseLaTeX def{ stateOptions = opts } (unpack ltx) + parsed <- readWithM parseLaTeX def{ stateOptions = opts } + (unpack (crFilter ltx)) case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index edb356b39..96885c9b1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -74,7 +74,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- (readWithM parseMarkdown) def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e371ff152..a7f073d50 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -58,7 +58,8 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) -import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim) +import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, + crFilter) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -77,7 +78,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack s ++ "\n") + (unpack (crFilter s) ++ "\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 84121cabe..7eee064a7 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -57,6 +57,7 @@ import Text.Pandoc.Class (PandocMonad(..)) import Text.Pandoc.Definition import Text.Pandoc.Logging import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Parsing hiding (macro, nested) import Text.Pandoc.Readers.HTML (htmlTag) import Text.Pandoc.XML (fromEntities) @@ -68,7 +69,7 @@ readMuse :: PandocMonad m -> Text -> m Pandoc readMuse opts s = do - res <- readWithM parseMuse def{ stateOptions = opts } (unpack s) + res <- readWithM parseMuse def{ stateOptions = opts } (unpack (crFilter s)) case res of Left e -> throwError e Right d -> return d diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index e9f876525..c25ace800 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -9,6 +9,7 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Options +import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.XML.Light @@ -32,7 +33,8 @@ instance Default OPMLState where readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML _ inp = do (bs, st') <- flip runStateT def - (mapM parseBlock $ normalizeTree $ parseXML (unpack inp)) + (mapM parseBlock $ normalizeTree $ + parseXML (unpack (crFilter inp))) return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 5e0d67d10..eaccc251c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -36,6 +36,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options import Text.Pandoc.Parsing (reportLogMessages) +import Text.Pandoc.Shared (crFilter) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) @@ -51,7 +52,7 @@ readOrg :: PandocMonad m readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index fb5f6f2d4..d13f697b7 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -68,7 +68,7 @@ readRST :: PandocMonad m -> m Pandoc readRST opts s = do parsed <- (readWithM parseRST) def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 91ee8d1f1..210d3e5aa 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -48,6 +48,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, macro, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.Shared (crFilter) import Data.Text (Text) import qualified Data.Text as T @@ -58,7 +59,7 @@ readTWiki :: PandocMonad m -> m Pandoc readTWiki opts s = do res <- readWithM parseTWiki def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case res of Left e -> throwError e Right d -> return d diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 96b51feef..a80d75340 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -68,7 +68,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (trim) +import Text.Pandoc.Shared (trim, crFilter) import Data.Text (Text) import qualified Data.Text as T @@ -79,7 +79,7 @@ readTextile :: PandocMonad m -> m Pandoc readTextile opts s = do parsed <- readWithM parseTextile def{ stateOptions = opts } - (T.unpack s ++ "\n\n") + (T.unpack (crFilter s) ++ "\n\n") case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 260bb7fff..5708358f6 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -41,7 +41,7 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (macro, space, spaces, uri) -import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) +import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI, crFilter) import Control.Monad (guard, void, when) import Control.Monad.Reader (Reader, asks, runReader) import Data.Default @@ -95,7 +95,9 @@ readTxt2Tags :: PandocMonad m -> m Pandoc readTxt2Tags opts s = do meta <- getT2TMeta - let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) (T.unpack s ++ "\n\n") + let parsed = flip runReader meta $ + readWithM parseT2T (def {stateOptions = opts}) $ + T.unpack (crFilter s) ++ "\n\n" case parsed of Right result -> return $ result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 0cfbec34d..98f04eda9 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -33,10 +33,10 @@ Conversion of vimwiki text to 'Pandoc' document. * [X] header * [X] hrule * [X] comment - * [X] blockquote - * [X] preformatted - * [X] displaymath - * [X] bulletlist / orderedlist + * [X] blockquote + * [X] preformatted + * [X] displaymath + * [X] bulletlist / orderedlist * [X] orderedlist with 1., i., a) etc identification. * [X] todo lists -- not list builder with attributes? using span. * [X] table @@ -57,8 +57,8 @@ Conversion of vimwiki text to 'Pandoc' document. * [X] sub- and super-scripts * misc: * [X] `TODO:` mark - * [X] metadata placeholders: %title and %date - * [O] control placeholders: %template and %nohtml -- %template added to + * [X] metadata placeholders: %title and %date + * [O] control placeholders: %template and %nohtml -- %template added to meta, %nohtml ignored --} @@ -66,29 +66,29 @@ module Text.Pandoc.Readers.Vimwiki ( readVimwiki ) where import Control.Monad.Except (throwError) import Control.Monad (guard) -import Data.Default +import Data.Default import Data.Maybe import Data.Monoid ((<>)) import Data.List (isInfixOf, isPrefixOf) import Data.Text (Text, unpack) import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList) -import qualified Text.Pandoc.Builder - as B (headerWith, str, space, strong, emph, strikeout, code, link, image, - spanWith, para, horizontalRule, blockQuote, bulletList, plain, - orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, +import qualified Text.Pandoc.Builder + as B (headerWith, str, space, strong, emph, strikeout, code, link, image, + spanWith, para, horizontalRule, blockQuote, bulletList, plain, + orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, setMeta, definitionList, superscript, subscript) import Text.Pandoc.Class (PandocMonad(..)) -import Text.Pandoc.Definition (Pandoc(..), Inline(Space), - Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), +import Text.Pandoc.Definition (Pandoc(..), Inline(Space), + Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), ListNumberDelim(..)) import Text.Pandoc.Options (ReaderOptions) -import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, +import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF, orderedListMarker, many1Till) -import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify) -import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, +import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify, crFilter) +import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, alphaNum) -import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, +import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, notFollowedBy, option) import Text.Parsec.Prim (many, try, updateState, getState) import Text.Parsec.Char (oneOf, space) @@ -97,7 +97,8 @@ import Text.Parsec.Prim ((<|>)) readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readVimwiki opts s = do - res <- readWithM parseVimwiki def{ stateOptions = opts } (unpack s) + res <- readWithM parseVimwiki def{ stateOptions = opts } + (unpack (crFilter s)) case res of Left e -> throwError e Right result -> return result @@ -110,7 +111,7 @@ type VwParser = ParserT [Char] ParserState specialChars :: [Char] specialChars = "=*-#[]_~{}`$|:%^," -spaceChars :: [Char] +spaceChars :: [Char] spaceChars = " \t\n" -- main parser @@ -134,7 +135,7 @@ block = do , mempty <$ comment , mixedList , preformatted - , displayMath + , displayMath , table , mempty <$ placeholder , blockQuote @@ -149,14 +150,14 @@ blockML = choice [preformatted, displayMath, table] header :: PandocMonad m => VwParser m Blocks header = try $ do - sp <- many spaceChar + sp <- many spaceChar eqs <- many1 (char '=') spaceChar let lev = length eqs guard $ lev <= 6 - contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar - >> (string eqs) >> many spaceChar >> newline) - attr <- registerHeader (makeId contents, + contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar + >> (string eqs) >> many spaceChar >> newline) + attr <- registerHeader (makeId contents, (if sp == "" then [] else ["justcenter"]), []) contents return $ B.headerWith attr lev contents @@ -184,7 +185,7 @@ blockQuote = try $ do else return $ B.blockQuote $ B.plain contents definitionList :: PandocMonad m => VwParser m Blocks -definitionList = try $ +definitionList = try $ B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT)) dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) @@ -199,15 +200,15 @@ dlItemWithoutDT = do return $ (mempty, dds) definitionDef :: PandocMonad m => VwParser m Blocks -definitionDef = try $ - (notFollowedBy definitionTerm) >> many spaceChar +definitionDef = try $ + (notFollowedBy definitionTerm) >> many spaceChar >> (definitionDef1 <|> definitionDef2) definitionDef1 :: PandocMonad m => VwParser m Blocks definitionDef1 = try $ mempty <$ defMarkerE definitionDef2 :: PandocMonad m => VwParser m Blocks -definitionDef2 = try $ B.plain <$> +definitionDef2 = try $ B.plain <$> (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline) @@ -218,11 +219,11 @@ definitionTerm = try $ do return x definitionTerm1 :: PandocMonad m => VwParser m Inlines -definitionTerm1 = try $ +definitionTerm1 = try $ trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE) definitionTerm2 :: PandocMonad m => VwParser m Inlines -definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' +definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM)) defMarkerM :: PandocMonad m => VwParser m Char @@ -236,8 +237,8 @@ hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) preformatted :: PandocMonad m => VwParser m Blocks preformatted = try $ do - many spaceChar >> string "{{{" - attrText <- many (noneOf "\n") + many spaceChar >> string "{{{" + attrText <- many (noneOf "\n") lookAhead newline contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" >> many spaceChar >> newline)) @@ -246,14 +247,14 @@ preformatted = try $ do else return $ B.codeBlockWith (makeAttr attrText) contents makeAttr :: String -> Attr -makeAttr s = +makeAttr s = let xs = splitBy (`elem` " \t") s in ("", [], catMaybes $ map nameValue xs) nameValue :: String -> Maybe (String, String) -nameValue s = +nameValue s = let t = splitBy (== '=') s in - if length t /= 2 + if length t /= 2 then Nothing else let (a, b) = (head t, last t) in if ((length b) < 2) || ((head b, last b) /= ('"', '"')) @@ -269,7 +270,7 @@ displayMath = try $ do >> many spaceChar >> newline)) let contentsWithTags | mathTag == "" = "\\[" ++ contents ++ "\n\\]" - | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents + | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents ++ "\n\\end{" ++ mathTag ++ "}" return $ B.plain $ B.str contentsWithTags @@ -286,7 +287,7 @@ mixedList' prevInd = do else do listStart curLine <- listItemContent - let listBuilder = + let listBuilder = if builder == "ul" then B.bulletList else B.orderedList (subList, lowInd) <- (mixedList' curInd) if lowInd >= curInd @@ -297,7 +298,7 @@ mixedList' prevInd = do then return ([listBuilder curList], endInd) else return (curList, endInd) else do - let (curList, endInd) = ((combineList curLine subList), + let (curList, endInd) = ((combineList curLine subList), lowInd) if curInd > prevInd then return ([listBuilder curList], endInd) @@ -328,13 +329,13 @@ blocksThenInline = try $ do return $ mconcat $ y ++ [x] listTodoMarker :: PandocMonad m => VwParser m Inlines -listTodoMarker = try $ do - x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) +listTodoMarker = try $ do + x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) (oneOf " .oOX") return $ makeListMarkerSpan x makeListMarkerSpan :: Char -> Inlines -makeListMarkerSpan x = +makeListMarkerSpan x = let cl = case x of ' ' -> "done0" '.' -> "done1" @@ -347,9 +348,9 @@ makeListMarkerSpan x = combineList :: Blocks -> [Blocks] -> [Blocks] combineList x [y] = case toList y of - [BulletList z] -> [fromList $ (toList x) + [BulletList z] -> [fromList $ (toList x) ++ [BulletList z]] - [OrderedList attr z] -> [fromList $ (toList x) + [OrderedList attr z] -> [fromList $ (toList x) ++ [OrderedList attr z]] _ -> x:[y] combineList x xs = x:xs @@ -365,9 +366,9 @@ bulletListMarkers :: PandocMonad m => VwParser m String bulletListMarkers = "ul" <$ (char '*' <|> char '-') orderedListMarkers :: PandocMonad m => VwParser m String -orderedListMarkers = - ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) - <$> orderedListMarker +orderedListMarkers = + ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) + <$> orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) <|> ("ol" <$ char '#') @@ -397,14 +398,14 @@ table2 = try $ do tableHeaderSeparator :: PandocMonad m => VwParser m () tableHeaderSeparator = try $ do - many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') + many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') >> many spaceChar >> newline return () - + tableRow :: PandocMonad m => VwParser m [Blocks] tableRow = try $ do many spaceChar >> char '|' - s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar + s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar >> newline)) guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|") tr <- many tableCell @@ -416,25 +417,25 @@ tableCell = try $ B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|')) placeholder :: PandocMonad m => VwParser m () -placeholder = try $ +placeholder = try $ (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh ph :: PandocMonad m => String -> VwParser m () ph s = try $ do many spaceChar >> (string $ '%':s) >> spaceChar - contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) + contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) --use lookAhead because of placeholder in the whitespace parser let meta' = return $ B.setMeta s contents nullMeta :: F Meta updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' } noHtmlPh :: PandocMonad m => VwParser m () noHtmlPh = try $ - () <$ (many spaceChar >> string "%nohtml" >> many spaceChar + () <$ (many spaceChar >> string "%nohtml" >> many spaceChar >> (lookAhead newline)) templatePh :: PandocMonad m => VwParser m () templatePh = try $ - () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") + () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") >> (lookAhead newline)) -- inline parser @@ -475,7 +476,7 @@ str :: PandocMonad m => VwParser m Inlines str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars) whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines -whitespace endline = B.space <$ (skipMany1 spaceChar <|> +whitespace endline = B.space <$ (skipMany1 spaceChar <|> (try (newline >> (comment <|> placeholder)))) <|> B.softbreak <$ endline @@ -493,24 +494,24 @@ bareURL = try $ do strong :: PandocMonad m => VwParser m Inlines strong = try $ do s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*") - guard $ (not $ (head s) `elem` spaceChars) + guard $ (not $ (head s) `elem` spaceChars) && (not $ (last s) `elem` spaceChars) char '*' - contents <- mconcat <$> (manyTill inline' $ char '*' + contents <- mconcat <$> (manyTill inline' $ char '*' >> notFollowedBy alphaNum) - return $ (B.spanWith ((makeId contents), [], []) mempty) + return $ (B.spanWith ((makeId contents), [], []) mempty) <> (B.strong contents) -makeId :: Inlines -> String +makeId :: Inlines -> String makeId i = concat (stringify <$> (toList i)) emph :: PandocMonad m => VwParser m Inlines emph = try $ do s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_") - guard $ (not $ (head s) `elem` spaceChars) + guard $ (not $ (head s) `elem` spaceChars) && (not $ (last s) `elem` spaceChars) char '_' - contents <- mconcat <$> (manyTill inline' $ char '_' + contents <- mconcat <$> (manyTill inline' $ char '_' >> notFollowedBy alphaNum) return $ B.emph contents @@ -532,32 +533,32 @@ superscript = try $ subscript :: PandocMonad m => VwParser m Inlines subscript = try $ - B.subscript <$> mconcat <$> (string ",," + B.subscript <$> mconcat <$> (string ",," >> many1Till inline' (try $ string ",,")) link :: PandocMonad m => VwParser m Inlines -link = try $ do +link = try $ do string "[[" contents <- lookAhead $ manyTill anyChar (string "]]") - case '|' `elem` contents of + case '|' `elem` contents of False -> do - manyTill anyChar (string "]]") + manyTill anyChar (string "]]") -- not using try here because [[hell]o]] is not rendered as a link in vimwiki return $ B.link (procLink contents) "" (B.str contents) - True -> do + True -> do url <- manyTill anyChar $ char '|' lab <- mconcat <$> (manyTill inline $ string "]]") return $ B.link (procLink url) "" lab image :: PandocMonad m => VwParser m Inlines -image = try $ do +image = try $ do string "{{" contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}") images $ length $ filter (== '|') contentText images :: PandocMonad m => Int -> VwParser m Inlines images k - | k == 0 = do + | k == 0 = do imgurl <- manyTill anyChar (try $ string "}}") return $ B.image (procImgurl imgurl) "" (B.str "") | k == 1 = do @@ -578,15 +579,15 @@ images k procLink' :: String -> String procLink' s - | ((take 6 s) == "local:") = "file" ++ (drop 5 s) + | ((take 6 s) == "local:") = "file" ++ (drop 5 s) | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html" - | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", - "news:", "telnet:" ]) + | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ]) = s | s == "" = "" | (last s) == '/' = s | otherwise = s ++ ".html" - + procLink :: String -> String procLink s = procLink' x ++ y where (x, y) = break (=='#') s @@ -606,7 +607,7 @@ tag = try $ do s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space))) guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") let ss = splitBy (==':') s - return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) + return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) todoMark :: PandocMonad m => VwParser m Inlines todoMark = try $ do @@ -623,7 +624,7 @@ endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ") endlineML :: PandocMonad m => VwParser m () endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar) ---- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks +--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks nFBTTBSB :: PandocMonad m => VwParser m () nFBTTBSB = notFollowedBy newline <* @@ -639,7 +640,7 @@ hasDefMarker :: PandocMonad m => VwParser m () hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)) makeTagSpan' :: String -> Inlines -makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> +makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> B.spanWith (s, ["tag"], []) (B.str s) makeTagSpan :: String -> Inlines @@ -647,7 +648,7 @@ makeTagSpan s = (B.space) <> (makeTagSpan' s) mathTagParser :: PandocMonad m => VwParser m String mathTagParser = do - s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) + s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))) char '%' >> string s >> char '%' return s diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 7b299c56b..53fd38ffd 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -49,6 +49,7 @@ module Text.Pandoc.Shared ( toRomanNumeral, escapeURI, tabFilter, + crFilter, -- * Date/time normalizeDate, -- * Pandoc block and inline list processing @@ -279,13 +280,12 @@ escapeURI = escapeURIString (not . needsEscaping) where needsEscaping c = isSpace c || c `elem` ['<','>','|','"','{','}','[',']','^', '`'] --- | Convert tabs to spaces and filter out DOS line endings. --- Tabs will be preserved if tab stop is set to 0. +-- | Convert tabs to spaces. Tabs will be preserved if tab stop is set to 0. tabFilter :: Int -- ^ Tab stop -> T.Text -- ^ Input -> T.Text -tabFilter tabStop = T.filter (/= '\r') . T.unlines . - (if tabStop == 0 then id else map go) . T.lines +tabFilter 0 = id +tabFilter tabStop = T.unlines . map go . T.lines where go s = let (s1, s2) = T.break (== '\t') s in if T.null s2 @@ -294,6 +294,10 @@ tabFilter tabStop = T.filter (/= '\r') . T.unlines . (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") <> go (T.drop 1 s2) +-- | Strip out DOS line endings. +crFilter :: T.Text -> T.Text +crFilter = T.filter (/= '\r') + -- -- Date/time -- -- cgit v1.2.3