diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-06-20 21:52:13 +0200 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-06-20 21:52:13 +0200 |
commit | 2363e6a15bdde1c206d65461bd2e21f773dbc808 (patch) | |
tree | c4e1879cd8d4cfa17c3f6cab1aa009ceb20f5ae3 /src/Text/Pandoc/Readers/Vimwiki.hs | |
parent | 4ba5ef46aeaf979bd74d8f4a5f6cea116527ddd3 (diff) | |
download | pandoc-2363e6a15bdde1c206d65461bd2e21f773dbc808.tar.gz |
Move CR filtering from tabFilter to the readers.
The readers previously assumed that CRs had been filtered
from the input. Now we strip the CRs in the readers themselves,
before parsing. (The point of this is just to simplify the
parsers.)
Shared now exports a new function `crFilter`. [API change]
And `tabFilter` no longer filters CRs.
Diffstat (limited to 'src/Text/Pandoc/Readers/Vimwiki.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Vimwiki.hs | 151 |
1 files changed, 76 insertions, 75 deletions
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 0cfbec34d..98f04eda9 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -33,10 +33,10 @@ Conversion of vimwiki text to 'Pandoc' document. * [X] header * [X] hrule * [X] comment - * [X] blockquote - * [X] preformatted - * [X] displaymath - * [X] bulletlist / orderedlist + * [X] blockquote + * [X] preformatted + * [X] displaymath + * [X] bulletlist / orderedlist * [X] orderedlist with 1., i., a) etc identification. * [X] todo lists -- not list builder with attributes? using span. * [X] table @@ -57,8 +57,8 @@ Conversion of vimwiki text to 'Pandoc' document. * [X] sub- and super-scripts * misc: * [X] `TODO:` mark - * [X] metadata placeholders: %title and %date - * [O] control placeholders: %template and %nohtml -- %template added to + * [X] metadata placeholders: %title and %date + * [O] control placeholders: %template and %nohtml -- %template added to meta, %nohtml ignored --} @@ -66,29 +66,29 @@ module Text.Pandoc.Readers.Vimwiki ( readVimwiki ) where import Control.Monad.Except (throwError) import Control.Monad (guard) -import Data.Default +import Data.Default import Data.Maybe import Data.Monoid ((<>)) import Data.List (isInfixOf, isPrefixOf) import Data.Text (Text, unpack) import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList) -import qualified Text.Pandoc.Builder - as B (headerWith, str, space, strong, emph, strikeout, code, link, image, - spanWith, para, horizontalRule, blockQuote, bulletList, plain, - orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, +import qualified Text.Pandoc.Builder + as B (headerWith, str, space, strong, emph, strikeout, code, link, image, + spanWith, para, horizontalRule, blockQuote, bulletList, plain, + orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, setMeta, definitionList, superscript, subscript) import Text.Pandoc.Class (PandocMonad(..)) -import Text.Pandoc.Definition (Pandoc(..), Inline(Space), - Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), +import Text.Pandoc.Definition (Pandoc(..), Inline(Space), + Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), ListNumberDelim(..)) import Text.Pandoc.Options (ReaderOptions) -import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, +import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF, orderedListMarker, many1Till) -import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify) -import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, +import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify, crFilter) +import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, alphaNum) -import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, +import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, notFollowedBy, option) import Text.Parsec.Prim (many, try, updateState, getState) import Text.Parsec.Char (oneOf, space) @@ -97,7 +97,8 @@ import Text.Parsec.Prim ((<|>)) readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readVimwiki opts s = do - res <- readWithM parseVimwiki def{ stateOptions = opts } (unpack s) + res <- readWithM parseVimwiki def{ stateOptions = opts } + (unpack (crFilter s)) case res of Left e -> throwError e Right result -> return result @@ -110,7 +111,7 @@ type VwParser = ParserT [Char] ParserState specialChars :: [Char] specialChars = "=*-#[]_~{}`$|:%^," -spaceChars :: [Char] +spaceChars :: [Char] spaceChars = " \t\n" -- main parser @@ -134,7 +135,7 @@ block = do , mempty <$ comment , mixedList , preformatted - , displayMath + , displayMath , table , mempty <$ placeholder , blockQuote @@ -149,14 +150,14 @@ blockML = choice [preformatted, displayMath, table] header :: PandocMonad m => VwParser m Blocks header = try $ do - sp <- many spaceChar + sp <- many spaceChar eqs <- many1 (char '=') spaceChar let lev = length eqs guard $ lev <= 6 - contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar - >> (string eqs) >> many spaceChar >> newline) - attr <- registerHeader (makeId contents, + contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar + >> (string eqs) >> many spaceChar >> newline) + attr <- registerHeader (makeId contents, (if sp == "" then [] else ["justcenter"]), []) contents return $ B.headerWith attr lev contents @@ -184,7 +185,7 @@ blockQuote = try $ do else return $ B.blockQuote $ B.plain contents definitionList :: PandocMonad m => VwParser m Blocks -definitionList = try $ +definitionList = try $ B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT)) dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks]) @@ -199,15 +200,15 @@ dlItemWithoutDT = do return $ (mempty, dds) definitionDef :: PandocMonad m => VwParser m Blocks -definitionDef = try $ - (notFollowedBy definitionTerm) >> many spaceChar +definitionDef = try $ + (notFollowedBy definitionTerm) >> many spaceChar >> (definitionDef1 <|> definitionDef2) definitionDef1 :: PandocMonad m => VwParser m Blocks definitionDef1 = try $ mempty <$ defMarkerE definitionDef2 :: PandocMonad m => VwParser m Blocks -definitionDef2 = try $ B.plain <$> +definitionDef2 = try $ B.plain <$> (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline) @@ -218,11 +219,11 @@ definitionTerm = try $ do return x definitionTerm1 :: PandocMonad m => VwParser m Inlines -definitionTerm1 = try $ +definitionTerm1 = try $ trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE) definitionTerm2 :: PandocMonad m => VwParser m Inlines -definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' +definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM)) defMarkerM :: PandocMonad m => VwParser m Char @@ -236,8 +237,8 @@ hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) preformatted :: PandocMonad m => VwParser m Blocks preformatted = try $ do - many spaceChar >> string "{{{" - attrText <- many (noneOf "\n") + many spaceChar >> string "{{{" + attrText <- many (noneOf "\n") lookAhead newline contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" >> many spaceChar >> newline)) @@ -246,14 +247,14 @@ preformatted = try $ do else return $ B.codeBlockWith (makeAttr attrText) contents makeAttr :: String -> Attr -makeAttr s = +makeAttr s = let xs = splitBy (`elem` " \t") s in ("", [], catMaybes $ map nameValue xs) nameValue :: String -> Maybe (String, String) -nameValue s = +nameValue s = let t = splitBy (== '=') s in - if length t /= 2 + if length t /= 2 then Nothing else let (a, b) = (head t, last t) in if ((length b) < 2) || ((head b, last b) /= ('"', '"')) @@ -269,7 +270,7 @@ displayMath = try $ do >> many spaceChar >> newline)) let contentsWithTags | mathTag == "" = "\\[" ++ contents ++ "\n\\]" - | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents + | otherwise = "\\begin{" ++ mathTag ++ "}" ++ contents ++ "\n\\end{" ++ mathTag ++ "}" return $ B.plain $ B.str contentsWithTags @@ -286,7 +287,7 @@ mixedList' prevInd = do else do listStart curLine <- listItemContent - let listBuilder = + let listBuilder = if builder == "ul" then B.bulletList else B.orderedList (subList, lowInd) <- (mixedList' curInd) if lowInd >= curInd @@ -297,7 +298,7 @@ mixedList' prevInd = do then return ([listBuilder curList], endInd) else return (curList, endInd) else do - let (curList, endInd) = ((combineList curLine subList), + let (curList, endInd) = ((combineList curLine subList), lowInd) if curInd > prevInd then return ([listBuilder curList], endInd) @@ -328,13 +329,13 @@ blocksThenInline = try $ do return $ mconcat $ y ++ [x] listTodoMarker :: PandocMonad m => VwParser m Inlines -listTodoMarker = try $ do - x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) +listTodoMarker = try $ do + x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) (oneOf " .oOX") return $ makeListMarkerSpan x makeListMarkerSpan :: Char -> Inlines -makeListMarkerSpan x = +makeListMarkerSpan x = let cl = case x of ' ' -> "done0" '.' -> "done1" @@ -347,9 +348,9 @@ makeListMarkerSpan x = combineList :: Blocks -> [Blocks] -> [Blocks] combineList x [y] = case toList y of - [BulletList z] -> [fromList $ (toList x) + [BulletList z] -> [fromList $ (toList x) ++ [BulletList z]] - [OrderedList attr z] -> [fromList $ (toList x) + [OrderedList attr z] -> [fromList $ (toList x) ++ [OrderedList attr z]] _ -> x:[y] combineList x xs = x:xs @@ -365,9 +366,9 @@ bulletListMarkers :: PandocMonad m => VwParser m String bulletListMarkers = "ul" <$ (char '*' <|> char '-') orderedListMarkers :: PandocMonad m => VwParser m String -orderedListMarkers = - ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) - <$> orderedListMarker +orderedListMarkers = + ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) + <$> orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) <|> ("ol" <$ char '#') @@ -397,14 +398,14 @@ table2 = try $ do tableHeaderSeparator :: PandocMonad m => VwParser m () tableHeaderSeparator = try $ do - many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') + many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') >> many spaceChar >> newline return () - + tableRow :: PandocMonad m => VwParser m [Blocks] tableRow = try $ do many spaceChar >> char '|' - s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar + s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar >> newline)) guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|") tr <- many tableCell @@ -416,25 +417,25 @@ tableCell = try $ B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|')) placeholder :: PandocMonad m => VwParser m () -placeholder = try $ +placeholder = try $ (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh ph :: PandocMonad m => String -> VwParser m () ph s = try $ do many spaceChar >> (string $ '%':s) >> spaceChar - contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) + contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) --use lookAhead because of placeholder in the whitespace parser let meta' = return $ B.setMeta s contents nullMeta :: F Meta updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' } noHtmlPh :: PandocMonad m => VwParser m () noHtmlPh = try $ - () <$ (many spaceChar >> string "%nohtml" >> many spaceChar + () <$ (many spaceChar >> string "%nohtml" >> many spaceChar >> (lookAhead newline)) templatePh :: PandocMonad m => VwParser m () templatePh = try $ - () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") + () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") >> (lookAhead newline)) -- inline parser @@ -475,7 +476,7 @@ str :: PandocMonad m => VwParser m Inlines str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars) whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines -whitespace endline = B.space <$ (skipMany1 spaceChar <|> +whitespace endline = B.space <$ (skipMany1 spaceChar <|> (try (newline >> (comment <|> placeholder)))) <|> B.softbreak <$ endline @@ -493,24 +494,24 @@ bareURL = try $ do strong :: PandocMonad m => VwParser m Inlines strong = try $ do s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*") - guard $ (not $ (head s) `elem` spaceChars) + guard $ (not $ (head s) `elem` spaceChars) && (not $ (last s) `elem` spaceChars) char '*' - contents <- mconcat <$> (manyTill inline' $ char '*' + contents <- mconcat <$> (manyTill inline' $ char '*' >> notFollowedBy alphaNum) - return $ (B.spanWith ((makeId contents), [], []) mempty) + return $ (B.spanWith ((makeId contents), [], []) mempty) <> (B.strong contents) -makeId :: Inlines -> String +makeId :: Inlines -> String makeId i = concat (stringify <$> (toList i)) emph :: PandocMonad m => VwParser m Inlines emph = try $ do s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_") - guard $ (not $ (head s) `elem` spaceChars) + guard $ (not $ (head s) `elem` spaceChars) && (not $ (last s) `elem` spaceChars) char '_' - contents <- mconcat <$> (manyTill inline' $ char '_' + contents <- mconcat <$> (manyTill inline' $ char '_' >> notFollowedBy alphaNum) return $ B.emph contents @@ -532,32 +533,32 @@ superscript = try $ subscript :: PandocMonad m => VwParser m Inlines subscript = try $ - B.subscript <$> mconcat <$> (string ",," + B.subscript <$> mconcat <$> (string ",," >> many1Till inline' (try $ string ",,")) link :: PandocMonad m => VwParser m Inlines -link = try $ do +link = try $ do string "[[" contents <- lookAhead $ manyTill anyChar (string "]]") - case '|' `elem` contents of + case '|' `elem` contents of False -> do - manyTill anyChar (string "]]") + manyTill anyChar (string "]]") -- not using try here because [[hell]o]] is not rendered as a link in vimwiki return $ B.link (procLink contents) "" (B.str contents) - True -> do + True -> do url <- manyTill anyChar $ char '|' lab <- mconcat <$> (manyTill inline $ string "]]") return $ B.link (procLink url) "" lab image :: PandocMonad m => VwParser m Inlines -image = try $ do +image = try $ do string "{{" contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}") images $ length $ filter (== '|') contentText images :: PandocMonad m => Int -> VwParser m Inlines images k - | k == 0 = do + | k == 0 = do imgurl <- manyTill anyChar (try $ string "}}") return $ B.image (procImgurl imgurl) "" (B.str "") | k == 1 = do @@ -578,15 +579,15 @@ images k procLink' :: String -> String procLink' s - | ((take 6 s) == "local:") = "file" ++ (drop 5 s) + | ((take 6 s) == "local:") = "file" ++ (drop 5 s) | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html" - | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", - "news:", "telnet:" ]) + | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + "news:", "telnet:" ]) = s | s == "" = "" | (last s) == '/' = s | otherwise = s ++ ".html" - + procLink :: String -> String procLink s = procLink' x ++ y where (x, y) = break (=='#') s @@ -606,7 +607,7 @@ tag = try $ do s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space))) guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") let ss = splitBy (==':') s - return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) + return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) todoMark :: PandocMonad m => VwParser m Inlines todoMark = try $ do @@ -623,7 +624,7 @@ endlineBQ = () <$ try (newline <* nFBTTBSB <* string " ") endlineML :: PandocMonad m => VwParser m () endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar) ---- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks +--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks nFBTTBSB :: PandocMonad m => VwParser m () nFBTTBSB = notFollowedBy newline <* @@ -639,7 +640,7 @@ hasDefMarker :: PandocMonad m => VwParser m () hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)) makeTagSpan' :: String -> Inlines -makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> +makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> B.spanWith (s, ["tag"], []) (B.str s) makeTagSpan :: String -> Inlines @@ -647,7 +648,7 @@ makeTagSpan s = (B.space) <> (makeTagSpan' s) mathTagParser :: PandocMonad m => VwParser m String mathTagParser = do - s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) + s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))) char '%' >> string s >> char '%' return s |