Move CR filtering from tabFilter to the readers.

The readers previously assumed that CRs had been filtered from the input. Now we strip the CRs in the readers themselves, before parsing. (The point of this is just to simplify the parsers.) Shared now exports a new function `crFilter`. [API change] And `tabFilter` no longer filters CRs.
author: John MacFarlane <jgm@berkeley.edu> 2017-06-20 21:52:13 +0200
committer: John MacFarlane <jgm@berkeley.edu> 2017-06-20 21:52:13 +0200
commit: 2363e6a15bdde1c206d65461bd2e21f773dbc808 (patch)
tree: c4e1879cd8d4cfa17c3f6cab1aa009ceb20f5ae3 /src/Text/Pandoc/Readers/Vimwiki.hs
parent: 4ba5ef46aeaf979bd74d8f4a5f6cea116527ddd3 (diff)
download: pandoc-2363e6a15bdde1c206d65461bd2e21f773dbc808.tar.gz
1 files changed, 76 insertions, 75 deletions
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 0cfbec34d..98f04eda9 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -33,10 +33,10 @@ Conversion of vimwiki text to 'Pandoc' document.
     * [X] header
     * [X] hrule
     * [X] comment
-    * [X] blockquote 
-    * [X] preformatted 
-    * [X] displaymath 
-    * [X] bulletlist / orderedlist 
+    * [X] blockquote
+    * [X] preformatted
+    * [X] displaymath
+    * [X] bulletlist / orderedlist
         * [X] orderedlist with 1., i., a) etc identification.
         * [X] todo lists -- not list builder with attributes? using span.
     * [X] table
@@ -57,8 +57,8 @@ Conversion of vimwiki text to 'Pandoc' document.
     * [X] sub- and super-scripts
 * misc:
     * [X] `TODO:` mark
-    * [X] metadata placeholders: %title and %date  
-    * [O] control placeholders: %template and %nohtml -- %template added to 
+    * [X] metadata placeholders: %title and %date
+    * [O] control placeholders: %template and %nohtml -- %template added to
           meta, %nohtml ignored
 --}
 
@@ -66,29 +66,29 @@ module Text.Pandoc.Readers.Vimwiki ( readVimwiki
                                  ) where
 import Control.Monad.Except (throwError)
 import Control.Monad (guard)
-import Data.Default 
+import Data.Default
 import Data.Maybe
 import Data.Monoid ((<>))
 import Data.List (isInfixOf, isPrefixOf)
 import Data.Text (Text, unpack)
 import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, fromList, toList)
-import qualified Text.Pandoc.Builder 
-  as B (headerWith, str, space, strong, emph, strikeout, code, link, image, 
-        spanWith, para, horizontalRule, blockQuote, bulletList, plain, 
-        orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith, 
+import qualified Text.Pandoc.Builder
+  as B (headerWith, str, space, strong, emph, strikeout, code, link, image,
+        spanWith, para, horizontalRule, blockQuote, bulletList, plain,
+        orderedList, simpleTable, softbreak, codeBlockWith, imageWith, divWith,
         setMeta, definitionList, superscript, subscript)
 import Text.Pandoc.Class (PandocMonad(..))
-import Text.Pandoc.Definition (Pandoc(..), Inline(Space), 
-  Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..), 
+import Text.Pandoc.Definition (Pandoc(..), Inline(Space),
+  Block(BulletList, OrderedList), Attr, nullMeta, Meta, ListNumberStyle(..),
   ListNumberDelim(..))
 import Text.Pandoc.Options (ReaderOptions)
-import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState, 
+import Text.Pandoc.Parsing (readWithM, ParserT, stateOptions, ParserState,
   stateMeta', blanklines, registerHeader, spaceChar, emailAddress, uri, F, runF,
   orderedListMarker, many1Till)
-import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify)
-import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf, 
+import Text.Pandoc.Shared (splitBy, stripFirstAndLast, stringify, crFilter)
+import Text.Parsec.Char (spaces, char, anyChar, newline, string, noneOf,
   alphaNum)
-import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1, 
+import Text.Parsec.Combinator (eof, choice, many1, manyTill, count, skipMany1,
   notFollowedBy, option)
 import Text.Parsec.Prim (many, try, updateState, getState)
 import Text.Parsec.Char (oneOf, space)
@@ -97,7 +97,8 @@ import Text.Parsec.Prim ((<|>))
 
 readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
 readVimwiki opts s = do
-  res <- readWithM parseVimwiki def{ stateOptions = opts } (unpack s)
+  res <- readWithM parseVimwiki def{ stateOptions = opts }
+            (unpack (crFilter s))
   case res of
        Left e -> throwError e
        Right result -> return result
@@ -110,7 +111,7 @@ type VwParser = ParserT [Char] ParserState
 specialChars :: [Char]
 specialChars = "=*-#[]_~{}`$|:%^,"
 
-spaceChars :: [Char] 
+spaceChars :: [Char]
 spaceChars = " \t\n"
 
 -- main parser
@@ -134,7 +135,7 @@ block = do
                 , mempty <$ comment
                 , mixedList
                 , preformatted
-                , displayMath 
+                , displayMath
                 , table
                 , mempty <$ placeholder
                 , blockQuote
@@ -149,14 +150,14 @@ blockML = choice [preformatted, displayMath, table]
 
 header :: PandocMonad m => VwParser m Blocks
 header = try $ do
-  sp <- many spaceChar 
+  sp <- many spaceChar
   eqs <- many1 (char '=')
   spaceChar
   let lev = length eqs
   guard $ lev <= 6
-  contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar 
-    >> (string eqs) >> many spaceChar >> newline) 
-  attr <- registerHeader (makeId contents, 
+  contents <- trimInlines . mconcat <$> manyTill inline (try $ spaceChar
+    >> (string eqs) >> many spaceChar >> newline)
+  attr <- registerHeader (makeId contents,
     (if sp == "" then [] else ["justcenter"]), []) contents
   return $ B.headerWith attr lev contents
 
@@ -184,7 +185,7 @@ blockQuote = try $ do
      else return $ B.blockQuote $ B.plain contents
 
 definitionList :: PandocMonad m => VwParser m Blocks
-definitionList = try $ 
+definitionList = try $
   B.definitionList <$> (many1 (dlItemWithDT <|> dlItemWithoutDT))
 
 dlItemWithDT :: PandocMonad m => VwParser m (Inlines, [Blocks])
@@ -199,15 +200,15 @@ dlItemWithoutDT = do
   return $ (mempty, dds)
 
 definitionDef :: PandocMonad m => VwParser m Blocks
-definitionDef = try $ 
-  (notFollowedBy definitionTerm) >> many spaceChar 
+definitionDef = try $
+  (notFollowedBy definitionTerm) >> many spaceChar
     >> (definitionDef1 <|> definitionDef2)
 
 definitionDef1 :: PandocMonad m => VwParser m Blocks
 definitionDef1 = try $ mempty <$ defMarkerE
 
 definitionDef2 :: PandocMonad m => VwParser m Blocks
-definitionDef2 = try $ B.plain <$> 
+definitionDef2 = try $ B.plain <$>
   (defMarkerM >> (trimInlines . mconcat <$> many inline') <* newline)
 
 
@@ -218,11 +219,11 @@ definitionTerm = try $ do
   return x
 
 definitionTerm1 :: PandocMonad m => VwParser m Inlines
-definitionTerm1 = try $ 
+definitionTerm1 = try $
   trimInlines . mconcat <$> manyTill inline' (try $ defMarkerE)
 
 definitionTerm2 :: PandocMonad m => VwParser m Inlines
-definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline' 
+definitionTerm2 = try $ trimInlines . mconcat <$> manyTill inline'
   (try $ lookAhead $ (defMarkerM >> notFollowedBy hasDefMarkerM))
 
 defMarkerM :: PandocMonad m => VwParser m Char
@@ -236,8 +237,8 @@ hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM)
 
 preformatted :: PandocMonad m => VwParser m Blocks
 preformatted = try $ do
-  many spaceChar >> string "{{{" 
-  attrText <- many (noneOf "\n") 
+  many spaceChar >> string "{{{"
+  attrText <- many (noneOf "\n")
   lookAhead newline
   contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}"
     >> many spaceChar >> newline))
@@ -246,14 +247,14 @@ preformatted = try $ do
      else return $ B.codeBlockWith (makeAttr attrText) contents
 
 makeAttr :: String -> Attr
-makeAttr s = 
+makeAttr s =
   let xs = splitBy (`elem` " \t") s in
     ("", [], catMaybes $ map nameValue xs)
 
 nameValue :: String -> Maybe (String, String)
-nameValue s = 
+nameValue s =
   let t = splitBy (== '=') s in
-    if length t /= 2 
+    if length t /= 2
       then Nothing
       else let (a, b) = (head t, last t) in
              if ((length b) < 2) || ((head b, last b) /= ('"', '"'))
@@ -269,7 +270,7 @@ displayMath = try $ do
     >> many spaceChar >> newline))
   let contentsWithTags
         | mathTag == "" = "\\[" ++ contents ++ "\n\\]"
-        | otherwise     = "\\begin{" ++ mathTag ++ "}" ++ contents 
+        | otherwise     = "\\begin{" ++ mathTag ++ "}" ++ contents
                           ++ "\n\\end{" ++ mathTag ++ "}"
   return $ B.plain $ B.str contentsWithTags
 
@@ -286,7 +287,7 @@ mixedList' prevInd = do
      else do
           listStart
           curLine <- listItemContent
-          let listBuilder = 
+          let listBuilder =
                 if builder == "ul" then B.bulletList else B.orderedList
           (subList, lowInd) <- (mixedList' curInd)
           if lowInd >= curInd
@@ -297,7 +298,7 @@ mixedList' prevInd = do
                      then return ([listBuilder curList], endInd)
                      else return (curList, endInd)
              else do
-                  let (curList, endInd) = ((combineList curLine subList), 
+                  let (curList, endInd) = ((combineList curLine subList),
                                            lowInd)
                   if curInd > prevInd
                      then return ([listBuilder curList], endInd)
@@ -328,13 +329,13 @@ blocksThenInline = try $ do
   return $ mconcat $ y ++ [x]
 
 listTodoMarker :: PandocMonad m => VwParser m Inlines
-listTodoMarker = try $ do 
-  x <- between (many spaceChar >> char '[') (char ']' >> spaceChar) 
+listTodoMarker = try $ do
+  x <- between (many spaceChar >> char '[') (char ']' >> spaceChar)
     (oneOf " .oOX")
   return $ makeListMarkerSpan x
 
 makeListMarkerSpan :: Char -> Inlines
-makeListMarkerSpan x = 
+makeListMarkerSpan x =
   let cl = case x of
             ' ' -> "done0"
             '.' -> "done1"
@@ -347,9 +348,9 @@ makeListMarkerSpan x =
 
 combineList :: Blocks -> [Blocks] -> [Blocks]
 combineList x [y] = case toList y of
-                            [BulletList z] -> [fromList $ (toList x) 
+                            [BulletList z] -> [fromList $ (toList x)
                                               ++ [BulletList z]]
-                            [OrderedList attr z] -> [fromList $ (toList x) 
+                            [OrderedList attr z] -> [fromList $ (toList x)
                                                     ++ [OrderedList attr z]]
                             _ -> x:[y]
 combineList x xs = x:xs
@@ -365,9 +366,9 @@ bulletListMarkers :: PandocMonad m => VwParser m String
 bulletListMarkers = "ul" <$ (char '*' <|> char '-')
 
 orderedListMarkers :: PandocMonad m => VwParser m String
-orderedListMarkers = 
-  ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen) 
-    <$> orderedListMarker 
+orderedListMarkers =
+  ("ol" <$ (choice $ (orderedListMarker Decimal Period):(($OneParen)
+    <$> orderedListMarker
     <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha])))
     <|> ("ol" <$ char '#')
 
@@ -397,14 +398,14 @@ table2 = try $ do
 
 tableHeaderSeparator :: PandocMonad m => VwParser m ()
 tableHeaderSeparator = try $ do
-  many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|') 
+  many spaceChar >> char '|' >> many1 ((many1 $ char '-') >> char '|')
     >> many spaceChar >> newline
   return ()
-  
+
 tableRow :: PandocMonad m => VwParser m [Blocks]
 tableRow = try $ do
   many spaceChar >> char '|'
-  s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar 
+  s <- lookAhead $ manyTill anyChar (try (char '|' >> many spaceChar
     >> newline))
   guard $ not $ "||" `isInfixOf` ("|" ++ s ++ "|")
   tr <- many tableCell
@@ -416,25 +417,25 @@ tableCell = try $
   B.plain <$> trimInlines . mconcat <$> (manyTill inline' (char '|'))
 
 placeholder :: PandocMonad m => VwParser m ()
-placeholder = try $ 
+placeholder = try $
   (choice (ph <$> ["title", "date"])) <|> noHtmlPh <|> templatePh
 
 ph :: PandocMonad m => String -> VwParser m ()
 ph s = try $ do
   many spaceChar >> (string $ '%':s) >> spaceChar
-  contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline))) 
+  contents <- (trimInlines . mconcat <$> (manyTill inline (lookAhead newline)))
     --use lookAhead because of placeholder in the whitespace parser
   let meta' = return $ B.setMeta s contents nullMeta :: F Meta
   updateState $ \st -> st { stateMeta' = stateMeta' st <> meta' }
 
 noHtmlPh :: PandocMonad m => VwParser m ()
 noHtmlPh = try $
-  () <$ (many spaceChar >> string "%nohtml" >> many spaceChar 
+  () <$ (many spaceChar >> string "%nohtml" >> many spaceChar
     >> (lookAhead newline))
 
 templatePh :: PandocMonad m => VwParser m ()
 templatePh = try $
-  () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n") 
+  () <$ (many spaceChar >> string "%template" >> (many $ noneOf "\n")
     >> (lookAhead newline))
 
 -- inline parser
@@ -475,7 +476,7 @@ str :: PandocMonad m => VwParser m Inlines
 str = B.str <$> (many1 $ noneOf $ spaceChars ++ specialChars)
 
 whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines
-whitespace endline = B.space <$ (skipMany1 spaceChar <|> 
+whitespace endline = B.space <$ (skipMany1 spaceChar <|>
                                  (try (newline >> (comment <|> placeholder))))
          <|> B.softbreak <$ endline
 
@@ -493,24 +494,24 @@ bareURL = try $ do
 strong :: PandocMonad m => VwParser m Inlines
 strong = try $ do
   s <- lookAhead $ between (char '*') (char '*') (many1 $ noneOf "*")
-  guard $ (not $ (head s) `elem` spaceChars) 
+  guard $ (not $ (head s) `elem` spaceChars)
              && (not $ (last s) `elem` spaceChars)
   char '*'
-  contents <- mconcat <$> (manyTill inline' $ char '*' 
+  contents <- mconcat <$> (manyTill inline' $ char '*'
     >> notFollowedBy alphaNum)
-  return $ (B.spanWith ((makeId contents), [], []) mempty) 
+  return $ (B.spanWith ((makeId contents), [], []) mempty)
     <> (B.strong contents)
 
-makeId :: Inlines -> String 
+makeId :: Inlines -> String
 makeId i = concat (stringify <$> (toList i))
 
 emph :: PandocMonad m => VwParser m Inlines
 emph = try $ do
   s <- lookAhead $ between (char '_') (char '_') (many1 $ noneOf "_")
-  guard $ (not $ (head s) `elem` spaceChars) 
+  guard $ (not $ (head s) `elem` spaceChars)
           && (not $ (last s) `elem` spaceChars)
   char '_'
-  contents <- mconcat <$> (manyTill inline' $ char '_' 
+  contents <- mconcat <$> (manyTill inline' $ char '_'
     >> notFollowedBy alphaNum)
   return $ B.emph contents
 
@@ -532,32 +533,32 @@ superscript = try $
 
 subscript :: PandocMonad m => VwParser m Inlines
 subscript = try $
-  B.subscript <$> mconcat <$> (string ",," 
+  B.subscript <$> mconcat <$> (string ",,"
     >> many1Till inline' (try $ string ",,"))
 
 link :: PandocMonad m => VwParser m Inlines
-link = try $ do 
+link = try $ do
   string "[["
   contents <- lookAhead $ manyTill anyChar (string "]]")
-  case '|' `elem` contents of 
+  case '|' `elem` contents of
                   False -> do
-                    manyTill anyChar (string "]]") 
+                    manyTill anyChar (string "]]")
 -- not using try here because [[hell]o]] is not rendered as a link in vimwiki
                     return $ B.link (procLink contents) "" (B.str contents)
-                  True  -> do 
+                  True  -> do
                     url <- manyTill anyChar $ char '|'
                     lab <- mconcat <$> (manyTill inline $ string "]]")
                     return $ B.link (procLink url) "" lab
 
 image :: PandocMonad m => VwParser m Inlines
-image = try $ do 
+image = try $ do
   string "{{"
   contentText <- lookAhead $ manyTill (noneOf "\n") (try $ string "}}")
   images $ length $ filter (== '|') contentText
 
 images :: PandocMonad m => Int -> VwParser m Inlines
 images k
-  | k == 0 = do 
+  | k == 0 = do
            imgurl <- manyTill anyChar (try $ string "}}")
            return $ B.image (procImgurl imgurl) "" (B.str "")
   | k == 1 = do
@@ -578,15 +579,15 @@ images k
 
 procLink' :: String -> String
 procLink' s
-  | ((take 6 s) == "local:") = "file" ++ (drop 5 s) 
+  | ((take 6 s) == "local:") = "file" ++ (drop 5 s)
   | ((take 6 s) == "diary:") = "diary/" ++ (drop 6 s) ++ ".html"
-  | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", 
-                              "news:", "telnet:" ]) 
+  | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
+                              "news:", "telnet:" ])
                              = s
   | s == ""                  = ""
   | (last s) == '/'          = s
   | otherwise                = s ++ ".html"
-  
+
 procLink :: String -> String
 procLink s = procLink' x ++ y
   where (x, y) = break (=='#') s
@@ -606,7 +607,7 @@ tag = try $ do
   s <- manyTill (noneOf spaceChars) (try (char ':' >> (lookAhead space)))
   guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":")
   let ss = splitBy (==':') s
-  return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss)) 
+  return $ mconcat $ (makeTagSpan' $ head ss):(makeTagSpan <$> (tail ss))
 
 todoMark :: PandocMonad m => VwParser m Inlines
 todoMark = try $ do
@@ -623,7 +624,7 @@ endlineBQ = () <$ try (newline <* nFBTTBSB <* string "    ")
 endlineML :: PandocMonad m => VwParser m ()
 endlineML = () <$ try (newline <* nFBTTBSB <* many1 spaceChar)
 
---- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks 
+--- nFBTTBSB is short for notFollowedByThingsThatBreakSoftBreaks
 nFBTTBSB :: PandocMonad m => VwParser m ()
 nFBTTBSB =
     notFollowedBy newline <*
@@ -639,7 +640,7 @@ hasDefMarker :: PandocMonad m => VwParser m ()
 hasDefMarker = () <$ (manyTill (noneOf "\n") (string "::" >> oneOf spaceChars))
 
 makeTagSpan' :: String -> Inlines
-makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> 
+makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <>
                   B.spanWith (s, ["tag"], []) (B.str s)
 
 makeTagSpan :: String -> Inlines
@@ -647,7 +648,7 @@ makeTagSpan s = (B.space) <> (makeTagSpan' s)
 
 mathTagParser :: PandocMonad m => VwParser m String
 mathTagParser = do
-  s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars) 
+  s <- try $ lookAhead (char '%' >> (manyTill (noneOf spaceChars)
     (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)))
   char '%' >> string s >> char '%'
   return s
author	John MacFarlane <jgm@berkeley.edu>	2017-06-20 21:52:13 +0200
committer	John MacFarlane <jgm@berkeley.edu>	2017-06-20 21:52:13 +0200
commit	2363e6a15bdde1c206d65461bd2e21f773dbc808 (patch)
tree	c4e1879cd8d4cfa17c3f6cab1aa009ceb20f5ae3 /src/Text/Pandoc/Readers/Vimwiki.hs
parent	4ba5ef46aeaf979bd74d8f4a5f6cea116527ddd3 (diff)
download	pandoc-2363e6a15bdde1c206d65461bd2e21f773dbc808.tar.gz