diff options
Diffstat (limited to 'src/Text/Pandoc/Readers')
47 files changed, 2427 insertions, 2307 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 368c86d4f..40b6f77c9 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.CommonMark Copyright : Copyright (C) 2015-2019 John MacFarlane @@ -18,9 +20,9 @@ where import Prelude import CMarkGFM import Control.Monad.State -import Data.List (groupBy) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojiToInline) @@ -40,24 +42,24 @@ readCommonMark opts s = return $ [ extTable | isEnabled Ext_pipe_tables opts ] ++ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ] -convertEmojis :: String -> [Inline] -convertEmojis s@(':':xs) = - case break (==':') xs of - (ys,':':zs) -> +convertEmojis :: Text -> [Inline] +convertEmojis s@(T.uncons -> Just (':',xs)) = + case T.break (==':') xs of + (ys, T.uncons -> Just (':',zs)) -> case emojiToInline ys of Just em -> em : convertEmojis zs - Nothing -> Str (':' : ys) : convertEmojis (':':zs) + Nothing -> Str (":" <> ys) : convertEmojis (":" <> zs) _ -> [Str s] convertEmojis s = - case break (==':') s of + case T.break (==':') s of ("","") -> [] (_,"") -> [Str s] - (xs,ys) -> Str xs:convertEmojis ys + (xs,ys) -> Str xs : convertEmojis ys addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block +addHeaderId :: ReaderOptions -> Block -> State (Set.Set Text) Block addHeaderId opts (Header lev (_,classes,kvs) ils) = do ids <- get let ident = uniqueIdent (readerExtensions opts) ils ids @@ -82,14 +84,14 @@ addBlock _ (Node _ THEMATIC_BREAK _) = addBlock opts (Node _ BLOCK_QUOTE nodes) = (BlockQuote (addBlocks opts nodes) :) addBlock opts (Node _ (HTML_BLOCK t) _) - | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawBlock (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = id addBlock _ (Node _ (CODE_BLOCK info t) _) = - (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) + (CodeBlock ("", take 1 (T.words info), []) t :) addBlock opts (Node _ (HEADING lev) nodes) = (Header lev ("",[],[]) (addInlines opts nodes) :) addBlock opts (Node _ (LIST listAttrs) nodes) = @@ -176,29 +178,28 @@ addInlines opts = foldr (addInline opts) [] addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++) - where raw = unpack t - clumps = groupBy samekind raw + where clumps = T.groupBy samekind t samekind ' ' ' ' = True samekind ' ' _ = False samekind _ ' ' = False samekind _ _ = True - toinl (' ':_) = [Space] - toinl xs = if isEnabled Ext_emoji opts - then convertEmojis xs - else [Str xs] + toinl (T.uncons -> Just (' ', _)) = [Space] + toinl xs = if isEnabled Ext_emoji opts + then convertEmojis xs + else [Str xs] addInline _ (Node _ LINEBREAK _) = (LineBreak :) addInline opts (Node _ SOFTBREAK _) | isEnabled Ext_hard_line_breaks opts = (LineBreak :) | otherwise = (SoftBreak :) addInline opts (Node _ (HTML_INLINE t) _) - | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawInline (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = id addInline _ (Node _ (CODE t) _) = - (Code ("",[],[]) (unpack t) :) + (Code ("",[],[]) t :) addInline opts (Node _ EMPH nodes) = (Emph (addInlines opts nodes) :) addInline opts (Node _ STRONG nodes) = @@ -206,7 +207,7 @@ addInline opts (Node _ STRONG nodes) = addInline opts (Node _ STRIKETHROUGH nodes) = (Strikeout (addInlines opts nodes) :) addInline opts (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Link nullAttr (addInlines opts nodes) (url, title) :) addInline opts (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Image nullAttr (addInlines opts nodes) (url, title) :) addInline _ _ = id diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index ceb63ac84..1aa1dfaa4 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -19,6 +19,7 @@ import Control.Monad.Except (guard, liftM2, throwError) import qualified Data.Foldable as F import Data.Maybe (fromMaybe) import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.Definition @@ -70,7 +71,6 @@ parseCreole = do eof return $ B.doc bs - -- -- block parsers -- @@ -92,9 +92,9 @@ nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart >> manyTill content nowikiEnd) where content = brackets <|> line - brackets = try $ option "" ((:[]) <$> newline) - <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol) - line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol + brackets = try $ option "" (T.singleton <$> newline) + <+> (char ' ' >> (manyChar (char ' ') <+> textStr "}}}") <* eol) + line = option "" (T.singleton <$> newline) <+> manyTillChar anyChar eol eol = lookAhead $ try $ nowikiEnd <|> newline nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline @@ -106,7 +106,7 @@ header = try $ do fmap length (many1 (char '=')) guard $ level <= 6 skipSpaces - content <- B.str <$> manyTill (noneOf "\n") headerEnd + content <- B.str <$> manyTillChar (noneOf "\n") headerEnd return $ B.header level content where headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline @@ -204,7 +204,7 @@ inline = choice [ whitespace escapedChar :: PandocMonad m => CRLParser m B.Inlines escapedChar = - fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ") + fmap (B.str . T.singleton) (try $ char '~' >> noneOf "\t\n ") escapedLink :: PandocMonad m => CRLParser m B.Inlines escapedLink = try $ do @@ -217,8 +217,8 @@ image = try $ do (orig, src) <- wikiImg return $ B.image src "" (B.str orig) where - linkSrc = many $ noneOf "|}\n\r\t" - linkDsc = char '|' >> many (noneOf "}\n\r\t") + linkSrc = manyChar $ noneOf "|}\n\r\t" + linkDsc = char '|' >> manyChar (noneOf "}\n\r\t") wikiImg = try $ do string "{{" src <- linkSrc @@ -231,11 +231,11 @@ link = try $ do (orig, src) <- uriLink <|> wikiLink return $ B.link src "" orig where - linkSrc = many $ noneOf "|]\n\r\t" - linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines + linkSrc = manyChar $ noneOf "|]\n\r\t" + linkDsc :: PandocMonad m => Text -> CRLParser m B.Inlines linkDsc otxt = B.str <$> try (option otxt - (char '|' >> many (noneOf "]\n\r\t"))) + (char '|' >> manyChar (noneOf "]\n\r\t"))) linkImg = try $ char '|' >> image wikiLink = try $ do string "[[" @@ -248,7 +248,7 @@ link = try $ do return (B.str orig, src) inlineNowiki :: PandocMonad m => CRLParser m B.Inlines -inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end) +inlineNowiki = B.code <$> (start >> manyTillChar (noneOf "\n\r") end) where start = try $ string "{{{" end = try $ string "}}}" >> lookAhead (noneOf "}") @@ -271,11 +271,11 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) innerNewline = return B.space symbol :: PandocMonad m => CRLParser m B.Inlines -symbol = fmap (B.str . (:[])) (oneOf specialChars) +symbol = fmap (B.str . T.singleton) (oneOf specialChars) str :: PandocMonad m => CRLParser m B.Inlines str = let strChar = noneOf ("\t\n " ++ specialChars) in - fmap B.str (many1 strChar) + fmap B.str (many1Char strChar) bold :: PandocMonad m => CRLParser m B.Inlines bold = B.strong . mconcat <$> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3f44f83f8..ade9d27a3 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.DocBook Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -536,20 +537,22 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions - $ T.unpack $ crFilter inp + let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. -- Other xml instructions are simply removed from the input stream. -handleInstructions :: String -> String -handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs -handleInstructions xs = case break (=='<') xs of +handleInstructions :: Text -> Text +handleInstructions = T.pack . handleInstructions' . T.unpack + +handleInstructions' :: String -> String +handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs +handleInstructions' xs = case break (=='<') xs of (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs + ([], '<':zs) -> '<' : handleInstructions' zs + (ys, zs) -> ys ++ handleInstructions' zs getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -580,13 +583,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function -named :: String -> Element -> Bool -named s e = qName (elName e) == s +named :: Text -> Element -> Bool +named s e = qName (elName e) == T.unpack s -- @@ -611,7 +614,7 @@ addMetadataFromElement e = do [z] -> getInlines z >>= addMeta fieldname zs -> mapM getInlines zs >>= addMeta fieldname -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m () addMeta field val = modify (setMeta field val) instance HasMeta DBState where @@ -638,10 +641,8 @@ admonitionTags :: [String] admonitionTags = ["important","caution","note","tip","warning"] -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- meld text into beginning of first paragraph of Blocks. -- assumes Blocks start with a Para; if not, does nothing. @@ -668,7 +669,7 @@ getMediaobject e = do h = case atVal "depth" of "" -> [] d -> [("height", d)] - atr = (atVal "id", words $ atVal "role", w ++ h) + atr = (atVal "id", T.words $ atVal "role", w ++ h) in return (atVal "fileref", atr) let getCaption el = case filterChild (\x -> named "caption" x || named "textobject" x @@ -691,8 +692,8 @@ parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated @@ -740,7 +741,7 @@ parseBlock (Elem e) = "refsect2" -> sect 2 "refsect3" -> sect 3 "refsection" -> gets dbSectionLevel >>= sect . (+1) - l@_ | l `elem` admonitionTags -> parseAdmonition l + l@_ | l `elem` admonitionTags -> parseAdmonition $ T.pack l "area" -> skip "areaset" -> skip "areaspec" -> skip @@ -800,7 +801,7 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty parseMixed container conts = do @@ -818,7 +819,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ T.pack $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -871,9 +872,9 @@ parseBlock (Elem e) = _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> (x >= '0' && x <= '9') - || x == '.') w + || x == '.') (T.pack w) Nothing -> 0 :: Double let numrows = case bodyrows of [] -> 0 @@ -938,9 +939,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) (text . T.pack) $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "equation" -> equation e displayMath @@ -980,7 +981,7 @@ parseInline (Elem e) = "constant" -> codeWithLang "userinput" -> codeWithLang "varargs" -> return $ code "(...)" - "keycap" -> return (str $ strContent e) + "keycap" -> return (str $ T.pack $ strContent e) "keycombo" -> keycombo <$> mapM parseInline (elContent e) "menuchoice" -> menuchoice <$> @@ -992,20 +993,20 @@ parseInline (Elem e) = let title = case attrValue "endterm" e of "" -> maybe "???" xrefTitleByElem (findElementById linkend content) - endterm -> maybe "???" strContent + endterm -> maybe "???" (T.pack . strContent) (findElementById endterm content) - return $ link ('#' : linkend) "" (text title) - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + return $ link ("#" <> linkend) "" (text title) + "email" -> return $ link ("mailto:" <> T.pack (strContent e)) "" + $ str $ T.pack $ strContent e + "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e "ulink" -> link (attrValue "url" e) "" <$> innerInlines "link" -> do ils <- innerInlines let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "linkend" e + Just h -> T.pack h + _ -> "#" <> attrValue "linkend" e let ils' = if ils == mempty then str href else ils - let attr = (attrValue "id" e, words $ attrValue "role" e, []) + let attr = (attrValue "id" e, T.words $ attrValue "role" e, []) return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of @@ -1023,7 +1024,7 @@ parseInline (Elem e) = "br" -> return linebreak _ -> skip >> innerInlines where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty innerInlines = (trimInlines . mconcat) <$> @@ -1032,7 +1033,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -1062,8 +1063,8 @@ parseInline (Elem e) = -- if there's no such attribute, employ some heuristics based on what -- docbook-xsl does. xrefTitleByElem el - | not (null xrefLabel) = xrefLabel - | otherwise = case qName (elName el) of + | not (T.null xrefLabel) = xrefLabel + | otherwise = case qName (elName el) of "chapter" -> descendantContent "title" el "section" -> descendantContent "title" el "sect1" -> descendantContent "title" el @@ -1073,10 +1074,10 @@ parseInline (Elem e) = "sect5" -> descendantContent "title" el "cmdsynopsis" -> descendantContent "command" el "funcsynopsis" -> descendantContent "function" el - _ -> qName (elName el) ++ "_title" + _ -> T.pack $ qName (elName el) ++ "_title" where xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" strContent + descendantContent name = maybe "???" (T.pack . strContent) . filterElementName (\n -> qName n == name) -- | Extract a math equation from an element @@ -1088,20 +1089,20 @@ equation :: Monad m => Element -- ^ The element from which to extract a mathematical equation - -> (String -> Inlines) + -> (Text -> Inlines) -- ^ A constructor for some Inlines, taking the TeX code as input -> m Inlines equation e constructor = - return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations + return $ mconcat $ map constructor $ mathMLEquations <> latexEquations where - mathMLEquations :: [String] + mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") - (readMathML . showElement) + (readMathML . T.pack . showElement) - latexEquations :: [String] + latexEquations :: [Text] latexEquations = readMath (\x -> qName (elName x) == "mathphrase") - (concat . fmap showVerbatimCData . elContent) + (T.concat . fmap showVerbatimCData . elContent) readMath :: (Element -> Bool) -> (Element -> b) -> [b] readMath childPredicate fromElement = @@ -1111,9 +1112,10 @@ equation e constructor = -- | Get the actual text stored in a CData block. 'showContent' -- returns the text still surrounded by the [[CDATA]] tags. -showVerbatimCData :: Content -> String -showVerbatimCData (Text (CData _ d _)) = d -showVerbatimCData c = showContent c +showVerbatimCData :: Content -> Text +showVerbatimCData (Text (CData _ d _)) = T.pack d +showVerbatimCData c = T.pack $ showContent c + -- | Set the prefix of a name to 'Nothing' removePrefix :: QName -> QName diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 387c3c7e2..cd4ff01db 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -4,6 +4,7 @@ {-# LANGUAGE PatternGuards #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Docx Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -68,12 +69,12 @@ import Data.Default (Default) import Data.List (delete, intersect) import Data.Char (isSpace) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe (isJust, fromMaybe) import Data.Sequence (ViewL (..), viewl) import qualified Data.Sequence as Seq import qualified Data.Set as Set import Text.Pandoc.Builder --- import Text.Pandoc.Definition import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Options import Text.Pandoc.Readers.Docx.Combine @@ -101,14 +102,14 @@ readDocx opts bytes readDocx _ _ = throwError $ PandocSomeError "couldn't parse docx file" -data DState = DState { docxAnchorMap :: M.Map String String - , docxAnchorSet :: Set.Set String - , docxImmedPrevAnchor :: Maybe String +data DState = DState { docxAnchorMap :: M.Map T.Text T.Text + , docxAnchorSet :: Set.Set T.Text + , docxImmedPrevAnchor :: Maybe T.Text , docxMediaBag :: MediaBag , docxDropCap :: Inlines -- keep track of (numId, lvl) values for -- restarting - , docxListState :: M.Map (String, String) Integer + , docxListState :: M.Map (T.Text, T.Text) Integer , docxPrevPara :: Inlines } @@ -142,7 +143,7 @@ spansToKeep = [] divsToKeep :: [ParaStyleName] divsToKeep = ["Definition", "Definition Term"] -metaStyles :: M.Map ParaStyleName String +metaStyles :: M.Map ParaStyleName T.Text metaStyles = M.fromList [ ("Title", "title") , ("Subtitle", "subtitle") , ("Author", "author") @@ -167,7 +168,7 @@ isEmptyPar (Paragraph _ parParts) = isEmptyElem _ = True isEmptyPar _ = False -bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue) +bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue) bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp @@ -232,22 +233,22 @@ runElemToInlines Tab = space runElemToInlines SoftHyphen = text "\xad" runElemToInlines NoBreakHyphen = text "\x2011" -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString LnBrk = ['\n'] -runElemToString Tab = ['\t'] -runElemToString SoftHyphen = ['\xad'] -runElemToString NoBreakHyphen = ['\x2011'] +runElemToText :: RunElem -> T.Text +runElemToText (TextRun s) = s +runElemToText LnBrk = T.singleton '\n' +runElemToText Tab = T.singleton '\t' +runElemToText SoftHyphen = T.singleton '\xad' +runElemToText NoBreakHyphen = T.singleton '\x2011' -runToString :: Run -> String -runToString (Run _ runElems) = concatMap runElemToString runElems -runToString _ = "" +runToText :: Run -> T.Text +runToText (Run _ runElems) = T.concat $ map runElemToText runElems +runToText _ = "" -parPartToString :: ParPart -> String -parPartToString (PlainRun run) = runToString run -parPartToString (InternalHyperLink _ runs) = concatMap runToString runs -parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs -parPartToString _ = "" +parPartToText :: ParPart -> T.Text +parPartToText (PlainRun run) = runToText run +parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText _ = "" blacklistedCharStyles :: [CharStyleName] blacklistedCharStyles = ["Hyperlink"] @@ -310,7 +311,7 @@ runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) | maybe False isCodeCharStyle $ rParentStyle rs = do rPr <- resolveDependentRunStyle rs - let codeString = code $ concatMap runElemToString runElems + let codeString = code $ T.concat $ map runElemToText runElems return $ case rVertAlign rPr of Just SupScrpt -> superscript codeString Just SubScrpt -> subscript codeString @@ -328,17 +329,17 @@ runToInlines (Endnote bps) = do return $ note blksList runToInlines (InlineDrawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" extentToAttr :: Extent -> Attr extentToAttr (Just (w, h)) = ("", [], [("width", showDim w), ("height", showDim h)] ) where - showDim d = show (d / 914400) ++ "in" + showDim d = tshow (d / 914400) <> "in" extentToAttr _ = nullAttr -blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines +blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines blocksToInlinesWarn cmtId blks = do let blkList = toList blks notParaOrPlain :: Block -> Bool @@ -347,7 +348,7 @@ blocksToInlinesWarn cmtId blks = do notParaOrPlain _ = True unless ( not (any notParaOrPlain blkList)) $ lift $ P.report $ DocxParserWarning $ - "Docx comment " ++ cmtId ++ " will not retain formatting" + "Docx comment " <> cmtId <> " will not retain formatting" return $ blocksToInlines' blkList -- The majority of work in this function is done in the primed @@ -440,12 +441,12 @@ parPartToInlines' (BookMark _ anchor) = return $ spanWith (newAnchor, ["anchor"], []) mempty parPartToInlines' (Drawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt parPartToInlines' Chart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" parPartToInlines' (InternalHyperLink anchor runs) = do ils <- smushInlines <$> mapM runToInlines runs - return $ link ('#' : anchor) "" ils + return $ link ("#" <> anchor) "" ils parPartToInlines' (ExternalHyperLink target runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils @@ -463,7 +464,7 @@ isAnchorSpan (Span (_, classes, kvs) _) = null kvs isAnchorSpan _ = False -dummyAnchors :: [String] +dummyAnchors :: [T.Text] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks @@ -477,7 +478,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) , (Span (anchIdent, ["anchor"], _) cIls) <- c = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident newIls = concatMap f ils where f il | il == c = cIls @@ -490,7 +491,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} @@ -558,8 +559,8 @@ parStyleToTransform pPr else transform parStyleToTransform _ = return id -normalizeToClassName :: (FromStyleName a) => a -> String -normalizeToClassName = map go . fromStyleName +normalizeToClassName :: (FromStyleName a) => a -> T.Text +normalizeToClassName = T.map go . fromStyleName where go c | isSpace c = '-' | otherwise = c @@ -574,7 +575,8 @@ bodyPartToBlocks (Paragraph pPr parparts) return $ transform $ codeBlock $ - concatMap parPartToString parparts + T.concat $ + map parPartToText parparts | Just (style, n) <- pHeading pPr = do ils <-local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) @@ -646,7 +648,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do , ("num-id", numId) , ("format", fmt) , ("text", txt) - , ("start", show start) + , ("start", tshow start) ] modify $ \st -> st{ docxListState = -- expire all the continuation data for lists of level > this one: @@ -705,12 +707,12 @@ bodyPartToBlocks (OMathPara e) = -- replace targets with generated anchors. rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline -rewriteLink' l@(Link attr ils ('#':target, title)) = do +rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do anchorMap <- gets docxAnchorMap case M.lookup target anchorMap of Just newTarget -> do modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)} - return $ Link attr ils ('#':newTarget, title) + return $ Link attr ils ("#" <> newTarget, title) Nothing -> do modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)} return l diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index da40a80ea..82791d669 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>, diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index e7a916f1c..05d9dd697 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Fields Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) ) where import Prelude +import Data.Functor (($>)) +import qualified Data.Text as T import Text.Parsec -import Text.Parsec.String (Parser) +import Text.Parsec.Text (Parser) -type URL = String +type URL = T.Text data FieldInfo = HyperlinkField URL | UnknownField deriving (Show) -parseFieldInfo :: String -> Either ParseError FieldInfo +parseFieldInfo :: T.Text -> Either ParseError FieldInfo parseFieldInfo = parse fieldInfo "" fieldInfo :: Parser FieldInfo @@ -34,31 +37,31 @@ fieldInfo = <|> return UnknownField -escapedQuote :: Parser String -escapedQuote = string "\\\"" +escapedQuote :: Parser T.Text +escapedQuote = string "\\\"" $> "\\\"" -inQuotes :: Parser String +inQuotes :: Parser T.Text inQuotes = - (try escapedQuote) <|> (anyChar >>= (\c -> return [c])) + (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c)) -quotedString :: Parser String +quotedString :: Parser T.Text quotedString = do char '"' - concat <$> manyTill inQuotes (try (char '"')) + T.concat <$> manyTill inQuotes (try (char '"')) -unquotedString :: Parser String -unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof) +unquotedString :: Parser T.Text +unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof) -fieldArgument :: Parser String +fieldArgument :: Parser T.Text fieldArgument = quotedString <|> unquotedString -- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25 -hyperlinkSwitch :: Parser (String, String) +hyperlinkSwitch :: Parser (T.Text, T.Text) hyperlinkSwitch = do sw <- string "\\l" spaces farg <- fieldArgument - return (sw, farg) + return (T.pack sw, farg) hyperlink :: Parser URL hyperlink = do @@ -68,6 +71,6 @@ hyperlink = do farg <- fieldArgument switches <- spaces *> many hyperlinkSwitch let url = case switches of - ("\\l", s) : _ -> farg ++ ('#': s) + ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index eb24640c5..b7b7a3835 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -22,6 +22,7 @@ import Prelude import Data.List import Data.Maybe import Data.String (fromString) +import qualified Data.Text as T import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.JSON import Text.Pandoc.Readers.Docx.Parse (ParaStyleName) @@ -45,20 +46,20 @@ getNumId _ = Nothing getNumIdN :: Block -> Integer getNumIdN b = fromMaybe (-1) (getNumId b) -getText :: Block -> Maybe String +getText :: Block -> Maybe T.Text getText (Div (_, _, kvs) _) = lookup "text" kvs getText _ = Nothing data ListType = Itemized | Enumerated ListAttributes -listStyleMap :: [(String, ListNumberStyle)] +listStyleMap :: [(T.Text, ListNumberStyle)] listStyleMap = [("upperLetter", UpperAlpha), ("lowerLetter", LowerAlpha), ("upperRoman", UpperRoman), ("lowerRoman", LowerRoman), ("decimal", Decimal)] -listDelimMap :: [(String, ListNumberDelim)] +listDelimMap :: [(T.Text, ListNumberDelim)] listDelimMap = [("%1)", OneParen), ("(%1)", TwoParens), ("%1.", Period)] @@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b = _ -> Nothing getListType _ = Nothing -listParagraphDivs :: [String] +listParagraphDivs :: [T.Text] listParagraphDivs = ["list-paragraph"] listParagraphStyles :: [ParaStyleName] -listParagraphStyles = map fromString listParagraphDivs +listParagraphStyles = map (fromString . T.unpack) listParagraphDivs -- This is a first stab at going through and attaching meaning to list -- paragraphs, without an item marker, following a list item. We diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 889bd80fc..8598ada6f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead) import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) -import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) +import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC @@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show -data ReaderState = ReaderState { stateWarnings :: [String] +data ReaderState = ReaderState { stateWarnings :: [T.Text] , stateFldCharState :: FldCharState } deriving Show @@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] concatMapM f xs = liftM concat (mapM f xs) - -- This is similar to `mapMaybe`: it maps a function returning the D -- monad over a list, and only keeps the non-erroring return values. mapD :: (a -> D b) -> [a] -> D [b] @@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show -data Numb = Numb String String [LevelOverride] +data Numb = Numb T.Text T.Text [LevelOverride] deriving Show -- ilvl startOverride lvl -data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level) +data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level) deriving Show -data AbstractNumb = AbstractNumb String [Level] +data AbstractNumb = AbstractNumb T.Text [Level] deriving Show -- ilvl format string start -data Level = Level String String String (Maybe Integer) +data Level = Level T.Text T.Text T.Text (Maybe Integer) deriving Show data DocumentLocation = InDocument | InFootnote | InEndnote @@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces - (Maybe (M.Map String Element)) - (Maybe (M.Map String Element)) + (Maybe (M.Map T.Text Element)) + (Maybe (M.Map T.Text Element)) deriving Show -data Comments = Comments NameSpaces (M.Map String Element) +data Comments = Comments NameSpaces (M.Map T.Text Element) deriving Show data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer @@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String (Maybe Level) [ParPart] - | Tbl String TblGrid TblLook [Row] + | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] + | Tbl T.Text TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -279,7 +280,7 @@ data ParPart = PlainRun Run | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] - | Drawing FilePath String String B.ByteString Extent -- title, alt + | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | Chart -- placeholder for now | PlainOMath [Exp] | Field FieldInfo [Run] @@ -290,28 +291,28 @@ data ParPart = PlainRun Run data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] - | InlineDrawing FilePath String String B.ByteString Extent -- title, alt + | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | InlineChart -- placeholder deriving Show -data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen +data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show -type Target = String -type Anchor = String -type URL = String -type BookMarkId = String -type RelId = String -type ChangeId = String -type CommentId = String -type Author = String -type ChangeDate = String -type CommentDate = String +type Target = T.Text +type Anchor = T.Text +type URL = T.Text +type BookMarkId = T.Text +type RelId = T.Text +type ChangeId = T.Text +type CommentId = T.Text +type Author = T.Text +type ChangeDate = T.Text +type CommentDate = T.Text archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive -archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text]) archiveToDocxWithWarnings archive = do docXmlPath <- case getDocumentXmlPath archive of Just fp -> Right fp @@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e -getDocumentXmlPath :: Archive -> Maybe String +getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry @@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle , numInfo = Nothing , psParentStyle = Nothing , pStyleName = stName - , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName + , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName } archiveToNotes :: Archive -> Notes @@ -441,8 +442,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element + relId <- findAttrText (QName "Id" Nothing Nothing) element + target <- findAttrText (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -464,7 +465,7 @@ filePathIsMedia fp = in (dir == "word/media/") -lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do (absNumId, ovrrides) <- lookup numId $ map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs @@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrByName ns "w" "numId" element + numId <- findAttrTextByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrByName ns "w" "abstractNumId" element + absNumId <- findAttrTextByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) +elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element - | isElem ns "w" (notetype ++ "s") element = + | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -556,11 +557,11 @@ elemToNotes ns notetype element M.fromList pairs elemToNotes _ _ _ = Nothing -elemToComments :: NameSpaces -> Element -> M.Map String Element +elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -632,7 +633,7 @@ testBitMask bitMaskS n = pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle -pNumInfo :: ParagraphStyle -> Maybe (String, String) +pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle elemToBodyPart :: NameSpaces -> Element -> D BodyPart @@ -640,7 +641,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ showElement c + expsLst <- eitherToD $ readOMML $ T.pack $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -664,7 +665,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels = where pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels -expandDrawingId :: String -> D (FilePath, B.ByteString) +expandDrawingId :: T.Text -> D (FilePath, B.ByteString) expandDrawingId s = do location <- asks envLocation - target <- asks (lookupRelationship location s . envRelationships) + target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -699,12 +700,12 @@ expandDrawingId s = do Nothing -> throwError DocxError Nothing -> throwError DocxError -getTitleAndAlt :: NameSpaces -> Element -> (String, String) +getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -716,7 +717,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrByName ns "r" "embed" + >>= findAttrTextByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -726,7 +727,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrByName ns "r" "id" + >>= findAttrTextByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -795,7 +796,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ strContent instrText + info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -816,56 +817,56 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrByName ns "w" "id" element - , Just bmName <- findAttrByName ns "w" "name" element = + , Just bmId <- findAttrTextByName ns "w" "id" element + , Just bmName <- findAttrTextByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrByName ns "r" "id" element = do + , Just relId <- findAttrTextByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrByName ns "w" "anchor" element of - Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + case findAttrTextByName ns "w" "anchor" element of + Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrByName ns "w" "anchor" element = do + , Just anchor <- findAttrTextByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrByName ns "w" "id" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrByName ns "w" "id" element = + , Just cmtId <- findAttrTextByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrByName ns "w" "id" element - , Just cmtAuthor <- findAttrByName ns "w" "author" element - , Just cmtDate <- findAttrByName ns "w" "date" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element + , Just cmtAuthor <- findAttrTextByName ns "w" "author" element + , Just cmtDate <- findAttrTextByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem -lookupFootnote :: String -> Notes -> Maybe Element +lookupFootnote :: T.Text -> Notes -> Maybe Element lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s -lookupEndnote :: String -> Notes -> Maybe Element +lookupEndnote :: T.Text -> Notes -> Maybe Element lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s elemToExtent :: Element -> Extent @@ -876,7 +877,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead + >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack childElemToRun :: NameSpaces -> Element -> D Run @@ -887,7 +888,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -900,7 +901,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrByName ns "w" "id" element = do + , Just fnId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -908,7 +909,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrByName ns "w" "id" element = do + , Just enId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) getTrackedChange _ _ = Nothing @@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrByName ns "w" "val") + (fmap ParaStyleId . findAttrTextByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1010,7 +1011,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" >>= + findAttrTextByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1020,12 +1021,12 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = strContent element + let str = T.pack $ strContent element font <- asks envFont case font of Nothing -> return $ TextRun str Just f -> return . TextRun $ - map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str + T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab | isElem ns "w" "softHyphen" element = return SoftHyphen @@ -1043,11 +1044,11 @@ getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = case readLitChar ("\\x" ++ s) of - [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char + [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = stringToFont =<< findAttrByName ns "w" "font" element + getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element lowerFromPrivate ('F':xs) = '0':xs lowerFromPrivate xs = xs getSymChar _ _ = TextRun "" @@ -1059,7 +1060,7 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - stringToFont =<< + textToFont . T.pack =<< foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index ac2d6fa07..f81707e92 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -3,6 +3,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse.Styles Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -46,20 +47,19 @@ import Prelude import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except -import Data.Char (toLower) -import Data.List import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light -newtype CharStyleId = CharStyleId String +newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) -newtype ParaStyleId = ParaStyleId String +newtype ParaStyleId = ParaStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) newtype CharStyleName = CharStyleName CIString @@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString deriving (Show, Eq, Ord, IsString, FromStyleName) -- Case-insensitive comparisons -newtype CIString = CIString String deriving (Show, IsString, FromStyleName) +newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName) class FromStyleName a where - fromStyleName :: a -> String + fromStyleName :: a -> T.Text instance FromStyleName String where + fromStyleName = T.pack + +instance FromStyleName T.Text where fromStyleName = id class FromStyleId a where - fromStyleId :: a -> String + fromStyleId :: a -> T.Text instance FromStyleId String where + fromStyleId = T.pack + +instance FromStyleId T.Text where fromStyleId = id instance Eq CIString where - (==) = (==) `on` map toLower . coerce + (==) = (==) `on` T.toCaseFold . coerce instance Ord CIString where - compare = compare `on` map toLower . coerce + compare = compare `on` T.toCaseFold . coerce data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show @@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool deriving Show data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) - , numInfo :: Maybe (String, String) + , numInfo :: Maybe (T.Text, T.Text) , psParentStyle :: Maybe ParStyle , pStyleName :: ParaStyleName , pStyleId :: ParaStyleId @@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> (Just $ elemToRunStyle ns element parentStyle) @@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger =<< - (stripPrefix "heading " . map toLower $ + , Just n <- stringToInteger . T.unpack =<< + (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing -getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") - <|> findAttrByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") + <|> findAttrTextByName ns "w" "styleId" el) -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do let numPr = findChildByName ns "w" "pPr" element >>= findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") + findAttrTextByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrByName ns "w" "styleId" element + | Just styleId <- findAttrTextByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f4855efd2..0de1114bd 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName + , findAttrText , findAttrByName + , findAttrTextByName ) where import Prelude import Data.Maybe (mapMaybe) +import qualified Data.Text as T import Text.XML.Light type NameSpaces = [(String, String)] @@ -55,7 +58,13 @@ findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el +findAttrText :: QName -> Element -> Maybe T.Text +findAttrText x = fmap T.pack . findAttr x + findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el + +findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text +findAttrTextByName a b c = fmap T.pack . findAttrByName a b c diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index 60d406df1..3a92cfa19 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -2,6 +2,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.DokuWiki Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -20,8 +21,7 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isDigit) import qualified Data.Foldable as F -import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf) -import Data.List.Split (splitOn) +import Data.List (transpose) import Data.Maybe (fromMaybe, catMaybes) import Data.Text (Text) import qualified Data.Text as T @@ -31,7 +31,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Read DokuWiki from an input string and return a Pandoc document. readDokuWiki :: PandocMonad m @@ -42,7 +42,7 @@ readDokuWiki opts s = do let input = crFilter s res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (T.unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type DWParser = ParserT Text ParserState @@ -71,9 +71,9 @@ parseDokuWiki = B.doc . mconcat <$> many block <* spaces <* eof -- | Parse <code> and <file> attributes -codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)]) +codeLanguage :: PandocMonad m => DWParser m (Text, [Text], [(Text, Text)]) codeLanguage = try $ do - rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>'))) + rawLang <- option "-" (spaceChar *> manyTillChar anyChar (lookAhead (spaceChar <|> char '>'))) let attr = case rawLang of "-" -> [] l -> [l] @@ -81,16 +81,16 @@ codeLanguage = try $ do -- | Generic parser for <code> and <file> tags codeTag :: PandocMonad m - => ((String, [String], [(String, String)]) -> String -> a) - -> String + => ((Text, [Text], [(Text, Text)]) -> Text -> a) + -> Text -> DWParser m a codeTag f tag = try $ f <$ char '<' - <* string tag + <* textStr tag <*> codeLanguage <* manyTill anyChar (char '>') <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</" <* string tag <* char '>') + <*> manyTillChar anyChar (try $ string "</" <* textStr tag <* char '>') -- * Inline parsers @@ -167,19 +167,19 @@ underlined :: PandocMonad m => DWParser m B.Inlines underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines nowiki :: PandocMonad m => DWParser m B.Inlines -nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>") +nowiki = try $ B.text <$ string "<nowiki>" <*> manyTillChar anyChar (try $ string "</nowiki>") percent :: PandocMonad m => DWParser m B.Inlines -percent = try $ B.text <$> enclosed (string "%%") nestedString +percent = try $ B.text <$> enclosed (string "%%") nestedText -nestedString :: (Show a, PandocMonad m) - => DWParser m a -> DWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedText :: (Show a, PandocMonad m) + => DWParser m a -> DWParser m Text +nestedText end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end monospaced :: PandocMonad m => DWParser m B.Inlines -monospaced = try $ B.code <$> enclosed (string "''") nestedString +monospaced = try $ B.code <$> enclosed (string "''") nestedText subscript :: PandocMonad m => DWParser m B.Inlines subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines @@ -201,12 +201,12 @@ inlineFile :: PandocMonad m => DWParser m B.Inlines inlineFile = codeTag B.codeWith "file" inlineHtml :: PandocMonad m => DWParser m B.Inlines -inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>") +inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTillChar anyChar (try $ string "</html>") inlinePhp :: PandocMonad m => DWParser m B.Inlines -inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>") +inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTillChar anyChar (try $ string "</php>") -makeLink :: (String, String) -> B.Inlines +makeLink :: (Text, Text) -> B.Inlines makeLink (text, url) = B.link url "" $ B.str text autoEmail :: PandocMonad m => DWParser m B.Inlines @@ -220,7 +220,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- uri - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where checkLink c @@ -234,10 +234,10 @@ nocache :: PandocMonad m => DWParser m B.Inlines nocache = try $ mempty <$ string "~~NOCACHE~~" str :: PandocMonad m => DWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => DWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar link :: PandocMonad m => DWParser m B.Inlines link = try $ do @@ -248,77 +248,78 @@ link = try $ do setState $ st{ stateAllowLinks = True } return l -isExternalLink :: String -> Bool -isExternalLink s = - case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of - (':':'/':'/':_) -> True - _ -> False - -isAbsolutePath :: String -> Bool -isAbsolutePath ('.':_) = False -isAbsolutePath s = ':' `elem` s - -normalizeDots :: String -> String -normalizeDots path@('.':_) = - case dropWhile (== '.') path of - ':':_ -> path - _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path -normalizeDots path = path +isExternalLink :: Text -> Bool +isExternalLink s = "://" `T.isPrefixOf` sSuff + where + sSuff = T.dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s + +isAbsolutePath :: Text -> Bool +isAbsolutePath (T.uncons -> Just ('.', _)) = False +isAbsolutePath s = T.any (== ':') s + +normalizeDots :: Text -> Text +normalizeDots path + | not (T.null pref) = case T.uncons suff of + Just (':', _) -> path + _ -> pref <> ":" <> suff + | otherwise = path + where + (pref, suff) = T.span (== '.') path -normalizeInternalPath :: String -> String +normalizeInternalPath :: Text -> Text normalizeInternalPath path = if isAbsolutePath path then ensureAbsolute normalizedPath else normalizedPath where - normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path - ensureAbsolute s@('/':_) = s - ensureAbsolute s = '/':s + normalizedPath = T.intercalate "/" $ dropWhile (== ".") $ T.splitOn ":" $ normalizeDots path + ensureAbsolute s@(T.uncons -> Just ('/', _)) = s + ensureAbsolute s = "/" <> s -normalizePath :: String -> String +normalizePath :: Text -> Text normalizePath path = if isExternalLink path then path else normalizeInternalPath path -urlToText :: String -> String +urlToText :: Text -> Text urlToText url = if isExternalLink url then url - else reverse $ takeWhile (/= ':') $ reverse url + else T.takeWhileEnd (/= ':') url -- Parse link or image parseLink :: PandocMonad m - => (String -> Maybe B.Inlines -> B.Inlines) - -> String - -> String + => (Text -> Maybe B.Inlines -> B.Inlines) + -> Text + -> Text -> DWParser m B.Inlines parseLink f l r = f - <$ string l - <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r))) - <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r))) - <* string r + <$ textStr l + <*> many1TillChar anyChar (lookAhead (void (char '|') <|> try (void $ textStr r))) + <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ textStr r))) + <* textStr r -- | Split Interwiki link into left and right part -- | Return Nothing if it is not Interwiki link -splitInterwiki :: String -> Maybe (String, String) +splitInterwiki :: Text -> Maybe (Text, Text) splitInterwiki path = - case span (\c -> isAlphaNum c || c == '.') path of - (l, '>':r) -> Just (l, r) + case T.span (\c -> isAlphaNum c || c == '.') path of + (l, T.uncons -> Just ('>', r)) -> Just (l, r) _ -> Nothing -interwikiToUrl :: String -> String -> String -interwikiToUrl "callto" page = "callto://" ++ page -interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page -interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page -interwikiToUrl "tel" page = "tel:" ++ page -interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page -interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page -interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky" +interwikiToUrl :: Text -> Text -> Text +interwikiToUrl "callto" page = "callto://" <> page +interwikiToUrl "doku" page = "https://www.dokuwiki.org/" <> page +interwikiToUrl "phpfn" page = "https://secure.php.net/" <> page +interwikiToUrl "tel" page = "tel:" <> page +interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" <> page +interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" <> page +interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page +interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page +interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page +interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page +interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky" linkText :: PandocMonad m => DWParser m B.Inlines linkText = parseLink fromRaw "[[" "]]" @@ -338,23 +339,23 @@ linkText = parseLink fromRaw "[[" "]]" Just (_, r) -> r -- Matches strings like "100x100" (width x height) and "50" (width) -isWidthHeightParameter :: String -> Bool +isWidthHeightParameter :: Text -> Bool isWidthHeightParameter s = - case s of - (x:xs) -> - isDigit x && case dropWhile isDigit xs of - ('x':ys@(_:_)) -> all isDigit ys - "" -> True + case T.uncons s of + Just (x, xs) -> + isDigit x && case T.uncons $ T.dropWhile isDigit xs of + Just ('x', ys) | not (T.null ys) -> T.all isDigit ys + Nothing -> True _ -> False _ -> False -parseWidthHeight :: String -> (Maybe String, Maybe String) +parseWidthHeight :: Text -> (Maybe Text, Maybe Text) parseWidthHeight s = (width, height) where - width = Just $ takeWhile isDigit s + width = Just $ T.takeWhile isDigit s height = - case dropWhile isDigit s of - ('x':xs) -> Just xs + case T.uncons $ T.dropWhile isDigit s of + Just ('x', xs) -> Just xs _ -> Nothing image :: PandocMonad m => DWParser m B.Inlines @@ -365,17 +366,17 @@ image = try $ parseLink fromRaw "{{" "}}" then B.link normalizedPath "" (fromMaybe defaultDescription description) else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description) where - (path', parameters) = span (/= '?') $ trim path + (path', parameters) = T.span (/= '?') $ trim path normalizedPath = normalizePath path' - leftPadding = " " `isPrefixOf` path - rightPadding = " " `isSuffixOf` path + leftPadding = " " `T.isPrefixOf` path + rightPadding = " " `T.isSuffixOf` path classes = case (leftPadding, rightPadding) of (False, False) -> [] (False, True) -> ["align-left"] (True, False) -> ["align-right"] (True, True) -> ["align-center"] - parameterList = splitOn "&" $ drop 1 parameters + parameterList = T.splitOn "&" $ T.drop 1 parameters linkOnly = "linkonly" `elem` parameterList (width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList) attributes = catMaybes [fmap ("width",) width, fmap ("height",) height] @@ -389,7 +390,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => DWParser m B.Blocks @@ -417,30 +418,30 @@ header = try $ do attr <- registerHeader nullAttr contents return $ B.headerWith attr (7 - lev) contents -list :: PandocMonad m => String -> DWParser m B.Blocks +list :: PandocMonad m => Text -> DWParser m B.Blocks list prefix = bulletList prefix <|> orderedList prefix -bulletList :: PandocMonad m => String -> DWParser m B.Blocks +bulletList :: PandocMonad m => Text -> DWParser m B.Blocks bulletList prefix = try $ B.bulletList <$> parseList prefix '*' -orderedList :: PandocMonad m => String -> DWParser m B.Blocks +orderedList :: PandocMonad m => Text -> DWParser m B.Blocks orderedList prefix = try $ B.orderedList <$> parseList prefix '-' parseList :: PandocMonad m - => String + => Text -> Char -> DWParser m [B.Blocks] parseList prefix marker = many1 ((<>) <$> item <*> fmap mconcat (many continuation)) where - continuation = try $ list (" " ++ prefix) - item = try $ string prefix *> char marker *> char ' ' *> itemContents + continuation = try $ list (" " <> prefix) + item = try $ textStr prefix *> char marker *> char ' ' *> itemContents itemContents = B.plain . mconcat <$> many1Till inline' eol indentedCode :: PandocMonad m => DWParser m B.Blocks -indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine +indentedCode = try $ B.codeBlock . T.unlines <$> many1 indentedLine where - indentedLine = try $ string " " *> manyTill anyChar eol + indentedLine = try $ string " " *> manyTillChar anyChar eol quote :: PandocMonad m => DWParser m B.Blocks quote = try $ nestedQuote 0 @@ -456,13 +457,13 @@ blockHtml :: PandocMonad m => DWParser m B.Blocks blockHtml = try $ B.rawBlock "html" <$ string "<HTML>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</HTML>") + <*> manyTillChar anyChar (try $ string "</HTML>") blockPhp :: PandocMonad m => DWParser m B.Blocks blockPhp = try $ B.codeBlockWith ("", ["php"], []) <$ string "<PHP>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</PHP>") + <*> manyTillChar anyChar (try $ string "</PHP>") table :: PandocMonad m => DWParser m B.Blocks table = do diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 8e9746090..93ddeb9ee 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.EPUB Copyright : Copyright (C) 2014-2019 Matthew Pickering @@ -24,7 +25,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) +import qualified Data.Text as T import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.Text.Lazy as TL @@ -67,9 +69,9 @@ archiveToEPUB os archive = do -- No need to collapse here as the image path is from the manifest file let coverDoc = fromMaybe mempty (imageToPandoc <$> cover) spine <- parseSpine items content - let escapedSpine = map (escapeURI . takeFileName . fst) spine + let escapedSpine = map (escapeURI . T.pack . takeFileName . fst) spine Pandoc _ bs <- - foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine)) + foldM' (\a b -> ((a <>) . walk (prependHash $ escapedSpine)) `liftM` parseSpineElem root b) mempty spine let ast = coverDoc <> Pandoc meta bs fetchImages (M.elems items) root archive ast @@ -79,7 +81,7 @@ archiveToEPUB os archive = do parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do doc <- mimeToReader mime r path - let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty + let docSpan = B.doc $ B.para $ B.spanWith (T.pack $ takeFileName path, [], []) mempty return $ docSpan <> doc mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (unEscapeString -> root) @@ -108,18 +110,19 @@ fetchImages mimes root arc (query iq -> links) = <$> findEntryByPath abslink arc iq :: Inline -> [FilePath] -iq (Image _ _ (url, _)) = [url] +iq (Image _ _ (url, _)) = [T.unpack url] iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline renameImages root img@(Image attr a (url, b)) - | "data:" `isPrefixOf` url = img - | otherwise = Image attr a (collapseFilePath (root </> url), b) + | "data:" `T.isPrefixOf` url = img + | otherwise = Image attr a ( T.pack $ collapseFilePath (root </> T.unpack url) + , b) renameImages _ x = x imageToPandoc :: FilePath -> Pandoc -imageToPandoc s = B.doc . B.para $ B.image s "" mempty +imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] @@ -144,7 +147,7 @@ parseManifest content coverId = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e mime <- findAttrE (emptyName "media-type") e - return (uid, (href, mime)) + return (uid, (href, T.pack mime)) parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do @@ -172,11 +175,11 @@ parseMeta content = do -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ strContent e) meta + addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta -renameMeta :: String -> String +renameMeta :: String -> T.Text renameMeta "creator" = "author" -renameMeta s = s +renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do @@ -197,26 +200,26 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = walk (renameImages root) - . walk (fixBlockIRs filename) + . walk (fixBlockIRs filename) . walk (fixInlineIRs filename) where - (root, escapeURI -> filename) = splitFileName pathToFile + (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile fixInlineIRs :: String -> Inline -> Inline fixInlineIRs s (Span as v) = Span (fixAttrs s as) v fixInlineIRs s (Code as code) = Code (fixAttrs s as) code -fixInlineIRs s (Link as is ('#':url, tit)) = +fixInlineIRs s (Link as is (T.uncons -> Just ('#', url), tit)) = Link (fixAttrs s as) is (addHash s url, tit) fixInlineIRs s (Link as is t) = Link (fixAttrs s as) is t fixInlineIRs _ v = v -prependHash :: [String] -> Inline -> Inline +prependHash :: [T.Text] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) - | or [s `isPrefixOf` url | s <- ps] = - Link attr is ('#':url, tit) + | or [s `T.isPrefixOf` url | s <- ps] = + Link attr is ("#" <> url, tit) | otherwise = l prependHash _ i = i @@ -230,17 +233,17 @@ fixBlockIRs s (CodeBlock as code) = fixBlockIRs _ b = b fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs) +fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) -addHash :: String -> String -> String +addHash :: String -> T.Text -> T.Text addHash _ "" = "" -addHash s ident = takeFileName s ++ "#" ++ ident +addHash s ident = T.pack (takeFileName s) <> "#" <> ident -removeEPUBAttrs :: [(String, String)] -> [(String, String)] +removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs -isEPUBAttr :: (String, String) -> Bool -isEPUBAttr (k, _) = "epub:" `isPrefixOf` k +isEPUBAttr :: (T.Text, a) -> Bool +isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k -- Library @@ -291,4 +294,4 @@ findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ s) return +mkE s = maybe (throwError . PandocParseError $ T.pack $ s) return diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 0b25b9fed..6eed3c104 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -1,5 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.FB2 Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -27,12 +28,11 @@ import Control.Monad.Except (throwError) import Control.Monad.State.Strict import Data.ByteString.Lazy.Char8 ( pack ) import Data.ByteString.Base64.Lazy -import Data.Char (isSpace, toUpper) import Data.Functor -import Data.List (dropWhileEnd, intersperse) -import Data.List.Split (splitOn) +import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) +import qualified Data.Text as T import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -48,8 +48,8 @@ type FB2 m = StateT FB2State m data FB2State = FB2State{ fb2SectionLevel :: Int , fb2Meta :: Meta - , fb2Authors :: [String] - , fb2Notes :: M.Map String Blocks + , fb2Authors :: [Text] + , fb2Notes :: M.Map Text Blocks } deriving Show instance Default FB2State where @@ -76,19 +76,20 @@ readFB2 _ inp = -- * Utility functions -trim :: String -> String -trim = dropWhileEnd isSpace . dropWhile isSpace +trim :: Text -> Text +trim = T.strip -removeHash :: String -> String -removeHash ('#':xs) = xs -removeHash xs = xs +removeHash :: Text -> Text +removeHash t = case T.uncons t of + Just ('#', xs) -> xs + _ -> t -convertEntity :: String -> String -convertEntity e = fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: String -> Text +convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = - case qName $ elName e of + case T.pack $ qName $ elName e of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -96,12 +97,12 @@ parseInline (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement name pure mempty -parseInline (Text x) = pure $ text $ cdData x +parseInline (Text x) = pure $ text $ T.pack $ cdData x parseInline (CRef r) = pure $ str $ convertEntity r parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks @@ -111,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel < parseRootElement :: PandocMonad m => Element -> FB2 m Blocks parseRootElement e = - case qName $ elName e of + case T.pack $ qName $ elName e of "FictionBook" -> do -- Parse notes before parsing the rest of the content. case filterChild isNotesBody e of @@ -144,7 +145,7 @@ parseNote e = Just sectionId -> do content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e) oldNotes <- gets fb2Notes - modify $ \s -> s { fb2Notes = M.insert ("#" ++ sectionId) content oldNotes } + modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes } pure () where isTitle x = qName (elName x) == "title" @@ -156,7 +157,7 @@ parseNote e = -- | Parse a child of @\<FictionBook>@ element. parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks parseFictionBookChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "stylesheet" -> pure mempty -- stylesheet is ignored "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) "body" -> if isNotesBody e @@ -168,7 +169,7 @@ parseFictionBookChild e = -- | Parse a child of @\<description>@ element. parseDescriptionChild :: PandocMonad m => Element -> FB2 m () parseDescriptionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title-info" -> mapM_ parseTitleInfoChild (elChildren e) "src-title-info" -> pure () -- ignore "document-info" -> pure () @@ -176,13 +177,13 @@ parseDescriptionChild e = "custom-info" -> pure () "output" -> pure () name -> do - report $ IgnoredElement $ name ++ " in description" + report $ IgnoredElement $ name <> " in description" pure mempty -- | Parse a child of @\<body>@ element. parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks parseBodyChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "image" -> parseImageElement e "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) "epigraph" -> parseEpigraph e @@ -196,25 +197,25 @@ parseBinaryElement e = (Nothing, _) -> report $ IgnoredElement "binary without id attribute" (Just _, Nothing) -> report $ IgnoredElement "binary without content-type attribute" - (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e))) + (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e))) -- * Type parsers -- | Parse @authorType@ -parseAuthor :: PandocMonad m => Element -> FB2 m String -parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) +parseAuthor :: PandocMonad m => Element -> FB2 m Text +parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) -parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String) +parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text) parseAuthorChild e = - case qName $ elName e of - "first-name" -> pure $ Just $ strContent e - "middle-name" -> pure $ Just $ strContent e - "last-name" -> pure $ Just $ strContent e - "nickname" -> pure $ Just $ strContent e - "home-page" -> pure $ Just $ strContent e - "email" -> pure $ Just $ strContent e + case T.pack $ qName $ elName e of + "first-name" -> pure $ Just $ T.pack $ strContent e + "middle-name" -> pure $ Just $ T.pack $ strContent e + "last-name" -> pure $ Just $ T.pack $ strContent e + "nickname" -> pure $ Just $ T.pack $ strContent e + "home-page" -> pure $ Just $ T.pack $ strContent e + "email" -> pure $ Just $ T.pack $ strContent e name -> do - report $ IgnoredElement $ name ++ " in author" + report $ IgnoredElement $ name <> " in author" pure Nothing -- | Parse @titleType@ @@ -236,13 +237,13 @@ parseTitleContent _ = pure Nothing parseImageElement :: PandocMonad m => Element -> FB2 m Blocks parseImageElement e = case href of - Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt Nothing -> do report $ IgnoredElement " image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - title = fromMaybe "" $ findAttr (unqual "title") e - imgId = fromMaybe "" $ findAttr (unqual "id") e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + title = maybe "" T.pack $ findAttr (unqual "title") e + imgId = maybe "" T.pack $ findAttr (unqual "id") e href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e -- | Parse @pType@ @@ -256,7 +257,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) -- | Parse @citeType@ child parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks parseCiteChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "empty-line" -> pure horizontalRule @@ -271,13 +272,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks parsePoemChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "epigraph" -> parseEpigraph e "stanza" -> parseStanza e "text-author" -> para <$> parsePType e - "date" -> pure $ para $ text $ strContent e + "date" -> pure $ para $ text $ T.pack $ strContent e name -> report (UnexpectedXmlElement name "poem") $> mempty parseStanza :: PandocMonad m => Element -> FB2 m Blocks @@ -290,7 +291,7 @@ joinLineBlocks [] = [] parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks parseStanzaChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "v" -> lineBlock . (:[]) <$> parsePType e @@ -300,11 +301,11 @@ parseStanzaChild e = parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks parseEpigraph e = divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) - where divId = fromMaybe "" $ findAttr (unqual "id") e + where divId = maybe "" T.pack $ findAttr (unqual "id") e parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks parseEpigraphChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -318,7 +319,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks parseAnnotationChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -332,14 +333,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks parseSection e = do n <- gets fb2SectionLevel modify $ \st -> st{ fb2SectionLevel = n + 1 } - let sectionId = fromMaybe "" $ findAttr (unqual "id") e + let sectionId = maybe "" T.pack $ findAttr (unqual "id") e bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) modify $ \st -> st{ fb2SectionLevel = n } pure bs parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks parseSectionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseBodyChild e "epigraph" -> parseEpigraph e "image" -> parseImageElement e @@ -361,16 +362,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e) parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines parseNamedStyle e = do content <- mconcat <$> mapM parseNamedStyleChild (elContent e) - let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e + let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e case findAttr (unqual "name") e of - Just name -> pure $ spanWith ("", [name], lang) content + Just name -> pure $ spanWith ("", [T.pack name], lang) content Nothing -> do report $ IgnoredElement "link without required name" pure mempty parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines parseNamedStyleChild (Elem e) = - case qName (elName e) of + case T.pack $ qName (elName e) of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -378,10 +379,10 @@ parseNamedStyleChild (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do - report $ IgnoredElement $ name ++ " in style" + report $ IgnoredElement $ name <> " in style" pure mempty parseNamedStyleChild x = parseInline x @@ -390,7 +391,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines parseLinkType e = do content <- mconcat <$> mapM parseStyleLinkType (elContent e) notes <- gets fb2Notes - case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just href -> case findAttr (QName "type" Nothing Nothing) e of Just "note" -> case M.lookup href notes of Nothing -> pure $ link href "" content @@ -417,19 +418,21 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet -- | Parse @title-infoType@ parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () parseTitleInfoChild e = - case qName (elName e) of + case T.pack $ qName (elName e) of "genre" -> pure () "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) - "book-title" -> modify (setMeta "title" (text $ strContent e)) + "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e)) "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" - "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e)) - "date" -> modify (setMeta "date" (text $ strContent e)) + "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn "," + $ T.pack + $ strContent e)) + "date" -> modify (setMeta "date" (text $ T.pack $ strContent e)) "coverpage" -> parseCoverPage e "lang" -> pure () "src-lang" -> pure () "translator" -> pure () "sequence" -> pure () - name -> report $ IgnoredElement $ name ++ " in title-info" + name -> report $ IgnoredElement $ name <> " in title-info" parseCoverPage :: PandocMonad m => Element -> FB2 m () parseCoverPage e = @@ -437,7 +440,7 @@ parseCoverPage e = Just img -> case href of Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) Nothing -> pure () - where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img Nothing -> pure () -- | Parse @inlineImageType@ element @@ -450,5 +453,5 @@ parseInlineImageElement e = Nothing -> do report $ IgnoredElement "inline image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e03ac6a97..1c2892d6a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -35,8 +35,7 @@ import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT) import Data.Char (isAlphaNum, isLetter) import Data.Default (Default (..), def) import Data.Foldable (for_) -import Data.List (isPrefixOf) -import Data.List.Split (wordsBy, splitWhen) +import Data.List.Split (splitWhen) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid (First (..)) @@ -62,8 +61,8 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, - extractSpaces, htmlSpanLikeElements, - onlySimpleTableCells, safeRead, underlineSpan) + extractSpaces, htmlSpanLikeElements, elemText, splitTextBy, + onlySimpleTableCells, safeRead, underlineSpan, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) @@ -93,14 +92,14 @@ readHtml opts inp = do "source" tags case result of Right doc -> return doc - Left err -> throwError $ PandocParseError $ getError err + Left err -> throwError $ PandocParseError $ T.pack $ getError err replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do st <- getState return $ walk (replaceNotes' (noteTable st)) bs -replaceNotes' :: [(String, Blocks)] -> Inline -> Inline +replaceNotes' :: [(Text, Blocks)] -> Inline -> Inline replaceNotes' noteTbl (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) $ lookup ref noteTbl replaceNotes' _ x = x @@ -108,9 +107,9 @@ replaceNotes' _ x = x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)], + noteTable :: [(Text, Blocks)], baseHref :: Maybe URI, - identifiers :: Set.Set String, + identifiers :: Set.Set Text, logMessages :: [LogMessage], macros :: M.Map Text Macro } @@ -134,7 +133,7 @@ pHtml :: PandocMonad m => TagParser m Blocks pHtml = try $ do (TagOpen "html" attr) <- lookAhead pAny for_ (lookup "lang" attr) $ - updateState . B.setMeta "lang" . B.text . T.unpack + updateState . B.setMeta "lang" . B.text pInTags "html" block pBody :: PandocMonad m => TagParser m Blocks @@ -146,11 +145,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny) setTitle t = mempty <$ updateState (B.setMeta "title" t) pMetaTag = do mt <- pSatisfy (matchTagOpen "meta" []) - let name = T.unpack $ fromAttrib "name" mt - if null name + let name = fromAttrib "name" mt + if T.null name then return mempty else do - let content = T.unpack $ fromAttrib "content" mt + let content = fromAttrib "content" mt updateState $ \s -> let ps = parserState s in s{ parserState = ps{ @@ -187,13 +186,13 @@ block = do , pFigure , pRawHtmlBlock ] - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -namespaces :: PandocMonad m => [(String, TagParser m Inlines)] +namespaces :: PandocMonad m => [(Text, TagParser m Inlines)] namespaces = [(mathMLNamespace, pMath True)] -mathMLNamespace :: String +mathMLNamespace :: Text mathMLNamespace = "http://www.w3.org/1998/Math/MathML" eSwitch :: (PandocMonad m, Monoid a) @@ -233,7 +232,7 @@ eFootnote = try $ do content <- pInTags tag block addNote ident content -addNote :: PandocMonad m => String -> Blocks -> TagParser m () +addNote :: PandocMonad m => Text -> Blocks -> TagParser m () addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s}) eNoteref :: PandocMonad m => TagParser m Inlines @@ -245,8 +244,8 @@ eNoteref = try $ do -> (lookup "type" as <|> lookup "epub:type" as) == Just "noteref" _ -> False) - ident <- case T.unpack <$> lookup "href" attr of - Just ('#':rest) -> return rest + ident <- case lookup "href" attr >>= T.uncons of + Just ('#', rest) -> return rest _ -> mzero _ <- manyTill pAny (pSatisfy (\case TagClose t -> t == tag @@ -287,7 +286,7 @@ pListItem nonItem = do maybe id addId (lookup "id" attr) <$> pInTags "li" block <* skipMany nonItem -parseListStyleType :: String -> ListNumberStyle +parseListStyleType :: Text -> ListNumberStyle parseListStyleType "lower-roman" = LowerRoman parseListStyleType "upper-roman" = UpperRoman parseListStyleType "lower-alpha" = LowerAlpha @@ -295,7 +294,7 @@ parseListStyleType "upper-alpha" = UpperAlpha parseListStyleType "decimal" = Decimal parseListStyleType _ = DefaultStyle -parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr :: Text -> ListNumberStyle parseTypeAttr "i" = LowerRoman parseTypeAttr "I" = UpperRoman parseTypeAttr "a" = LowerAlpha @@ -404,20 +403,19 @@ pDiv = try $ do pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do - raw <- T.unpack <$> - (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" - <|> pRawTag) + raw <- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" + <|> pRawTag) exts <- getOption readerExtensions - if extensionEnabled Ext_raw_html exts && not (null raw) + if extensionEnabled Ext_raw_html exts && not (T.null raw) then return $ B.rawBlock "html" raw else ignore raw -ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a +ignore :: (Monoid a, PandocMonad m) => Text -> TagParser m a ignore raw = do pos <- getPosition -- raw can be null for tags like <!DOCTYPE>; see paRawTag -- in this case we don't want a warning: - unless (null raw) $ + unless (T.null raw) $ logMessage $ SkippedContent raw pos return mempty @@ -438,7 +436,7 @@ eSection = try $ do headerLevel :: Text -> TagParser m Int headerLevel tagtype = - case safeRead (T.unpack (T.drop 1 tagtype)) of + case safeRead (T.drop 1 tagtype) of Just level -> -- try (do -- guardEnabled Ext_epub_html_exts @@ -468,7 +466,7 @@ pHeader = try $ do level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr - let classes = maybe [] words $ lookup "class" attr + let classes = maybe [] T.words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] attr'' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle @@ -529,14 +527,14 @@ pCol = try $ do optional $ pSatisfy (matchTagClose "col") skipMany pBlank let width = case lookup "width" attribs of - Nothing -> case lookup "style" attribs of - Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> - fromMaybe 0.0 $ safeRead (filter - (`notElem` (" \t\r\n%'\";" :: [Char])) xs) - _ -> 0.0 - Just x | not (null x) && last x == '%' -> - fromMaybe 0.0 $ safeRead (init x) - _ -> 0.0 + Nothing -> case lookup "style" attribs of + Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> + fromMaybe 0.0 $ safeRead (T.filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) + _ -> 0.0 + Just (T.unsnoc -> Just (xs, '%')) -> + fromMaybe 0.0 $ safeRead xs + _ -> 0.0 if width > 0.0 then return $ width / 100.0 else return 0.0 @@ -562,7 +560,7 @@ pCell celltype = try $ do let extractAlign' [] = "" extractAlign' ("text-align":x:_) = x extractAlign' (_:xs) = extractAlign' xs - let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':']) + let extractAlign = extractAlign' . splitTextBy (`elemText` " \t;:") let align = case maybeFromAttrib "align" tag `mplus` (extractAlign <$> maybeFromAttrib "style" tag) of Just "left" -> AlignLeft @@ -610,7 +608,7 @@ pFigure = try $ do let caption = fromMaybe mempty mbcap case B.toList <$> mbimg of Just [Image attr _ (url, tit)] -> - return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption + return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption _ -> mzero pCodeBlock :: PandocMonad m => TagParser m Blocks @@ -618,21 +616,21 @@ pCodeBlock = try $ do TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) let attr = toStringAttr attr' contents <- manyTill pAny (pCloses "pre" <|> eof) - let rawText = concatMap tagToString contents + let rawText = T.concat $ map tagToText contents -- drop leading newline if any - let result' = case rawText of - '\n':xs -> xs - _ -> rawText + let result' = case T.uncons rawText of + Just ('\n', xs) -> xs + _ -> rawText -- drop trailing newline if any - let result = case reverse result' of - '\n':_ -> init result' - _ -> result' + let result = case T.unsnoc result' of + Just (result'', '\n') -> result'' + _ -> result' return $ B.codeBlockWith (mkAttr attr) result -tagToString :: Tag Text -> String -tagToString (TagText s) = T.unpack s -tagToString (TagOpen "br" _) = "\n" -tagToString _ = "" +tagToText :: Tag Text -> Text +tagToText (TagText s) = s +tagToText (TagOpen "br" _) = "\n" +tagToText _ = "" inline :: PandocMonad m => TagParser m Inlines inline = choice @@ -667,7 +665,7 @@ pLocation = do pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSat f = do pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) + token tshow (const pos) (\x -> if f x then Just x else Nothing) pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSatisfy f = try $ optional pLocation >> pSat f @@ -688,10 +686,10 @@ pQ = choice $ map try [citedQuote, normalQuote] where citedQuote = do tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst)) - url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + url <- canonicalizeUrl $ fromAttrib "cite" tag + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)]) normalQuote = do @@ -729,7 +727,7 @@ pSpanLike = TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True) let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs content <- mconcat <$> manyTill inline (pCloses tagName <|> eof) - return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content + return $ B.spanWith (ids, tagName : cs, kvs) content pSmall :: PandocMonad m => TagParser m Inlines pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[])) @@ -753,19 +751,18 @@ pLineBreak = do -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag Text -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = - T.unpack <$> lookup (T.pack name) attrs +maybeFromAttrib :: Text -> Tag Text -> Maybe Text +maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs maybeFromAttrib _ _ = Nothing pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = T.unpack $ fromAttrib "title" tag + let title = fromAttrib "title" tag -- take id from id attribute if present, otherwise name - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag lab <- mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of @@ -778,34 +775,33 @@ pLink = try $ do pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag - let title = T.unpack $ fromAttrib "title" tag - let alt = T.unpack $ fromAttrib "alt" tag - let uid = T.unpack $ fromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + url <- canonicalizeUrl $ fromAttrib "src" tag + let title = fromAttrib "title" tag + let alt = fromAttrib "alt" tag + let uid = fromAttrib "id" tag + let cls = T.words $ fromAttrib "class" tag let getAtt k = case fromAttrib k tag of "" -> [] - v -> [(T.unpack k, T.unpack v)] + v -> [(k, v)] let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) -pCodeWithClass :: PandocMonad m => [(T.Text,String)] -> TagParser m Inlines -pCodeWithClass elemToClass = try $ do +pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines +pCodeWithClass elemToClass = try $ do let tagTest = flip elem . fmap fst $ elemToClass TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True) result <- manyTill pAny (pCloses open) let (ids,cs,kvs) = mkAttr . toStringAttr $ attr' cs' = maybe cs (:cs) . lookup open $ elemToClass return . B.codeWith (ids,cs',kvs) . - unwords . lines . T.unpack . innerText $ result + T.unwords . T.lines . innerText $ result pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) let attr = toStringAttr attr' result <- manyTill pAny (pCloses open) - return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $ - innerText result + return $ B.codeWith (mkAttr attr) $ T.unwords $ T.lines $ innerText result pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do @@ -817,7 +813,7 @@ pSpan = try $ do where styleAttr = fromMaybe "" $ lookup "style" attr fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr classes = fromMaybe [] $ - words <$> lookup "class" attr + T.words <$> lookup "class" attr let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) return $ tag contents @@ -829,18 +825,17 @@ pRawHtmlInline = do then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag exts <- getOption readerExtensions - let raw = T.unpack $ renderTags' [result] + let raw = renderTags' [result] if extensionEnabled Ext_raw_html exts then return $ B.rawInline "html" raw else ignore raw -mathMLToTeXMath :: String -> Either String String +mathMLToTeXMath :: Text -> Either Text Text mathMLToTeXMath s = writeTeX <$> readMathML s -toStringAttr :: [(Text, Text)] -> [(String, String)] +toStringAttr :: [(Text, Text)] -> [(Text, Text)] toStringAttr = map go - where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x), - T.unpack y) + where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y) pScriptMath :: PandocMonad m => TagParser m Inlines pScriptMath = try $ do @@ -849,8 +844,7 @@ pScriptMath = try $ do Just x | "math/tex" `T.isPrefixOf` x -> return $ "display" `T.isSuffixOf` x _ -> mzero - contents <- T.unpack . innerText <$> - manyTill pAny (pSatisfy (matchTagClose "script")) + contents <- innerText <$> manyTill pAny (pSatisfy (matchTagClose "script")) return $ (if isdisplay then B.displayMath else B.math) contents pMath :: PandocMonad m => Bool -> TagParser m Inlines @@ -862,11 +856,11 @@ pMath inCase = try $ do unless inCase $ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) contents <- manyTill pAny (pSatisfy (matchTagClose "math")) - case mathMLToTeXMath (T.unpack $ renderTags $ + case mathMLToTeXMath (renderTags $ [open] <> contents <> [TagClose "math"]) of Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - T.unpack $ innerText contents - Right [] -> return mempty + innerText contents + Right "" -> return mempty Right x -> return $ case lookup "display" attr of Just "block" -> B.displayMath x _ -> B.math x @@ -925,7 +919,7 @@ pTagText = try $ do parsed <- lift $ lift $ flip runReaderT qu $ runParserT (many pTagContents) st "text" str case parsed of - Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'" + Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'" Right result -> return $ mconcat result pBlank :: PandocMonad m => TagParser m () @@ -954,11 +948,11 @@ pRawTeX = do guardEnabled Ext_raw_tex inp <- getInput st <- getState - res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp) + res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" inp case res of Left _ -> mzero Right (contents, raw) -> do - _ <- count (length raw) anyChar + _ <- count (T.length raw) anyChar return $ B.rawInline "tex" contents pStr :: PandocMonad m => InlinesParser m Inlines @@ -966,7 +960,7 @@ pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) updateLastStrPos - return $ B.str result + return $ B.str $ T.pack result isSpecial :: Char -> Bool isSpecial '"' = True @@ -982,7 +976,7 @@ isSpecial '\8221' = True isSpecial _ = False pSymbol :: PandocMonad m => InlinesParser m Inlines -pSymbol = satisfy isSpecial >>= return . B.str . (:[]) +pSymbol = satisfy isSpecial >>= return . B.str . T.singleton isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML @@ -1019,7 +1013,7 @@ pBad = do '\158' -> '\382' '\159' -> '\376' _ -> '?' - return $ B.str [c'] + return $ B.str $ T.singleton c' pSpace :: PandocMonad m => InlinesParser m Inlines pSpace = many1 (satisfy isSpace) >>= \xs -> @@ -1156,8 +1150,8 @@ _ `closes` _ = False -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m - => (Tag String -> Bool) - -> ParserT String st m String + => (Tag Text -> Bool) + -> ParserT Text st m Text htmlInBalanced f = try $ do lookAhead (char '<') inp <- getInput @@ -1174,21 +1168,21 @@ htmlInBalanced f = try $ do (TagClose _ : TagPosition er ec : _) -> do let ls = er - sr let cs = ec - sc - lscontents <- unlines <$> count ls anyLine + lscontents <- T.unlines <$> count ls anyLine cscontents <- count cs anyChar closetag <- do x <- many (satisfy (/='>')) char '>' return (x <> ">") - return (lscontents <> cscontents <> closetag) + return $ lscontents <> T.pack cscontents <> T.pack closetag _ -> mzero _ -> mzero -htmlInBalanced' :: String - -> [Tag String] - -> [Tag String] +htmlInBalanced' :: Text + -> [Tag Text] + -> [Tag Text] htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts - where go :: Int -> [Tag String] -> Maybe [Tag String] + where go :: Int -> [Tag Text] -> Maybe [Tag Text] go n (t@(TagOpen tn' _):rest) | tn' == tagname = (t :) <$> go (n + 1) rest go 1 (t@(TagClose tn'):_) | tn' == tagname = @@ -1204,8 +1198,8 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) - => (Tag String -> Bool) - -> ParserT [Char] st m (Tag String, String) + => (Tag Text -> Bool) + -> ParserT Text st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition @@ -1213,7 +1207,7 @@ htmlTag f = try $ do let ts = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = False , optTagPosition = True } - (inp ++ " ") -- add space to ensure that + (inp <> " ") -- add space to ensure that -- we get a TagPosition after the tag (next, ln, col) <- case ts of (TagPosition{} : next : TagPosition ln col : _) @@ -1225,13 +1219,12 @@ htmlTag f = try $ do -- so we exclude . even though it's a valid character -- in XML element names let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' - let isName s = case s of - [] -> False - (c:cs) -> isLetter c && all isNameChar cs - let isPI s = case s of - ('?':_) -> True -- processing instruction - _ -> False - + let isName s = case T.uncons s of + Nothing -> False + Just (c, cs) -> isLetter c && T.all isNameChar cs + let isPI s = case T.uncons s of + Just ('?', _) -> True -- processing instruction + _ -> False let endpos = if ln == 1 then setSourceColumn startpos (sourceColumn startpos + (col - 1)) @@ -1247,18 +1240,18 @@ htmlTag f = try $ do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) guard $ isName tagname || isPI tagname - guard $ not $ null tagname + guard $ not $ T.null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] - guard $ last tagname /= ':' + guard $ T.last tagname /= ':' char '<' rendered <- manyTill anyChar endAngle - return (next, "<" ++ rendered ++ ">") + return (next, T.pack $ "<" ++ rendered ++ ">") case next of TagComment s - | "<!--" `isPrefixOf` inp -> do + | "<!--" `T.isPrefixOf` inp -> do string "<!--" - count (length s) anyChar + count (T.length s) anyChar string "-->" stripComments <- getOption readerStripComments if stripComments @@ -1272,12 +1265,12 @@ htmlTag f = try $ do handleTag tagname _ -> mzero -mkAttr :: [(String, String)] -> Attr +mkAttr :: [(Text, Text)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes + attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr + epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr -- Strip namespace prefixes stripPrefixes :: [Tag Text] -> [Tag Text] @@ -1304,11 +1297,11 @@ isSpace _ = False -- Utilities -- | Adjusts a url according to the document's base URL. -canonicalizeUrl :: PandocMonad m => String -> TagParser m String +canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text canonicalizeUrl url = do mbBaseHref <- baseHref <$> getState - return $ case (parseURIReference url, mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + return $ case (parseURIReference (T.unpack url), mbBaseHref) of + (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs) _ -> url diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 0a048b6e6..3fc2f9715 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Haddock Copyright : Copyright (C) 2013 David Lazar @@ -17,9 +18,10 @@ module Text.Pandoc.Readers.Haddock import Prelude import Control.Monad.Except (throwError) -import Data.List (intersperse, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe) import Data.Text (Text, unpack) +import qualified Data.Text as T import Documentation.Haddock.Parser import Documentation.Haddock.Types as H import Text.Pandoc.Builder (Blocks, Inlines) @@ -28,7 +30,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, splitBy, trim) +import Text.Pandoc.Shared (crFilter, splitTextBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. @@ -51,7 +53,7 @@ docHToBlocks d' = case d' of DocEmpty -> mempty DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) -> - B.headerWith (ident,[],[]) (headerLevel h) + B.headerWith (T.pack ident,[],[]) (headerLevel h) (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback @@ -73,12 +75,12 @@ docHToBlocks d' = DocDefList items -> B.definitionList (map (\(d,t) -> (docHToInlines False d, [consolidatePlains $ docHToBlocks t])) items) - DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s + DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) $ T.pack s DocCodeBlock d -> B.para $ docHToInlines True d DocHyperlink _ -> inlineFallback DocPic _ -> inlineFallback DocAName _ -> inlineFallback - DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) + DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim $ T.pack s) DocExamples es -> mconcat $ map (\e -> makeExample ">>>" (exampleExpression e) (exampleResult e)) es DocTable H.Table{ tableHeaderRows = headerRows @@ -114,58 +116,58 @@ docHToInlines isCode d' = (docHToInlines isCode d2) DocString s | isCode -> mconcat $ intersperse B.linebreak - $ map B.code $ splitBy (=='\n') s - | otherwise -> B.text s + $ map B.code $ splitTextBy (=='\n') $ T.pack s + | otherwise -> B.text $ T.pack s DocParagraph _ -> mempty DocIdentifier ident -> case toRegular (DocIdentifier ident) of - DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) s + DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s _ -> mempty - DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s - DocModule s -> B.codeWith ("",["haskell","module"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s + DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s DocWarning _ -> mempty -- TODO DocEmphasis d -> B.emph (docHToInlines isCode d) - DocMonospaced (DocString s) -> B.code s + DocMonospaced (DocString s) -> B.code $ T.pack s DocMonospaced d -> docHToInlines True d DocBold d -> B.strong (docHToInlines isCode d) - DocMathInline s -> B.math s - DocMathDisplay s -> B.displayMath s + DocMathInline s -> B.math $ T.pack s + DocMathDisplay s -> B.displayMath $ T.pack s DocHeader _ -> mempty DocUnorderedList _ -> mempty DocOrderedList _ -> mempty DocDefList _ -> mempty DocCodeBlock _ -> mempty - DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) - (maybe (B.text $ hyperlinkUrl h) (docHToInlines isCode) + DocHyperlink h -> B.link (T.pack $ hyperlinkUrl h) (T.pack $ hyperlinkUrl h) + (maybe (B.text $ T.pack $ hyperlinkUrl h) (docHToInlines isCode) (hyperlinkLabel h)) - DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) - (maybe mempty B.text $ pictureTitle p) - DocAName s -> B.spanWith (s,["anchor"],[]) mempty + DocPic p -> B.image (T.pack $ pictureUri p) (T.pack $ fromMaybe (pictureUri p) $ pictureTitle p) + (maybe mempty (B.text . T.pack) $ pictureTitle p) + DocAName s -> B.spanWith (T.pack s,["anchor"],[]) mempty DocProperty _ -> mempty DocExamples _ -> mempty DocTable _ -> mempty -- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Blocks +makeExample :: T.Text -> String -> [String] -> Blocks makeExample prompt expression result = B.para $ B.codeWith ("",["prompt"],[]) prompt <> B.space - <> B.codeWith ([], ["haskell","expr"], []) (trim expression) + <> B.codeWith ("", ["haskell","expr"], []) (trim $ T.pack expression) <> B.linebreak <> mconcat (intersperse B.linebreak $ map coder result') where -- 1. drop trailing whitespace from the prompt, remember the prefix - prefix = takeWhile (`elem` " \t") prompt + prefix = T.takeWhile (`elem` (" \t" :: String)) prompt -- 2. drop, if possible, the exact same sequence of whitespace -- characters from each result line -- -- 3. interpret lines that only contain the string "<BLANKLINE>" as an -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result + result' = map (substituteBlankLine . tryStripPrefix prefix . T.pack) result where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + tryStripPrefix xs ys = fromMaybe ys $ T.stripPrefix xs ys substituteBlankLine "<BLANKLINE>" = "" substituteBlankLine line = line - coder = B.codeWith ([], ["result"], []) + coder = B.codeWith ("", ["result"], []) diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index dbca5a59f..8efc230cc 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -4,6 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Ipynb Copyright : Copyright (C) 2019 John MacFarlane @@ -19,7 +20,6 @@ module Text.Pandoc.Readers.Ipynb ( readIpynb ) where import Prelude import Data.Char (isDigit) -import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) import Data.Digest.Pure.SHA (sha1, showDigest) import Text.Pandoc.Options @@ -30,6 +30,7 @@ import Text.Pandoc.Definition import Data.Ipynb as Ipynb import Text.Pandoc.Class import Text.Pandoc.MIME (extensionFromMimeType) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.UTF8 import Text.Pandoc.Walk (walk) import Text.Pandoc.Error @@ -51,15 +52,15 @@ readIpynb opts t = do Left _ -> case eitherDecode src of Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3 - Left err -> throwError $ PandocIpynbDecodingError err + Left err -> throwError $ PandocIpynbDecodingError $ T.pack err notebookToPandoc :: PandocMonad m => ReaderOptions -> Notebook a -> m Pandoc notebookToPandoc opts notebook = do let cells = notebookCells notebook let (fmt,fmtminor) = notebookFormat notebook - let m = M.insert "nbformat" (MetaString $ show fmt) $ - M.insert "nbformat_minor" (MetaString $ show fmtminor) $ + let m = M.insert "nbformat" (MetaString $ tshow fmt) $ + M.insert "nbformat_minor" (MetaString $ tshow fmtminor) $ jsonMetaToMeta (notebookMetadata notebook) let lang = case M.lookup "kernelspec" m of Just (MetaMap ks) -> @@ -72,7 +73,7 @@ notebookToPandoc opts notebook = do return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks cellToBlocks :: PandocMonad m - => ReaderOptions -> String -> Cell a -> m B.Blocks + => ReaderOptions -> Text -> Cell a -> m B.Blocks cellToBlocks opts lang c = do let Source ts = cellSource c let source = mconcat ts @@ -100,19 +101,18 @@ cellToBlocks opts lang c = do "text/markdown" -> "markdown" "text/x-rsrt" -> "rst" _ -> format - return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' - $ T.unpack source + return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do outputBlocks <- mconcat <$> mapM outputToBlock outputs - let kvs' = maybe kvs (\x -> ("execution_count", show x):kvs) ec + let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec return $ B.divWith ("",["cell","code"],kvs') $ - B.codeBlockWith ("",[lang],[]) (T.unpack source) + B.codeBlockWith ("",[lang],[]) source <> outputBlocks -- Remove attachment: prefix from images... fixImage :: Inline -> Inline fixImage (Image attr lab (src,tit)) - | "attachment:" `isPrefixOf` src = Image attr lab (drop 11 src, tit) + | "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit) fixImage x = x addAttachment :: PandocMonad m => (Text, MimeBundle) -> m () @@ -120,19 +120,19 @@ addAttachment (fname, mimeBundle) = do let fp = T.unpack fname case M.toList (unMimeBundle mimeBundle) of (mimeType, BinaryData bs):_ -> - insertMedia fp (Just $ T.unpack mimeType) (BL.fromStrict bs) + insertMedia fp (Just mimeType) (BL.fromStrict bs) (mimeType, TextualData t):_ -> - insertMedia fp (Just $ T.unpack mimeType) + insertMedia fp (Just mimeType) (BL.fromStrict $ TE.encodeUtf8 t) (mimeType, JsonData v):_ -> - insertMedia fp (Just $ T.unpack mimeType) (encode v) - [] -> report $ CouldNotFetchResource fp "no attachment" + insertMedia fp (Just mimeType) (encode v) + [] -> report $ CouldNotFetchResource fname "no attachment" outputToBlock :: PandocMonad m => Output a -> m B.Blocks outputToBlock Stream{ streamName = sName, streamText = Source text } = do - return $ B.divWith ("",["output","stream",T.unpack sName],[]) - $ B.codeBlock $ T.unpack . mconcat $ text + return $ B.divWith ("",["output","stream",sName],[]) + $ B.codeBlock $ T.concat $ text outputToBlock DisplayData{ displayData = data', displayMetadata = metadata' } = B.divWith ("",["output", "display_data"],[]) <$> @@ -140,15 +140,15 @@ outputToBlock DisplayData{ displayData = data', outputToBlock ExecuteResult{ executeCount = ec, executeData = data', executeMetadata = metadata' } = - B.divWith ("",["output", "execute_result"],[("execution_count",show ec)]) + B.divWith ("",["output", "execute_result"],[("execution_count",tshow ec)]) <$> handleData metadata' data' outputToBlock Err{ errName = ename, errValue = evalue, errTraceback = traceback } = do return $ B.divWith ("",["output","error"], - [("ename",T.unpack ename), - ("evalue",T.unpack evalue)]) - $ B.codeBlock $ T.unpack . T.unlines $ traceback + [("ename",ename), + ("evalue",evalue)]) + $ B.codeBlock $ T.unlines $ traceback -- We want to display the richest output possible given -- the output format. @@ -174,54 +174,53 @@ handleData metadata (MimeBundle mb) = let metaPairs = jsonMetaToPairs meta let bl = BL.fromStrict bs -- SHA1 hash for filename - let mt' = T.unpack mt - let fname = showDigest (sha1 bl) ++ - case extensionFromMimeType mt' of + let fname = T.pack (showDigest (sha1 bl)) <> + case extensionFromMimeType mt of Nothing -> "" - Just ext -> '.':ext - insertMedia fname (Just mt') bl + Just ext -> "." <> ext + insertMedia (T.unpack fname) (Just mt) bl return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty | otherwise = return mempty dataBlock ("text/html", TextualData t) - = return $ B.rawBlock "html" $ T.unpack t + = return $ B.rawBlock "html" $ t dataBlock ("text/latex", TextualData t) - = return $ B.rawBlock "latex" $ T.unpack t + = return $ B.rawBlock "latex" $ t dataBlock ("text/plain", TextualData t) = - return $ B.codeBlock $ T.unpack t + return $ B.codeBlock $ t dataBlock (_, JsonData v) = - return $ B.codeBlockWith ("",["json"],[]) $ toStringLazy $ encode v + return $ B.codeBlockWith ("",["json"],[]) $ T.pack $ toStringLazy $ encode v dataBlock _ = return mempty -jsonMetaToMeta :: JSONMeta -> M.Map String MetaValue -jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue +jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue +jsonMetaToMeta = M.map valueToMetaValue where valueToMetaValue :: Value -> MetaValue valueToMetaValue x@(Object{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success jm' -> MetaMap $ jsonMetaToMeta jm' valueToMetaValue x@(Array{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success xs -> MetaList $ map valueToMetaValue xs valueToMetaValue (Bool b) = MetaBool b - valueToMetaValue (String t) = MetaString (T.unpack t) + valueToMetaValue (String t) = MetaString t valueToMetaValue (Number n) - | Scientific.isInteger n = MetaString (show (floor n :: Integer)) - | otherwise = MetaString (show n) + | Scientific.isInteger n = MetaString (tshow (floor n :: Integer)) + | otherwise = MetaString (tshow n) valueToMetaValue Aeson.Null = MetaString "" -jsonMetaToPairs :: JSONMeta -> [(String, String)] -jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map +jsonMetaToPairs :: JSONMeta -> [(Text, Text)] +jsonMetaToPairs = M.toList . M.map (\case String t | not (T.all isDigit t) , t /= "true" , t /= "false" - -> T.unpack t - x -> UTF8.toStringLazy $ Aeson.encode x) + -> t + x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index e074599eb..320b9c1dd 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.JATS Copyright : Copyright (C) 2017-2019 Hamish Mackenzie @@ -76,13 +77,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr = fromMaybe "" . maybeAttrValue attr -maybeAttrValue :: String -> Element -> Maybe String +maybeAttrValue :: String -> Element -> Maybe Text maybeAttrValue attr elt = - lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt) -- convenience function named :: String -> Element -> Bool @@ -90,7 +91,7 @@ named s e = qName (elName e) == s -- -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> JATS m () addMeta field val = modify (setMeta field val) instance HasMeta JATSState where @@ -126,15 +127,13 @@ isBlockElement (Elem e) = qName (elName e) `S.member` blocktags isBlockElement _ = False -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- function that is used by both graphic (in parseBlock) -- and inline-graphic (in parseInline) getGraphic :: PandocMonad m - => Maybe (Inlines, String) -> Element -> JATS m Inlines + => Maybe (Inlines, Text) -> Element -> JATS m Inlines getGraphic mbfigdata e = do let atVal a = attrValue a e (ident, title, caption) = @@ -142,7 +141,7 @@ getGraphic mbfigdata e = do Just (capt, i) -> (i, "fig:" <> atVal "title", capt) Nothing -> (atVal "id", atVal "title", text (atVal "alt-text")) - attr = (ident, words $ atVal "role", []) + attr = (ident, T.words $ atVal "role", []) imageUrl = atVal "href" return $ imageWith attr imageUrl title caption @@ -155,8 +154,8 @@ parseBlock :: PandocMonad m => Content -> JATS m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "p" -> parseMixed para (elContent e) @@ -167,7 +166,7 @@ parseBlock (Elem e) = "bullet" -> bulletList <$> listitems listType -> do let start = fromMaybe 1 $ - (strContent <$> (filterElement (named "list-item") e + (textContent <$> (filterElement (named "list-item") e >>= filterElement (named "label"))) >>= safeRead orderedListWith (start, parseListStyleType listType, DefaultDelim) @@ -204,7 +203,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ textContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -267,9 +266,9 @@ parseBlock (Elem e) = Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault - let toWidth c = case findAttr (unqual "colwidth") c of + let toWidth c = case findAttrText (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w Nothing -> 0 :: Double let numrows = foldl' max 0 $ map length bodyrows @@ -363,7 +362,7 @@ parseRefList e = do return mempty parseRef :: PandocMonad m - => Element -> JATS m (Map.Map String MetaValue) + => Element -> JATS m (Map.Map Text MetaValue) parseRef e = do let refId = text $ attrValue "id" e let getInlineText n = maybe (return mempty) getInlines . filterChild (named n) @@ -396,7 +395,7 @@ parseRef e = do family <- maybe (return mempty) getInlines $ filterChild (named "surname") nm return $ toMetaValue $ Map.fromList [ - ("given", given) + ("given" :: Text, given) , ("family", family) ] personGroups <- mapM (\pg -> @@ -406,7 +405,7 @@ parseRef e = do toMetaValue names)) personGroups' return $ Map.fromList $ - [ ("id", toMetaValue refId) + [ ("id" :: Text, toMetaValue refId) , ("type", toMetaValue refType) , ("title", toMetaValue refTitle) , ("container-title", toMetaValue refContainerTitle) @@ -415,7 +414,7 @@ parseRef e = do , ("title", toMetaValue refTitle) , ("issued", toMetaValue $ Map.fromList [ - ("year", refYear) + ("year" :: Text, refYear) ]) , ("volume", toMetaValue refVolume) , ("page", toMetaValue refPages) @@ -424,6 +423,15 @@ parseRef e = do Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty -- TODO handle mixed-citation +findAttrText :: QName -> Element -> Maybe Text +findAttrText x = fmap T.pack . findAttr x + +textContent :: Element -> Text +textContent = T.pack . strContent + +textContentRecursive :: Element -> Text +textContentRecursive = T.pack . strContentRecursive + strContentRecursive :: Element -> String strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -433,9 +441,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) text $ T.pack <$> lookupEntity ref parseInline (Elem e) = case qName (elName e) of "italic" -> emph <$> innerInlines @@ -464,7 +472,7 @@ parseInline (Elem e) = "xref" -> do ils <- innerInlines let rid = attrValue "rid" e - let rids = words rid + let rids = T.words rid let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e let attr = (attrValue "id" e, [], maybeToList refType) return $ if refType == Just ("ref-type","bibr") @@ -477,13 +485,13 @@ parseInline (Elem e) = , citationNoteNum = 0 , citationHash = 0}) rids) ils - else linkWith attr ('#' : rid) "" ils + else linkWith attr ("#" <> rid) "" ils "ext-link" -> do ils <- innerInlines - let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "rid" e + Just h -> T.pack h + _ -> "#" <> attrValue "rid" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, [], []) return $ linkWith attr href title ils' @@ -491,23 +499,23 @@ parseInline (Elem e) = "disp-formula" -> formula displayMath "inline-formula" -> formula math "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e - "tex-math" -> return . math $ strContent e + "tex-math" -> return . math $ textContent e - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + "email" -> return $ link ("mailto:" <> textContent e) "" + $ str $ textContent e + "uri" -> return $ link (textContent e) "" $ str $ textContent e "fn" -> (note . mconcat) <$> mapM parseBlock (elContent e) _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> mapM parseInline (elContent e) mathML x = - case readMathML . showElement $ everywhere (mkT removePrefix) x of + case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of Left _ -> mempty Right m -> writeTeX m formula constructor = do let whereToLook = fromMaybe e $ filterElement (named "alternatives") e - texMaths = map strContent $ + texMaths = map textContent $ filterChildren (named "tex-math") whereToLook mathMLs = map mathML $ filterChildren isMathML whereToLook @@ -520,4 +528,4 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index be19964a4..5c9a3e69c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,9 +1,10 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.LaTeX Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -29,9 +30,9 @@ import Prelude import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (isDigit, isLetter, toLower, toUpper, chr) +import Data.Char (isDigit, isLetter, toUpper, chr) import Data.Default -import Data.List (intercalate, isPrefixOf) +import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set @@ -44,7 +45,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv, readFileFromDirs, report, setResourcePath, setTranslations, translateTerm, trace, fileExists) -import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError)) +import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -77,7 +78,7 @@ readLaTeX opts ltx = do (tokenize "source" (crFilter ltx)) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (T.unpack ltx) e + Left e -> throwError $ PandocParsecError ltx e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -99,13 +100,13 @@ parseLaTeX = do walk (resolveRefs (sLabels st)) doc' return $ Pandoc meta bs' -resolveRefs :: M.Map String [Inline] -> Inline -> Inline +resolveRefs :: M.Map Text [Inline] -> Inline -> Inline resolveRefs labels x@(Link (ident,classes,kvs) _ _) = case (lookup "reference-type" kvs, lookup "reference" kvs) of (Just "ref", Just lab) -> case M.lookup lab labels of - Just txt -> Link (ident,classes,kvs) txt ('#':lab, "") + Just txt -> Link (ident,classes,kvs) txt ("#" <> lab, "") Nothing -> x _ -> x resolveRefs _ x = x @@ -123,11 +124,11 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> (rawLaTeXParser toks True (do choice (map controlSeq @@ -151,14 +152,14 @@ beginOrEndCommand = try $ do (inlineEnvironments :: M.Map Text (LP PandocPure Inlines)) then mzero else return $ rawBlock "latex" - (T.unpack (txt <> untokenize rawargs)) + (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany opt >> braced)) @@ -167,23 +168,23 @@ rawLaTeXInline = do inlines ) finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 - return $ raw <> finalbraces + return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines -- inline elements: word :: PandocMonad m => LP m Inlines -word = (str . T.unpack . untoken) <$> satisfyTok isWordTok +word = (str . untoken) <$> satisfyTok isWordTok regularSymbol :: PandocMonad m => LP m Inlines -regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol +regularSymbol = (str . untoken) <$> satisfyTok isRegularSymbol where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t isRegularSymbol _ = False isSpecial c = c `Set.member` specialChars @@ -199,14 +200,14 @@ inlineGroup = do doLHSverb :: PandocMonad m => LP m Inlines doLHSverb = - (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + (codeWith ("",["haskell"],[]) . untokenize) <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') -mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines -mkImage options src = do +mkImage :: PandocMonad m => [(Text, Text)] -> Text -> LP m Inlines +mkImage options (T.unpack -> src) = do let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + Just (num, "\\textwidth") -> (k, showFl (num * 100) <> "%") _ -> (k, v) let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options @@ -223,10 +224,10 @@ mkImage options src = do then return s' else findFile s es src' <- case takeExtension src of - "" | not (null defaultExt) -> return $ addExtension src defaultExt + "" | not (T.null defaultExt) -> return $ addExtension src $ T.unpack defaultExt | otherwise -> findFile src exts _ -> return src - return $ imageWith attr src' "" alt + return $ imageWith attr (T.pack src') "" alt doxspace :: PandocMonad m => LP m Inlines doxspace = @@ -435,7 +436,7 @@ siUnitMap = M.fromList , ("zetta", str "Z") ] -lit :: String -> LP m Inlines +lit :: Text -> LP m Inlines lit = pure . str removeDoubleQuotes :: Text -> Text @@ -471,7 +472,7 @@ quoted' :: PandocMonad m -> LP m () -> LP m Inlines quoted' f starter ender = do - startchs <- (T.unpack . untokenize) <$> starter + startchs <- untokenize <$> starter smart <- extensionEnabled Ext_smart <$> getOption readerExtensions if smart then do @@ -487,7 +488,7 @@ quoted' f starter ender = do enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines enquote starred mblang = do skipopts - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langspan = case lang of Nothing -> id Just l -> spanWith ("",[],[("lang", renderLang l)]) @@ -503,27 +504,27 @@ blockquote citations mblang = do cs <- cites NormalCitation False return $ para (cite cs mempty) else return mempty - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langdiv = case lang of Nothing -> id Just l -> divWith ("",[],[("lang", renderLang l)]) bs <- grouped block return $ blockQuote . langdiv $ (bs <> citePar) -doAcronym :: PandocMonad m => String -> LP m Inlines +doAcronym :: PandocMonad m => Text -> LP m Inlines doAcronym form = do acro <- braced - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "singular+" ++ form)]) - $ str $ toksToString acro] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "singular+" <> form)]) + $ str $ untokenize acro] -doAcronymPlural :: PandocMonad m => String -> LP m Inlines +doAcronymPlural :: PandocMonad m => Text -> LP m Inlines doAcronymPlural form = do acro <- braced plural <- lit "s" - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "plural+" ++ form)]) $ - mconcat [str $ toksToString acro, plural]] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "plural+" <> form)]) $ + mconcat [str $ untokenize acro, plural]] doverb :: PandocMonad m => LP m Inlines doverb = do @@ -532,7 +533,7 @@ doverb = do Just (c, ts) | T.null ts -> return c _ -> mzero withVerbatimMode $ - (code . T.unpack . untokenize) <$> + (code . untokenize) <$> manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) verbTok :: PandocMonad m => Char -> LP m Tok @@ -547,7 +548,7 @@ verbTok stopchar = do : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp return $ Tok pos toktype t1 -listingsLanguage :: [(String, String)] -> Maybe String +listingsLanguage :: [(Text, Text)] -> Maybe Text listingsLanguage opts = case lookup "language" opts of Nothing -> Nothing @@ -562,10 +563,10 @@ dolstinline = do domintinline :: PandocMonad m => LP m Inlines domintinline = do skipopts - cls <- toksToString <$> braced + cls <- untokenize <$> braced doinlinecode [cls] -doinlinecode :: PandocMonad m => [String] -> LP m Inlines +doinlinecode :: PandocMonad m => [Text] -> LP m Inlines doinlinecode classes = do Tok _ Symbol t <- anySymbol marker <- case T.uncons t of @@ -573,14 +574,14 @@ doinlinecode classes = do _ -> mzero let stopchar = if marker == '{' then '}' else marker withVerbatimMode $ - (codeWith ("",classes,[]) . map nlToSpace . T.unpack . untokenize) <$> + (codeWith ("",classes,[]) . T.map nlToSpace . untokenize) <$> manyTill (verbTok stopchar) (symbol stopchar) nlToSpace :: Char -> Char nlToSpace '\n' = ' ' nlToSpace x = x -keyval :: PandocMonad m => LP m (String, String) +keyval :: PandocMonad m => LP m (Text, Text) keyval = try $ do Tok _ Word key <- satisfyTok isWordTok optional sp @@ -601,35 +602,34 @@ keyval = try $ do _ -> True)))))) optional (symbol ',') optional sp - return (T.unpack key, T.unpack $ T.strip val) + return (key, T.strip val) -keyvals :: PandocMonad m => LP m [(String, String)] +keyvals :: PandocMonad m => LP m [(Text, Text)] keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines accent combiningAccent fallBack = try $ do ils <- tok case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ -- try to normalize to the combined character: - Str (T.unpack - (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent])) ++ xs) : ys - [Space] -> return $ str [fromMaybe combiningAccent fallBack] - [] -> return $ str [fromMaybe combiningAccent fallBack] + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack _ -> return ils -mathDisplay :: String -> Inlines +mathDisplay :: Text -> Inlines mathDisplay = displayMath . trimMath -mathInline :: String -> Inlines +mathInline :: Text -> Inlines mathInline = math . trimMath dollarsMath :: PandocMonad m => LP m Inlines dollarsMath = do symbol '$' display <- option False (True <$ symbol '$') - (do contents <- try $ T.unpack . untokenize <$> pDollarsMath 0 + (do contents <- try $ untokenize <$> pDollarsMath 0 if display then (mathDisplay contents <$ symbol '$') else return $ mathInline contents) @@ -682,10 +682,10 @@ simpleCiteArgs = try $ do } return $ addPrefix pre $ addSuffix suf $ map conv keys -citationLabel :: PandocMonad m => LP m String +citationLabel :: PandocMonad m => LP m Text citationLabel = do optional spaces - toksToString <$> + untokenize <$> (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) <* optional spaces <* optional (symbol ',') @@ -729,10 +729,10 @@ cites mode multi = try $ do addMprenote _ _ = [] addMpostnote = addSuffix . mpostnote -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines citation name mode multi = do (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw) + return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) handleCitationPart :: Inlines -> [Citation] handleCitationPart ils = @@ -756,7 +756,7 @@ complexNatbibCitation mode = try $ do case cs of [] -> mzero (c:cits) -> return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ toksToString raw) + (rawInline "latex" $ "\\citetext" <> untokenize raw) inNote :: Inlines -> Inlines inNote ils = @@ -780,10 +780,10 @@ tok :: PandocMonad m => LP m Inlines tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' where singleChar' = do Tok _ _ t <- singleChar - return (str (T.unpack t)) + return $ str t opt :: PandocMonad m => LP m Inlines -opt = bracketed inline <|> (str . T.unpack <$> rawopt) +opt = bracketed inline <|> (str <$> rawopt) paropt :: PandocMonad m => LP m Inlines paropt = parenWrapped inline @@ -822,26 +822,31 @@ overlayTok = inBrackets :: Inlines -> Inlines inBrackets x = str "[" <> x <> str "]" -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" +unescapeURL :: Text -> Text +unescapeURL = T.concat . go . T.splitOn "\\" + where + isEscapable c = c `elemText` "#$%&~_^\\{}" + go (x:xs) = x : map unescapeInterior xs + go [] = [] + unescapeInterior t + | Just (c, _) <- T.uncons t + , isEscapable c = t + | otherwise = "\\" <> t mathEnvWith :: PandocMonad m => (Inlines -> a) -> Maybe Text -> Text -> LP m a mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name where inner x = case innerEnv of Nothing -> x - Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ - "\\end{" ++ T.unpack y ++ "}" + Just y -> "\\begin{" <> y <> "}\n" <> x <> + "\\end{" <> y <> "}" -mathEnv :: PandocMonad m => Text -> LP m String +mathEnv :: PandocMonad m => Text -> LP m Text mathEnv name = do skipopts optional blankline res <- manyTill anyTok (end_ name) - return $ stripTrailingNewlines $ T.unpack $ untokenize res + return $ stripTrailingNewlines $ untokenize res inlineEnvironment :: PandocMonad m => LP m Inlines inlineEnvironment = try $ do @@ -914,9 +919,9 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) , ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok) - , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) - , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) - , ("ensuremath", mathInline . toksToString <$> braced) + , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . untokenize <$> braced) , ("texorpdfstring", const <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") @@ -1008,16 +1013,15 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("lstinline", dolstinline) , ("mintinline", domintinline) , ("Verb", doverb) - , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("url", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (link url "" (str url))) - , ("nolinkurl", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("nolinkurl", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (code url)) - , ("href", (unescapeURL . toksToString <$> + , ("href", (unescapeURL . untokenize <$> bracedUrl <* optional sp) >>= \url -> tok >>= \lab -> pure (link url "" lab)) , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . T.unpack . - removeDoubleQuotes . untokenize <$> braced + src <- unescapeURL . removeDoubleQuotes . untokenize <$> braced mkImage options src) , ("enquote*", enquote True Nothing) , ("enquote", enquote False Nothing) @@ -1172,22 +1176,21 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList ifdim :: PandocMonad m => LP m Inlines ifdim = do contents <- manyTill anyTok (controlSeq "fi") - return $ rawInline "latex" $ T.unpack $ - "\\ifdim" <> untokenize contents <> "\\fi" + return $ rawInline "latex" $ "\\ifdim" <> untokenize contents <> "\\fi" makeUppercase :: Inlines -> Inlines -makeUppercase = fromList . walk (alterStr (map toUpper)) . toList +makeUppercase = fromList . walk (alterStr T.toUpper) . toList makeLowercase :: Inlines -> Inlines -makeLowercase = fromList . walk (alterStr (map toLower)) . toList +makeLowercase = fromList . walk (alterStr T.toLower) . toList -alterStr :: (String -> String) -> Inline -> Inline +alterStr :: (Text -> Text) -> Inline -> Inline alterStr f (Str xs) = Str (f xs) alterStr _ x = x foreignlanguage :: PandocMonad m => LP m Inlines foreignlanguage = do - babelLang <- T.unpack . untokenize <$> braced + babelLang <- untokenize <$> braced case babelLangToBCP47 babelLang of Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok _ -> tok @@ -1196,24 +1199,24 @@ inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47 where mk (polyglossia, bcp47Func) = - ("text" <> T.pack polyglossia, inlineLanguage bcp47Func) + ("text" <> polyglossia, inlineLanguage bcp47Func) -inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines +inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines inlineLanguage bcp47Func = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt let lang = renderLang $ bcp47Func o extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok hyperlink :: PandocMonad m => LP m Inlines hyperlink = try $ do - src <- toksToString <$> braced + src <- untokenize <$> braced lab <- tok - return $ link ('#':src) "" lab + return $ link ("#" <> src) "" lab hypertargetBlock :: PandocMonad m => LP m Blocks hypertargetBlock = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced bs <- grouped block case toList bs of [Header 1 (ident,_,_) _] | ident == ref -> return bs @@ -1221,7 +1224,7 @@ hypertargetBlock = try $ do hypertargetInline :: PandocMonad m => LP m Inlines hypertargetInline = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced ils <- grouped inline return $ spanWith (ref, [], []) ils @@ -1231,7 +1234,7 @@ romanNumeralUpper = romanNumeralLower :: (PandocMonad m) => LP m Inlines romanNumeralLower = - str . map toLower . toRomanNumeral <$> romanNumeralArg + str . T.toLower . toRomanNumeral <$> romanNumeralArg romanNumeralArg :: (PandocMonad m) => LP m Int romanNumeralArg = spaces *> (parser <|> inBraces) @@ -1248,18 +1251,18 @@ romanNumeralArg = spaces *> (parser <|> inBraces) let (digits, rest) = T.span isDigit s unless (T.null rest) $ Prelude.fail "Non-digits in argument to \\Rn or \\RN" - safeRead $ T.unpack digits + safeRead digits newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a newToggle name = do updateState $ \st -> - st{ sToggles = M.insert (toksToString name) False (sToggles st) } + st{ sToggles = M.insert (untokenize name) False (sToggles st) } return mempty setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a setToggle on name = do updateState $ \st -> - st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) } + st{ sToggles = M.adjust (const on) (untokenize name) (sToggles st) } return mempty ifToggle :: PandocMonad m => LP m () @@ -1271,7 +1274,7 @@ ifToggle = do no <- braced toggles <- sToggles <$> getState inp <- getInput - let name' = toksToString name + let name' = untokenize name case M.lookup name' toggles of Just True -> setInput (yes ++ inp) Just False -> setInput (no ++ inp) @@ -1294,11 +1297,11 @@ ifstrequal = do else getInput >>= setInput . (ifnotequal ++) return mempty -coloredInline :: PandocMonad m => String -> LP m Inlines +coloredInline :: PandocMonad m => Text -> LP m Inlines coloredInline stylename = do skipopts color <- braced - spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok + spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok ttfamily :: PandocMonad m => LP m Inlines ttfamily = (code . stringify . toList) <$> tok @@ -1313,12 +1316,12 @@ rawInlineOr name' fallback = do processHBox :: Inlines -> Inlines processHBox = walk convert where - convert Space = Str [chr 160] -- non-breakable space - convert SoftBreak = Str [chr 160] -- non-breakable space + convert Space = Str $ T.singleton $ chr 160 -- non-breakable space + convert SoftBreak = Str $ T.singleton $ chr 160 -- non-breakable space convert LineBreak = Str "" convert x = x -getRawCommand :: PandocMonad m => Text -> Text -> LP m String +getRawCommand :: PandocMonad m => Text -> Text -> LP m Text getRawCommand name txt = do (_, rawargs) <- withRaw $ case name of @@ -1336,7 +1339,7 @@ getRawCommand name txt = do skipopts option "" (try dimenarg) void $ many braced - return $ T.unpack (txt <> untokenize rawargs) + return $ txt <> untokenize rawargs isFontSizeCommand :: Text -> Bool isFontSizeCommand "tiny" = True @@ -1396,17 +1399,17 @@ treatAsInline = Set.fromList dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ spanWith (refstr,[],[("label", refstr)]) - $ inBrackets $ str $ toksToString v + $ inBrackets $ str $ untokenize v -doref :: PandocMonad m => String -> LP m Inlines +doref :: PandocMonad m => Text -> LP m Inlines doref cls = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ linkWith ("",[],[ ("reference-type", cls) , ("reference", refstr)]) - ('#':refstr) + ("#" <> refstr) "" (inBrackets $ str refstr) @@ -1435,11 +1438,11 @@ inline = (mempty <$ comment) <|> (str "\160" <$ symbol '~') <|> dollarsMath <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) - <|> (str . (:[]) <$> primEscape) + <|> (str . T.singleton <$> primEscape) <|> regularSymbol <|> (do res <- symbolIn "#^'`\"[]&" pos <- getPosition - let s = T.unpack (untoken res) + let s = untoken res report $ ParsingUnescaped s pos return $ str s) @@ -1498,7 +1501,7 @@ include name = do -- note, we can have cc_by_4.0 for example... _ | name == "usepackage" -> addExtension f ".sty" | otherwise -> addExtension f ".tex" - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" mapM_ (insertIncluded dirs) (map addExt fs) return mempty @@ -1509,19 +1512,19 @@ insertIncluded :: PandocMonad m insertIncluded dirs f = do pos <- getPosition containers <- getIncludeFiles <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show pos - updateState $ addIncludeFile f + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos + updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of Just s -> return s Nothing -> do - report $ CouldNotLoadIncludeFile f pos + report $ CouldNotLoadIncludeFile (T.pack f) pos return "" - getInput >>= setInput . (tokenize f (T.pack contents) ++) + getInput >>= setInput . (tokenize f contents ++) updateState dropLatestIncludeFile -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () addMeta field val = updateState $ \st -> st{ sMeta = addMetaField field val $ sMeta st } @@ -1536,10 +1539,10 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) -macroDef :: (PandocMonad m, Monoid a) => (String -> a) -> LP m a +macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a macroDef constructor = do (_, s) <- withRaw (commandDef <|> environmentDef) - (constructor (T.unpack $ untokenize s) <$ + (constructor (untokenize s) <$ guardDisabled Ext_latex_macros) <|> return mempty where commandDef = do @@ -1632,7 +1635,7 @@ newcommand = do case M.lookup name macros of Just macro | mtype == "newcommand" -> do - report $ MacroAlreadyDefined (T.unpack txt) pos + report $ MacroAlreadyDefined txt pos return (name, macro) | mtype == "providecommand" -> return (name, macro) _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) @@ -1658,7 +1661,7 @@ newenvironment = do case M.lookup name macros of Just _ | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined (T.unpack name) pos + report $ MacroAlreadyDefined name pos return Nothing | mtype == "provideenvironment" -> do return Nothing @@ -1669,7 +1672,7 @@ newenvironment = do bracketedNum :: PandocMonad m => LP m Int bracketedNum = do ds <- untokenize <$> bracketedToks - case safeRead (T.unpack ds) of + case safeRead ds of Just i -> return i _ -> return 0 @@ -1709,7 +1712,7 @@ section (ident, classes, kvs) lvl = do contents <- grouped inline lab <- option ident $ try (spaces >> controlSeq "label" - >> spaces >> toksToString <$> braced) + >> spaces >> untokenize <$> braced) when (lvl == 0) $ updateState $ \st -> st{ sHasChapters = True } unless ("unnumbered" `elem` classes) $ do @@ -1836,9 +1839,9 @@ blockCommands = M.fromList , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", skipopts *> setCaption) , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("endinput", mempty <$ skipMany tok) -- includes , ("lstinputlisting", inputListing) @@ -1941,18 +1944,18 @@ rawEnv name = do (bs, raw) <- withRaw $ env name blocks if parseRaw then return $ rawBlock "latex" - $ T.unpack $ beginCommand <> untokenize raw + $ beginCommand <> untokenize raw else do - report $ SkippedContent (T.unpack beginCommand) pos1 + report $ SkippedContent beginCommand pos1 pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + report $ SkippedContent ("\\end{" <> name <> "}") pos2 return bs rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw + let raw' = "\\begin{" <> name <> "}" <> untokenize raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -1961,12 +1964,11 @@ rawVerbEnv name = do report $ SkippedContent raw' pos return mempty -verbEnv :: PandocMonad m => Text -> LP m String +verbEnv :: PandocMonad m => Text -> LP m Text verbEnv name = withVerbatimMode $ do optional blankline res <- manyTill anyTok (end_ name) - return $ T.unpack - $ stripTrailingNewline + return $ stripTrailingNewline $ untokenize $ res @@ -2010,11 +2012,11 @@ minted = do mintedAttr :: PandocMonad m => LP m Attr mintedAttr = do options <- option [] keyvals - lang <- toksToString <$> braced + lang <- untokenize <$> braced let kvs = [ (if k == "firstnumber" then "startFrom" else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ + let classes = [ lang | not (T.null lang) ] ++ [ "numberLines" | lookup "linenos" options == Just "true" ] return ("",classes,kvs) @@ -2023,14 +2025,14 @@ inputMinted :: PandocMonad m => LP m Blocks inputMinted = do pos <- getPosition attr <- mintedAttr - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) rawcode <- case mbCode of Just s -> return s Nothing -> do report $ CouldNotLoadIncludeFile f pos - return [] + return "" return $ B.codeBlockWith attr rawcode letterContents :: PandocMonad m => LP m Blocks @@ -2052,10 +2054,10 @@ figure = try $ do addImageCaption :: PandocMonad m => Blocks -> LP m Blocks addImageCaption = walkM go where go (Image attr@(_, cls, kvs) alt (src,tit)) - | not ("fig:" `isPrefixOf` tit) = do + | not ("fig:" `T.isPrefixOf` tit) = do (mbcapt, mblab) <- sCaption <$> getState let (alt', tit') = case mbcapt of - Just ils -> (toList ils, "fig:" ++ tit) + Just ils -> (toList ils, "fig:" <> tit) Nothing -> (alt, tit) attr' = case mblab of Just lab -> (lab, cls, kvs) @@ -2090,23 +2092,23 @@ addImageCaption = walkM go return $ Image attr' alt' (src, tit') go x = return x -coloredBlock :: PandocMonad m => String -> LP m Blocks +coloredBlock :: PandocMonad m => Text -> LP m Blocks coloredBlock stylename = try $ do skipopts color <- braced notFollowedBy (grouped inline) - let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) + let constructor = divWith ("",[],[("style",stylename <> ": " <> untokenize color)]) constructor <$> grouped block graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do - ps <- map toksToString <$> + ps <- map (T.unpack . untokenize) <$> (bgroup *> spaces *> manyTill (braced <* spaces) egroup) - getResourcePath >>= setResourcePath . (++ ps) + getResourcePath >>= setResourcePath . (<> ps) return mempty -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') +splitBibs :: Text -> [Inlines] +splitBibs = map (str . T.pack . flip replaceExtension "bib" . T.unpack . trim) . splitTextBy (==',') alltt :: Blocks -> Blocks alltt = walk strToCode @@ -2115,7 +2117,7 @@ alltt = walk strToCode strToCode SoftBreak = LineBreak strToCode x = x -parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions :: [(Text, Text)] -> Attr parseListingsOptions options = let kvs = [ (if k == "firstnumber" then "startFrom" @@ -2129,23 +2131,23 @@ inputListing :: PandocMonad m => LP m Blocks inputListing = do pos <- getPosition options <- option [] keyvals - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) codeLines <- case mbCode of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do report $ CouldNotLoadIncludeFile f pos return [] let (ident,classes,kvs) = parseListingsOptions options let classes' = (case listingsLanguage options of - Nothing -> (take 1 (languagesByExtension (takeExtension f)) ++) + Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>) Just _ -> id) classes let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead let lastline = fromMaybe (length codeLines) $ lookup "lastline" options >>= safeRead - let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ + let codeContents = T.intercalate "\n" $ take (1 + lastline - firstline) $ drop (firstline - 1) codeLines return $ codeBlockWith (ident,classes',kvs) codeContents @@ -2176,12 +2178,12 @@ orderedList' = try $ do spaces let markerSpec = do symbol '[' - ts <- toksToString <$> manyTill anyTok (symbol ']') + ts <- untokenize <$> manyTill anyTok (symbol ']') case runParser anyOrderedListMarker def "option" ts of Right r -> return r Left _ -> do pos <- getPosition - report $ SkippedContent ("[" ++ ts ++ "]") pos + report $ SkippedContent ("[" <> ts <> "]") pos return (1, DefaultStyle, DefaultDelim) (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces @@ -2191,17 +2193,17 @@ orderedList' = try $ do spaces start <- option 1 $ try $ do pos <- getPosition controlSeq "setcounter" - ctr <- toksToString <$> braced - guard $ "enum" `isPrefixOf` ctr - guard $ all (`elem` ['i','v']) (drop 4 ctr) + ctr <- untokenize <$> braced + guard $ "enum" `T.isPrefixOf` ctr + guard $ T.all (`elem` ['i','v']) (T.drop 4 ctr) optional sp - num <- toksToString <$> braced + num <- untokenize <$> braced case safeRead num of Just i -> return (i + 1 :: Int) Nothing -> do report $ SkippedContent - ("\\setcounter{" ++ ctr ++ - "}{" ++ num ++ "}") pos + ("\\setcounter{" <> ctr <> + "}{" <> num <> "}") pos return 1 bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs @@ -2235,7 +2237,7 @@ splitWordTok = do inp <- getInput case inp of (Tok spos Word t : rest) -> - setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest _ -> return () parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] @@ -2256,7 +2258,7 @@ parseAligns = try $ do let alignSuffix = symbol '<' >> braced let colWidth = try $ do symbol '{' - ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") + ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") spaces symbol '}' case safeRead ds of @@ -2266,7 +2268,7 @@ parseAligns = try $ do pref <- option [] alignPrefix spaces al <- alignChar - width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced + width <- colWidth <|> option 0.0 (do s <- untokenize <$> braced pos <- getPosition report $ SkippedContent s pos return 0.0) @@ -2276,13 +2278,13 @@ parseAligns = try $ do let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro symbol '*' spaces - ds <- trim . toksToString <$> braced + ds <- trim . untokenize <$> braced spaces spec <- braced case safeRead ds of Just n -> getInput >>= setInput . (mconcat (replicate n spec) ++) - Nothing -> Prelude.fail $ "Could not parse " ++ ds ++ " as number" + Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" bgroup spaces maybeBar @@ -2379,7 +2381,7 @@ block = do <|> blockCommand <|> paragraph <|> grouped block - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blocks :: PandocMonad m => LP m Blocks @@ -2387,9 +2389,9 @@ blocks = mconcat <$> many block setDefaultLanguage :: PandocMonad m => LP m Blocks setDefaultLanguage = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt - polylang <- toksToString <$> braced + polylang <- untokenize <$> braced case M.lookup polylang polyglossiaLangToBCP47 of Nothing -> return mempty -- TODO mzero? warning? Just langFunc -> do diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index b21398f93..7ec432a4a 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.LaTeX.Lang Copyright : Copyright (C) 2018-2019 John MacFarlane @@ -18,11 +19,12 @@ module Text.Pandoc.Readers.LaTeX.Lang where import Prelude import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.BCP47 (Lang(..)) -polyglossiaLangToBCP47 :: M.Map String (String -> Lang) +polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList - [ ("arabic", \o -> case filter (/=' ') o of + [ ("arabic", \o -> case T.filter (/=' ') o of "locale=algeria" -> Lang "ar" "" "DZ" [] "locale=mashriq" -> Lang "ar" "" "SY" [] "locale=libya" -> Lang "ar" "" "LY" [] @@ -30,7 +32,7 @@ polyglossiaLangToBCP47 = M.fromList "locale=mauritania" -> Lang "ar" "" "MR" [] "locale=tunisia" -> Lang "ar" "" "TN" [] _ -> Lang "ar" "" "" []) - , ("german", \o -> case filter (/=' ') o of + , ("german", \o -> case T.filter (/=' ') o of "spelling=old" -> Lang "de" "" "DE" ["1901"] "variant=austrian,spelling=old" -> Lang "de" "" "AT" ["1901"] @@ -40,11 +42,11 @@ polyglossiaLangToBCP47 = M.fromList "variant=swiss" -> Lang "de" "" "CH" [] _ -> Lang "de" "" "" []) , ("lsorbian", \_ -> Lang "dsb" "" "" []) - , ("greek", \o -> case filter (/=' ') o of + , ("greek", \o -> case T.filter (/=' ') o of "variant=poly" -> Lang "el" "" "polyton" [] "variant=ancient" -> Lang "grc" "" "" [] _ -> Lang "el" "" "" []) - , ("english", \o -> case filter (/=' ') o of + , ("english", \o -> case T.filter (/=' ') o of "variant=australian" -> Lang "en" "" "AU" [] "variant=canadian" -> Lang "en" "" "CA" [] "variant=british" -> Lang "en" "" "GB" [] @@ -52,7 +54,7 @@ polyglossiaLangToBCP47 = M.fromList "variant=american" -> Lang "en" "" "US" [] _ -> Lang "en" "" "" []) , ("usorbian", \_ -> Lang "hsb" "" "" []) - , ("latin", \o -> case filter (/=' ') o of + , ("latin", \o -> case T.filter (/=' ') o of "variant=classic" -> Lang "la" "" "" ["x-classic"] _ -> Lang "la" "" "" []) , ("slovenian", \_ -> Lang "sl" "" "" []) @@ -133,7 +135,7 @@ polyglossiaLangToBCP47 = M.fromList , ("vietnamese", \_ -> Lang "vi" "" "" []) ] -babelLangToBCP47 :: String -> Maybe Lang +babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of "austrian" -> Just $ Lang "de" "" "AT" ["1901"] diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 14cb408b0..a01abda46 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -97,8 +97,8 @@ import Text.Parsec.Pos newtype DottedNum = DottedNum [Int] deriving (Show) -renderDottedNum :: DottedNum -> String -renderDottedNum (DottedNum xs) = +renderDottedNum :: DottedNum -> T.Text +renderDottedNum (DottedNum xs) = T.pack $ intercalate "." (map show xs) incrementDottedNum :: Int -> DottedNum -> DottedNum @@ -111,18 +111,18 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sMeta :: Meta , sQuoteContext :: QuoteContext , sMacros :: M.Map Text Macro - , sContainers :: [String] + , sContainers :: [Text] , sLogMessages :: [LogMessage] - , sIdentifiers :: Set.Set String + , sIdentifiers :: Set.Set Text , sVerbatimMode :: Bool - , sCaption :: (Maybe Inlines, Maybe String) + , sCaption :: (Maybe Inlines, Maybe Text) , sInListItem :: Bool , sInTableCell :: Bool , sLastHeaderNum :: DottedNum , sLastFigureNum :: DottedNum - , sLabels :: M.Map String [Inline] + , sLabels :: M.Map Text [Inline] , sHasChapters :: Bool - , sToggles :: M.Map String Bool + , sToggles :: M.Map Text Bool , sExpanded :: Bool } deriving Show @@ -202,7 +202,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT String s m (a, String) + -> ParserT Text s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -233,16 +233,16 @@ rawLaTeXParser toks retokenize parser valParser = do , not (" " `T.isSuffixOf` result) -> result <> " " _ -> result - return (val, T.unpack result') + return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => String -> ParserT String s m String + => Text -> ParserT Text s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> - do let retokenize = toksToString <$> many (satisfyTok (const True)) + do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate , sMacros = extractMacros pstate } - res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s)) + res <- runParserT retokenize lstate "math" (tokenize "math" s) case res of Left e -> Prelude.fail (show e) Right s' -> return s' @@ -307,7 +307,7 @@ totoks pos t = : totoks (incSourceColumn pos 2) rest' | c == '#' -> let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest - in case safeRead (T.unpack t1) of + in case safeRead t1 of Just i -> Tok pos (Arg i) ("#" <> t1) : totoks (incSourceColumn pos (1 + T.length t1)) t2 @@ -447,7 +447,7 @@ doMacros' n inp = do handleMacros n' spos name ts = do when (n' > 20) -- detect macro expansion loops - $ throwError $ PandocMacroLoop (T.unpack name) + $ throwError $ PandocMacroLoop name macros <- sMacros <$> getState case M.lookup name macros of Nothing -> mzero @@ -588,7 +588,7 @@ primEscape = do | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) | otherwise -> return (chr (ord c + 64)) Nothing -> Prelude.fail "Empty content of Esc1" - Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Esc2 -> case safeRead ("0x" <> T.drop 2 t) of Just x -> return (chr x) Nothing -> Prelude.fail $ "Could not read: " ++ T.unpack t _ -> Prelude.fail "Expected an Esc1 or Esc2 token" -- should not happen @@ -677,7 +677,7 @@ dimenarg = try $ do guard $ rest `elem` ["", "pt","pc","in","bp","cm","mm","dd","cc","sp"] return $ T.pack ['=' | ch] <> minus <> s -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore :: (Monoid a, PandocMonad m) => Text -> ParserT s u m a ignore raw = do pos <- getPosition report $ SkippedContent raw pos diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index ddf469222..feacb8450 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -1,4 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Man Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -63,7 +65,7 @@ readWithMTokens :: PandocMonad m -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = - let leftF = PandocParsecError . intercalate "\n" $ show <$> input + let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input in mapLeft leftF `liftM` runParserT parser state "source" input parseMan :: PandocMonad m => ManParser m Pandoc @@ -141,7 +143,7 @@ parseTable = do isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] isHrule (_, [RoffTokens ss]) = case Foldable.toList ss of - [TextLine [RoffStr [c]]] -> c `elem` ['_','-','='] + [TextLine [RoffStr (T.unpack -> [c])]] -> c `elem` ['_','-','='] _ -> False isHrule _ = False @@ -191,7 +193,7 @@ memptyLine = msatisfy isEmptyLine where isEmptyLine EmptyLine = True isEmptyLine _ = False -mmacro :: PandocMonad m => String -> ManParser m RoffToken +mmacro :: PandocMonad m => T.Text -> ManParser m RoffToken mmacro mk = msatisfy isControlLine where isControlLine (ControlLine mk' _ _) | mk == mk' = True | otherwise = False @@ -284,7 +286,7 @@ parseInline = try $ do _ -> mzero handleInlineMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> ManParser m Inlines + => T.Text -> [Arg] -> SourcePos -> ManParser m Inlines handleInlineMacro mname args _pos = do case mname of "UR" -> parseLink args @@ -339,7 +341,7 @@ bareIP = msatisfy isBareIP where isBareIP (ControlLine "IP" [] _) = True isBareIP _ = False -endmacro :: PandocMonad m => String -> ManParser m () +endmacro :: PandocMonad m => T.Text -> ManParser m () endmacro name = void (mmacro name) <|> lookAhead (void newBlockMacro) <|> lookAhead eof @@ -356,7 +358,7 @@ parseCodeBlock = try $ do toks <- (mmacro "nf" *> manyTill codeline (endmacro "fi")) <|> (mmacro "EX" *> manyTill codeline (endmacro "EE")) optional (mmacro "in") - return $ codeBlock (intercalate "\n" $ catMaybes toks) + return $ codeBlock (T.intercalate "\n" $ catMaybes toks) where @@ -366,7 +368,7 @@ parseCodeBlock = try $ do ControlLine "PP" _ _ -> return $ Just "" -- .PP sometimes used for blank line ControlLine mname args pos -> do (Just . query getText <$> handleInlineMacro mname args pos) <|> - do report $ SkippedContent ('.':mname) pos + do report $ SkippedContent ("." <> mname) pos return Nothing Tbl _ _ pos -> do report $ SkippedContent "TABLE" pos @@ -375,12 +377,12 @@ parseCodeBlock = try $ do TextLine ss | not (null ss) , all isFontToken ss -> return Nothing - | otherwise -> return $ Just $ linePartsToString ss + | otherwise -> return $ Just $ linePartsToText ss isFontToken Font{} = True isFontToken _ = False - getText :: Inline -> String + getText :: Inline -> T.Text getText (Str s) = s getText Space = " " getText (Code _ s) = s @@ -416,8 +418,8 @@ listItem mbListType = try $ do (ControlLine _ args _) <- mmacro "IP" case args of (arg1 : _) -> do - let cs = linePartsToString arg1 - let cs' = if not ('.' `elem` cs || ')' `elem` cs) then cs ++ "." else cs + let cs = linePartsToText arg1 + let cs' = if not (T.any (== '.') cs || T.any (== ')') cs) then cs <> "." else cs let lt = case Parsec.runParser anyOrderedListMarker defaultParserState "list marker" cs' of Right (start, listtype, listdelim) @@ -467,7 +469,7 @@ parseLink args = do ControlLine _ endargs _ <- mmacro "UE" let url = case args of [] -> "" - (x:_) -> linePartsToString x + (x:_) -> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -479,7 +481,7 @@ parseEmailLink args = do ControlLine _ endargs _ <- mmacro "ME" let url = case args of [] -> "" - (x:_) -> "mailto:" ++ linePartsToString x + (x:_) -> "mailto:" <> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -490,6 +492,6 @@ skipUnknownMacro = do tok <- mmacroAny case tok of ControlLine mkind _ pos -> do - report $ SkippedContent ('.':mkind) pos + report $ SkippedContent ("." <> mkind) pos return mempty _ -> Prelude.fail "the impossible happened" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4807baada..f8349ea99 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,14 +21,15 @@ import Prelude import Control.Monad import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BS -import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) -import Data.List (intercalate, sortBy, transpose, elemIndex) +import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Data.List (sortBy, transpose, elemIndex) import qualified Data.Map as M import Data.Maybe import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.YAML as YAML import qualified Data.YAML.Event as YE import System.FilePath (addExtension, takeExtension) @@ -47,7 +50,7 @@ import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) -type MarkdownParser m = ParserT [Char] ParserState m +type MarkdownParser m = ParserT Text ParserState m -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: PandocMonad m @@ -56,7 +59,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- readWithM parseMarkdown def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -77,7 +80,7 @@ isHruleChar '-' = True isHruleChar '_' = True isHruleChar _ = False -setextHChars :: String +setextHChars :: [Char] setextHChars = "=-" isBlank :: Char -> Bool @@ -96,30 +99,30 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT [Char] st m () +spnl :: PandocMonad m => ParserT Text st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT [Char] st m String +spnl' :: PandocMonad m => ParserT Text st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline <*> (many spaceChar <* notFollowedBy (char '\n')) - return (xs ++ ys) + return $ T.pack $ xs ++ ys -indentSpaces :: PandocMonad m => MarkdownParser m String +indentSpaces :: PandocMonad m => MarkdownParser m Text indentSpaces = try $ do tabStop <- getOption readerTabStop - count tabStop (char ' ') <|> - string "\t" <?> "indentation" + countChar tabStop (char ' ') <|> + textStr "\t" <?> "indentation" -nonindentSpaces :: PandocMonad m => MarkdownParser m String +nonindentSpaces :: PandocMonad m => MarkdownParser m Text nonindentSpaces = do n <- skipNonindentSpaces - return $ replicate n ' ' + return $ T.replicate n " " -- returns number of spaces parsed skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int @@ -139,8 +142,9 @@ inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) inlinesInBalancedBrackets = try $ char '[' >> withRaw (go 1) >>= parseFromString inlines . stripBracket . snd - where stripBracket [] = [] - stripBracket xs = if last xs == ']' then init xs else xs + where stripBracket t = case T.unsnoc t of + Just (t', ']') -> t' + _ -> t go :: PandocMonad m => Int -> MarkdownParser m () go 0 = return () go openBrackets = @@ -160,7 +164,7 @@ inlinesInBalancedBrackets = -- document structure -- -rawTitleBlockLine :: PandocMonad m => MarkdownParser m String +rawTitleBlockLine :: PandocMonad m => MarkdownParser m Text rawTitleBlockLine = do char '%' skipSpaces @@ -169,7 +173,7 @@ rawTitleBlockLine = do notFollowedBy blankline skipSpaces anyLine - return $ trim $ unlines (first:rest) + return $ trim $ T.unlines (first:rest) titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do @@ -222,9 +226,9 @@ yamlMetaBlock = try $ do notFollowedBy blankline -- if --- is followed by a blank it's an HRULE rawYamlLines <- manyTill anyLine stopLine -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) + let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml + newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -- Since `<>` is left-biased, existing values are not touched: updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } return mempty @@ -255,7 +259,7 @@ yamlBsToMeta bstr = do return . return $ mempty Left (_pos, err') -> do logMessage $ CouldNotParseYamlMetadata - err' pos + (T.pack err') pos return . return $ mempty nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text @@ -270,11 +274,11 @@ toMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with -- `|` or `>` will. - if (T.pack "\n") `T.isSuffixOf` x - then parseFromString' (asBlocks <$> parseBlocks) (xstring <> "\n") + if "\n" `T.isSuffixOf` x + then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n") else parseFromString' ((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks)) - xstring + x where pInlines = trimInlinesF . mconcat <$> manyTill inline eof asBlocks p = do p' <- p @@ -282,7 +286,6 @@ toMetaValue x = asInlines p = do p' <- p return $ MetaInlines (B.toList p') - xstring = T.unpack x checkBoolean :: Text -> Maybe Bool checkBoolean t = @@ -298,8 +301,8 @@ yamlToMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> toMetaValue t YAML.SBool b -> return $ return $ MetaBool b - YAML.SFloat d -> return $ return $ MetaString (show d) - YAML.SInt i -> return $ return $ MetaString (show i) + YAML.SFloat d -> return $ return $ MetaString $ tshow d + YAML.SInt i -> return $ return $ MetaString $ tshow i YAML.SUnknown _ t -> case checkBoolean t of Just b -> return $ return $ MetaBool b @@ -315,7 +318,7 @@ yamlToMetaValue _ = return $ return $ MetaString "" yamlMap :: PandocMonad m => M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> MarkdownParser m (F (M.Map String MetaValue)) + -> MarkdownParser m (F (M.Map Text MetaValue)) yamlMap o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- nodeToKey key @@ -323,12 +326,12 @@ yamlMap o = do let kvs' = filter (not . ignorable . fst) kvs (fmap M.fromList . sequence) <$> mapM toMeta kvs' where - ignorable t = (T.pack "_") `T.isSuffixOf` t + ignorable t = "_" `T.isSuffixOf` t toMeta (k, v) = do fv <- yamlToMetaValue v return $ do v' <- fv - return (T.unpack k, v') + return (k, v') stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -343,14 +346,14 @@ mmdTitleBlock = try $ do updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) +kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue) kvPair allowEmpty = try $ do - key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - val <- trim <$> manyTill anyChar + key <- many1TillChar (alphaNum <|> oneOf "_- ") (char ':') + val <- trim <$> manyTillChar anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ allowEmpty || not (null val) - let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $B.text val + guard $ allowEmpty || not (T.null val) + let key' = T.concat $ T.words $ T.toLower key + let val' = MetaBlocks $ B.toList $ B.plain $ B.text val return (key',val') parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc @@ -380,13 +383,13 @@ referenceKey = try $ do (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL = fmap unwords $ many $ try $ do + let sourceURL = fmap T.unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes notFollowedBy' (() <$ reference) - many1 $ notFollowedBy space >> litChar - let betweenAngles = try $ char '<' >> manyTill litChar (char '>') + many1Char $ notFollowedBy space >> litChar + let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ @@ -411,20 +414,20 @@ referenceKey = try $ do updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty -referenceTitle :: PandocMonad m => MarkdownParser m String +referenceTitle :: PandocMonad m => MarkdownParser m Text referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar -- A link title in quotes -quotedTitle :: PandocMonad m => Char -> MarkdownParser m String +quotedTitle :: PandocMonad m => Char -> MarkdownParser m Text quotedTitle c = try $ do char c notFollowedBy spaces let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum) - let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar - let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c - unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder + let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar + let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c + T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for @@ -440,21 +443,21 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: PandocMonad m => MarkdownParser m String -noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') +noteMarker :: PandocMonad m => MarkdownParser m Text +noteMarker = string "[^" >> many1TillChar (satisfy $ not . isBlank) (char ']') -rawLine :: PandocMonad m => MarkdownParser m String +rawLine :: PandocMonad m => MarkdownParser m Text rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: PandocMonad m => MarkdownParser m String +rawLines :: PandocMonad m => MarkdownParser m Text rawLines = do first <- anyLine rest <- many rawLine - return $ unlines (first:rest) + return $ T.unlines (first:rest) noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) noteBlock = try $ do @@ -466,7 +469,7 @@ noteBlock = try $ do optional indentSpaces first <- rawLines rest <- many $ try $ blanklines >> indentSpaces >> rawLines - let raw = unlines (first:rest) ++ "\n" + let raw = T.unlines (first:rest) <> "\n" optional blanklines parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState @@ -510,7 +513,7 @@ block = do , para , plain ] <?> "block" - trace (take 60 $ show $ B.toList $ runF res defaultParserState) + trace (T.take 60 $ tshow $ B.toList $ runF res defaultParserState) return res -- @@ -570,7 +573,7 @@ mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr mmdHeaderIdentifier = do (_, raw) <- reference let raw' = trim $ stripFirstAndLast raw - let ident = concat $ words $ map toLower raw' + let ident = T.concat $ T.words $ T.toLower raw' let attr = (ident, [], []) guardDisabled Ext_implicit_header_references <|> registerImplicitHeader raw' attr @@ -600,20 +603,20 @@ setextHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () +registerImplicitHeader :: PandocMonad m => Text -> Attr -> MarkdownParser m () registerImplicitHeader raw attr@(ident, _, _) - | null raw = return () + | T.null raw = return () | otherwise = do - let key = toKey $ "[" ++ raw ++ "]" + let key = toKey $ "[" <> raw <> "]" updateState $ \s -> - s { stateHeaderKeys = M.insert key (('#':ident,""), attr) + s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr) (stateHeaderKeys s) } -- -- hrule block -- -hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) +hrule :: PandocMonad m => ParserT Text st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -627,13 +630,13 @@ hrule = try $ do -- code blocks -- -indentedLine :: PandocMonad m => MarkdownParser m String +indentedLine :: PandocMonad m => MarkdownParser m Text indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT [Char] ParserState m Int + -> ParserT Text ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -652,11 +655,11 @@ attributes = try $ do attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr -identifier :: PandocMonad m => MarkdownParser m String +identifier :: PandocMonad m => MarkdownParser m Text identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." - return (first:rest) + return $ T.pack (first:rest) identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) identifierAttr = try $ do @@ -674,15 +677,15 @@ keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' - val <- enclosed (char '"') (char '"') litChar - <|> enclosed (char '\'') (char '\'') litChar + val <- T.pack <$> enclosed (char '"') (char '"') litChar + <|> T.pack <$> enclosed (char '\'') (char '\'') litChar <|> ("" <$ try (string "\"\"")) <|> ("" <$ try (string "''")) - <|> many (escapedChar' <|> noneOf " \t\n\r}") + <|> manyChar (escapedChar' <|> noneOf " \t\n\r}") return $ \(id',cs,kvs) -> case key of "id" -> (val,cs,kvs) - "class" -> (id',cs ++ words val,kvs) + "class" -> (id',cs ++ T.words val,kvs) _ -> (id',cs,kvs ++ [(key,val)]) specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) @@ -690,12 +693,12 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute :: PandocMonad m => MarkdownParser m Text rawAttribute = do char '{' skipMany spaceChar char '=' - format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + format <- many1Char $ satisfy (\c -> isAlphaNum c || c `elem` ['-', '_']) skipMany spaceChar char '}' return format @@ -703,7 +706,7 @@ rawAttribute = do codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do indentchars <- nonindentSpaces - let indentLevel = length indentchars + let indentLevel = T.length indentchars c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing @@ -713,9 +716,9 @@ codeBlockFenced = try $ do <|> (Right <$> option ("",[],[]) (try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar))) blankline - contents <- intercalate "\n" <$> + contents <- T.intercalate "\n" <$> manyTill (gobbleAtMostSpaces indentLevel >> anyLine) (try $ do blockDelimiter (== c) (Just size) @@ -726,8 +729,8 @@ codeBlockFenced = try $ do Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers -toLanguageId :: String -> String -toLanguageId = map toLower . go +toLanguageId :: Text -> Text +toLanguageId = T.toLower . go where go "c++" = "cpp" go "objective-c" = "objectivec" go x = x @@ -737,11 +740,11 @@ codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine - return $ b ++ l)) + return $ b <> l)) optional blanklines classes <- getOption readerIndentedCodeClasses return $ return $ B.codeBlockWith ("", classes, []) $ - stripTrailingNewlines $ concat contents + stripTrailingNewlines $ T.concat contents lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) lhsCodeBlock = do @@ -751,33 +754,33 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String +lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m Text lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline - contents <- many1Till anyChar (try $ string "\\end{code}") + contents <- many1TillChar anyChar (try $ string "\\end{code}") blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String +lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m Text lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ Prelude.fail "Not in first column" lns <- many1 $ birdTrackLine c -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + let lns' = if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns blanklines - return $ intercalate "\n" lns' + return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String +birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -791,12 +794,12 @@ birdTrackLine c = try $ do emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') -emailBlockQuote :: PandocMonad m => MarkdownParser m [String] +emailBlockQuote :: PandocMonad m => MarkdownParser m [Text] emailBlockQuote = try $ do emailBlockQuoteStart - let emailLine = many $ nonEndline <|> try - (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n') + let emailLine = manyChar $ nonEndline <|> try + (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n') let emailSep = try (newline >> emailBlockQuoteStart) first <- emailLine rest <- many $ try $ emailSep >> emailLine @@ -809,7 +812,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ T.intercalate "\n" raw <> "\n\n" return $ B.blockQuote <$> contents -- @@ -833,7 +836,7 @@ orderedListStart mbstydelim = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number (do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead + start <- many1Char digit >>= safeRead char '.' gobbleSpaces 1 <|> () <$ lookAhead newline optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) @@ -857,7 +860,7 @@ orderedListStart mbstydelim = try $ do listStart :: PandocMonad m => MarkdownParser m () listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing) -listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine :: PandocMonad m => Int -> MarkdownParser m Text listLine continuationIndent = try $ do notFollowedBy' (do gobbleSpaces continuationIndent skipMany spaceChar @@ -867,19 +870,19 @@ listLine continuationIndent = try $ do optional (() <$ gobbleSpaces continuationIndent) listLineCommon -listLineCommon :: PandocMonad m => MarkdownParser m String -listLineCommon = concat <$> manyTill - ( many1 (satisfy $ \c -> c `notElem` ['\n', '<', '`']) +listLineCommon :: PandocMonad m => MarkdownParser m Text +listLineCommon = T.concat <$> manyTill + ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`']) <|> fmap snd (withRaw code) <|> fmap snd (htmlTag isCommentTag) - <|> count 1 anyChar + <|> countChar 1 anyChar ) newline -- parse raw text for one list item, excluding start marker and continuations rawListItem :: PandocMonad m => Bool -- four space rule -> MarkdownParser m a - -> MarkdownParser m (String, Int) + -> MarkdownParser m (Text, Int) rawListItem fourSpaceRule start = try $ do pos1 <- getPosition start @@ -892,14 +895,14 @@ rawListItem fourSpaceRule start = try $ do notFollowedBy (() <$ codeBlockFenced) notFollowedBy blankline listLine continuationIndent) - blanks <- many blankline - let result = unlines (first:rest) ++ blanks + blanks <- manyChar blankline + let result = T.unlines (first:rest) <> blanks return (result, continuationIndent) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation :: PandocMonad m => Int -> MarkdownParser m Text listContinuation continuationIndent = try $ do x <- try $ do notFollowedBy blankline @@ -913,12 +916,12 @@ listContinuation continuationIndent = try $ do notFollowedByDivCloser gobbleSpaces continuationIndent <|> notFollowedBy' listStart anyLineNewline - blanks <- many blankline - return $ concat (x:xs) ++ blanks + blanks <- manyChar blankline + return $ T.concat (x:xs) <> blanks -- Variant of blanklines that doesn't require blank lines -- before a fence or eof. -blanklines' :: PandocMonad m => MarkdownParser m [Char] +blanklines' :: PandocMonad m => MarkdownParser m Text blanklines' = blanklines <|> try checkDivCloser where checkDivCloser = do guardEnabled Ext_fenced_divs @@ -954,7 +957,7 @@ listItem fourSpaceRule start = try $ do (first, continuationIndent) <- rawListItem fourSpaceRule start continuations <- many (listContinuation continuationIndent) -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) + let raw = T.concat (first:continuations) contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) exts <- getOption readerExtensions @@ -990,7 +993,7 @@ defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' tabStop <- getOption readerTabStop - let remaining = tabStop - (length sps + 1) + let remaining = tabStop - (T.length sps + 1) if remaining > 0 then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar else mzero @@ -1001,11 +1004,11 @@ definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact term <- parseFromString' (trimInlinesF <$> inlines) rawLine' - contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw + contents <- mapM (parseFromString' parseBlocks . (<> "\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) -defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String +defRawBlock :: PandocMonad m => Bool -> MarkdownParser m Text defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker @@ -1020,13 +1023,13 @@ defRawBlock compact = try $ do <|> notFollowedBy defListMarker anyLine ) rawlines <- many dline - cont <- fmap concat $ many $ try $ do + cont <- fmap T.concat $ many $ try $ do trailing <- option "" blanklines ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline - return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ unlines rawlines ++ cont) ++ - if hasBlank || not (null cont) then "\n\n" else "" + return $ trailing <> T.unlines (ln:lns) + return $ trimr (firstline <> T.unlines rawlines <> cont) <> + if hasBlank || not (T.null cont) then "\n\n" else "" definitionList :: PandocMonad m => MarkdownParser m (F Blocks) definitionList = try $ do @@ -1063,7 +1066,7 @@ para = try $ do | not (null alt) -> -- the fig: at beginning of title indicates a figure return $ B.singleton - $ Image attr alt (src,'f':'i':'g':':':tit) + $ Image attr alt (src, "fig:" <> tit) _ -> return x' | otherwise = x result <- implicitFigures . trimInlinesF <$> inlines1 @@ -1082,7 +1085,7 @@ para = try $ do inHtmlBlock <- stateInHtmlBlock <$> getState case inHtmlBlock of Just "div" -> () <$ - lookAhead (htmlTag (~== TagClose "div")) + lookAhead (htmlTag (~== TagClose ("div" :: Text))) _ -> mzero <|> do guardEnabled Ext_fenced_divs divLevel <- stateFencedDivLevel <$> getState @@ -1098,7 +1101,7 @@ plain = fmap B.plain . trimInlinesF <$> inlines1 -- raw html -- -htmlElement :: PandocMonad m => MarkdownParser m String +htmlElement :: PandocMonad m => MarkdownParser m Text htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> fmap snd (htmlTag isBlockTag) @@ -1132,14 +1135,14 @@ htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ if null first + return $ if T.null first then mempty else return $ B.rawBlock "html" first -strictHtmlBlock :: PandocMonad m => MarkdownParser m String +strictHtmlBlock :: PandocMonad m => MarkdownParser m Text strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: PandocMonad m => MarkdownParser m String +rawVerbatimBlock :: PandocMonad m => MarkdownParser m Text rawVerbatimBlock = htmlInBalanced isVerbTag where isVerbTag (TagOpen "pre" _) = True isVerbTag (TagOpen "style" _) = True @@ -1150,13 +1153,13 @@ rawVerbatimBlock = htmlInBalanced isVerbTag rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawConTeXtEnvironment <*> spnl')) - <|> (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawLaTeXBlock <*> spnl')) + result <- (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawConTeXtEnvironment <*> spnl')) + <|> (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawLaTeXBlock <*> spnl')) return $ case B.toList result of [RawBlock _ cs] - | all (`elem` [' ','\t','\n']) cs -> return mempty + | T.all (`elem` [' ','\t','\n']) cs -> return mempty -- don't create a raw block for suppressed macro defs _ -> return result @@ -1186,7 +1189,7 @@ rawHtmlBlocks = do return result -- remove markdown="1" attribute -stripMarkdownAttribute :: String -> String +stripMarkdownAttribute :: Text -> Text stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s where filterAttrib (TagOpen t as) = TagOpen t [(k,v) | (k,v) <- as, k /= "markdown"] @@ -1211,7 +1214,7 @@ lineBlock = try $ do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT [Char] st m (Int, Int) + -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1232,9 +1235,9 @@ simpleTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- If no header, calculate alignment on basis of first row of text - rawHeads <- fmap (tail . splitStringByIndices (init indices)) $ + rawHeads <- fmap (tail . splitTextByIndices (init indices)) $ if headless then lookAhead anyLine else return rawContent @@ -1250,15 +1253,15 @@ simpleTableHeader headless = try $ do -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the -- dashed line under the rows. -alignType :: [String] +alignType :: [Text] -> Int -> Alignment alignType [] _ = AlignDefault alignType strLst len = - let nonempties = filter (not . null) $ map trimr strLst + let nonempties = filter (not . T.null) $ map trimr strLst (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) + case sortBy (comparing T.length) nonempties of + (x:_) -> (T.head x `elem` [' ', 't'], T.length x < len) [] -> (False, False) in case (leftSpace, rightSpace) of (True, False) -> AlignRight @@ -1267,7 +1270,7 @@ alignType strLst len = (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: PandocMonad m => MarkdownParser m String +tableFooter :: PandocMonad m => MarkdownParser m Text tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines' -- Parse a table separator - dashed line. @@ -1277,12 +1280,12 @@ tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: PandocMonad m => [Int] - -> MarkdownParser m [String] + -> MarkdownParser m [Text] rawTableLine indices = do notFollowedBy' (blanklines' <|> tableFooter) - line <- many1Till anyChar newline + line <- many1TillChar anyChar newline return $ map trim $ tail $ - splitStringByIndices (init indices) line + splitTextByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: PandocMonad m @@ -1297,7 +1300,7 @@ multilineRow :: PandocMonad m -> MarkdownParser m (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) - let cols = map unlines $ transpose colLines + let cols = map T.unlines $ transpose colLines fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' @@ -1344,7 +1347,7 @@ multilineTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- compensate for the fact that intercolumn spaces are -- not included in the last index: let indices' = case reverse indices of @@ -1352,14 +1355,14 @@ multilineTableHeader headless = try $ do (x:xs) -> reverse (x+1:xs) rawHeadsList <- if headless then fmap (map (:[]) . tail . - splitStringByIndices (init indices')) $ lookAhead anyLine + splitTextByIndices (init indices')) $ lookAhead anyLine else return $ transpose $ map - (tail . splitStringByIndices (init indices')) + (tail . splitTextByIndices (init indices')) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" - else map (unlines . map trim) rawHeadsList + else map (T.unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads return (heads, aligns, indices') @@ -1393,7 +1396,7 @@ pipeTable = try $ do lines' <- many pipeTableRow let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads' : lines'') + map (\x -> T.length . stringify $ runF x def) (heads' : lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> @@ -1430,7 +1433,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1446,12 +1449,12 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT [Char] st m () +scanForPipe :: PandocMonad m => ParserT Text st m () scanForPipe = do inp <- getInput - case break (\c -> c == '\n' || c == '|') inp of - (_,'|':_) -> return () - _ -> mzero + case T.break (\c -> c == '\n' || c == '|') inp of + (_, T.uncons -> Just ('|', _)) -> return () + _ -> mzero -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in @@ -1561,7 +1564,7 @@ escapedChar = do result <- escapedChar' case result of ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - _ -> return $ return $ B.str [result] + _ -> return $ return $ B.str $ T.singleton result ltSign :: PandocMonad m => MarkdownParser m (F Inlines) ltSign = do @@ -1574,12 +1577,12 @@ exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' - lab <- many1 (alphaNum <|> oneOf "-_") + lab <- many1Char (alphaNum <|> oneOf "-_") return $ do st <- askF return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + Just n -> B.str $ tshow n + Nothing -> B.str $ "@" <> lab symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do @@ -1587,16 +1590,16 @@ symbol = do <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str [result] + return $ return $ B.str $ T.singleton result -- parses inline code, between n `s and n `s code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- (trim . concat) <$> + result <- (trim . T.concat) <$> manyTill (notFollowedBy (inList >> listStart) >> - (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (many1Char (noneOf "`\n") <|> many1Char (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " "))) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) @@ -1627,10 +1630,10 @@ enclosure c = do guardDisabled Ext_intraword_underscores <|> guard (c == '*') <|> (guard =<< notAfterString) - cs <- many1 (char c) + cs <- many1Char (char c) (return (B.str cs) <>) <$> whitespace <|> - case length cs of + case T.length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty @@ -1653,7 +1656,7 @@ three c = do (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) + <|> return (return (B.str $ T.pack [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. @@ -1662,7 +1665,7 @@ two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) (ender c 2 >> updateLastStrPos >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.pack [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. @@ -1673,7 +1676,7 @@ one c prefix' = do notFollowedBy (ender c 1) >> two c mempty) ) (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.singleton c) <> (prefix' <> contents)) strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' @@ -1717,16 +1720,16 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT [Char] st m Char +nonEndline :: PandocMonad m => ParserT Text st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do - result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) + result <- many1Char (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations - if not (null result) && last result == '.' && result `Set.member` abbrevs + if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs then try (do ils <- whitespace -- ?? lookAhead alphaNum -- replace space after with nonbreaking space @@ -1766,36 +1769,36 @@ endline = try $ do -- -- a reference label for a link -reference :: PandocMonad m => MarkdownParser m (F Inlines, String) +reference :: PandocMonad m => MarkdownParser m (F Inlines, Text) reference = do guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") guardDisabled Ext_citations <|> notFollowedBy' (string "[@") withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] +parenthesizedChars :: PandocMonad m => MarkdownParser m Text parenthesizedChars = do result <- charsInBalanced '(' ')' litChar - return $ '(' : result ++ ")" + return $ "(" <> result <> ")" -- source for a link, with optional title -source :: PandocMonad m => MarkdownParser m (String, String) +source :: PandocMonad m => MarkdownParser m (Text, Text) source = do char '(' skipSpaces let urlChunk = try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> count 1 litChar) - <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')")) - let sourceURL = (unwords . words . concat) <$> many urlChunk + <|> (notFollowedBy (oneOf " )") >> countChar 1 litChar) + <|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')")) + let sourceURL = (T.unwords . T.words . T.concat) <$> many urlChunk let betweenAngles = try $ - char '<' >> manyTill litChar (char '>') + char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" $ try $ spnl >> linkTitle skipSpaces char ')' return (escapeURI $ trimr src, tit) -linkTitle :: PandocMonad m => MarkdownParser m String +linkTitle :: PandocMonad m => MarkdownParser m Text linkTitle = quotedTitle '"' <|> quotedTitle '\'' link :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1823,13 +1826,13 @@ isSmallCaps :: Attr -> Bool isSmallCaps ("",["smallcaps"],[]) = True isSmallCaps ("",[],kvs) = case lookup "style" kvs of - Just s -> map toLower (filter (`notElem` " \t;") s) == + Just s -> T.toLower (T.filter (`notElem` [' ', '\t', ';']) s) == "font-variant:small-caps" Nothing -> False isSmallCaps _ = False regLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) + => (Attr -> Text -> Text -> Inlines -> Inlines) -> F Inlines -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do @@ -1840,8 +1843,8 @@ regLink constructor lab = try $ do -- a link like [this][ref] or [this][] or [this] referenceLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) + => (Attr -> Text -> Text -> Inlines -> Inlines) + -> (F Inlines, Text) -> MarkdownParser m (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1863,7 +1866,7 @@ referenceLink constructor (lab, raw) = do parsedRaw' <- parsedRaw fallback' <- fallback return $ B.str "[" <> fallback' <> B.str "]" <> - (if sp && not (null raw) then B.space else mempty) <> + (if sp && not (T.null raw) then B.space else mempty) <> parsedRaw' return $ do keys <- asksF stateKeys @@ -1878,19 +1881,19 @@ referenceLink constructor (lab, raw) = do else makeFallback Just ((src,tit), attr) -> constructor attr src tit <$> lab -dropBrackets :: String -> String -dropBrackets = reverse . dropRB . reverse . dropLB - where dropRB (']':xs) = xs - dropRB xs = xs - dropLB ('[':xs) = xs - dropLB xs = xs +dropBrackets :: Text -> Text +dropBrackets = dropRB . dropLB + where dropRB (T.unsnoc -> Just (xs,']')) = xs + dropRB xs = xs + dropLB (T.uncons -> Just ('[',xs)) = xs + dropLB xs = xs bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) - notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") + notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text)) return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig) autoLink :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1902,19 +1905,20 @@ autoLink = try $ do -- is finished, because the uri parser tries to avoid parsing -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. - extra <- fromEntities <$> manyTill nonspaceChar (char '>') + extra <- fromEntities <$> manyTillChar nonspaceChar (char '>') attr <- option ("", [cls], []) $ try $ guardEnabled Ext_link_attributes >> attributes - return $ return $ B.linkWith attr (src ++ escapeURI extra) "" - (B.str $ orig ++ extra) + return $ return $ B.linkWith attr (src <> escapeURI extra) "" + (B.str $ orig <> extra) image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor attr' src = case takeExtension src of - "" -> B.imageWith attr' (addExtension src defaultExt) + let constructor attr' src = case takeExtension (T.unpack src) of + "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src) + $ T.unpack defaultExt) _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) @@ -1926,7 +1930,7 @@ note = try $ do return $ do notes <- asksF stateNotes' case M.lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Nothing -> return $ B.str $ "[^" <> ref <> "]" Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve @@ -1949,29 +1953,29 @@ rawLaTeXInline' = try $ do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String +rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> many1 letter - contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar) - (try $ string "\\stop" >> string completion) - return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion + <|> many1Char letter + contents <- manyTill (rawConTeXtEnvironment <|> countChar 1 anyChar) + (try $ string "\\stop" >> textStr completion) + return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String +inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text inBrackets parser = do char '[' - contents <- many parser + contents <- manyChar parser char ']' - return $ "[" ++ contents ++ "]" + return $ "[" <> contents <> "]" spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) - contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) + contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ if isSmallCaps (ident, classes, keyvals) then B.smallcaps <$> contents @@ -1980,20 +1984,20 @@ spanHtml = try $ do divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) + (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just "div" } bls <- option "" (blankline >> option "" blanklines) contents <- mconcat <$> - many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) - closed <- option False (True <$ htmlTag (~== TagClose "div")) + many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) + closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) if closed then do updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents else -- avoid backtracing @@ -2005,7 +2009,7 @@ divFenced = try $ do string ":::" skipMany (char ':') skipMany spaceChar - attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar) + attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) skipMany spaceChar skipMany (char ':') blankline @@ -2047,7 +2051,7 @@ emoji :: PandocMonad m => MarkdownParser m (F Inlines) emoji = try $ do guardEnabled Ext_emoji char ':' - emojikey <- many1 (oneOf emojiChars) + emojikey <- many1Char (oneOf emojiChars) char ':' case emojiToInline emojikey of Just i -> return (return $ B.singleton i) @@ -2077,14 +2081,14 @@ textualCite = try $ do mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite case mbrest of Just (rest, raw) -> - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) + return $ (flip B.cite (B.text $ "@" <> key <> " " <> raw) . (first:)) <$> rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - let (spaces',raw') = span isSpace raw - spc | null spaces' = mempty - | otherwise = B.space + let (spaces',raw') = T.span isSpace raw + spc | T.null spaces' = mempty + | otherwise = B.space lab <- parseFromString' inlines $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do @@ -2092,12 +2096,12 @@ textualCite = try $ do cs' <- cs return $ case B.toList fallback' of - Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback' - _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw)) + Link{}:_ -> B.cite [first] (B.str $ "@" <> key) <> spc <> fallback' + _ -> B.cite cs' (B.text $ "@" <> key <> " " <> raw)) <|> return (do st <- askF return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) + Just n -> B.str $ tshow n + _ -> B.cite [first] $ B.str $ "@" <> key) bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) bareloc c = try $ do diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 35bb8e3eb..07240e951 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | Module : Text.Pandoc.Readers.MediaWiki @@ -24,11 +25,12 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isSpace) import qualified Data.Foldable as F -import Data.List (intercalate, intersperse, isPrefixOf) +import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import Data.Sequence (ViewL (..), viewl, (<|)) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B @@ -39,7 +41,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, - trim) + trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -57,7 +59,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack (crFilter s) ++ "\n") + (crFilter s <> "\n") case parsed of Right result -> return result Left e -> throwError e @@ -66,12 +68,12 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] - , mwIdentifierList :: Set.Set String + , mwIdentifierList :: Set.Set Text , mwLogMessages :: [LogMessage] , mwInTT :: Bool } -type MWParser m = ParserT [Char] MWState m +type MWParser m = ParserT Text MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -105,58 +107,58 @@ specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" -sym :: PandocMonad m => String -> MWParser m () -sym s = () <$ try (string s) +sym :: PandocMonad m => Text -> MWParser m () +sym s = () <$ try (string $ T.unpack s) -newBlockTags :: [String] +newBlockTags :: [Text] newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] -isBlockTag' :: Tag String -> Bool +isBlockTag' :: Tag Text -> Bool isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag = isBlockTag tag -isInlineTag' :: Tag String -> Bool +isInlineTag' :: Tag Text -> Bool isInlineTag' (TagComment _) = True isInlineTag' t = not (isBlockTag' t) -eitherBlockOrInline :: [String] +eitherBlockOrInline :: [Text] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] htmlComment :: PandocMonad m => MWParser m () htmlComment = () <$ htmlTag isCommentTag -inlinesInTags :: PandocMonad m => String -> MWParser m Inlines +inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines inlinesInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else trimInlines . mconcat <$> manyTill inline (htmlTag (~== TagClose tag)) -blocksInTags :: PandocMonad m => String -> MWParser m Blocks +blocksInTags :: PandocMonad m => Text -> MWParser m Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) let closer = if tag == "li" - then htmlTag (~== TagClose "li") + then htmlTag (~== TagClose ("li" :: Text)) <|> lookAhead ( - htmlTag (~== TagOpen "li" []) - <|> htmlTag (~== TagClose "ol") - <|> htmlTag (~== TagClose "ul")) + htmlTag (~== TagOpen ("li" :: Text) []) + <|> htmlTag (~== TagClose ("ol" :: Text)) + <|> htmlTag (~== TagClose ("ul" :: Text))) else htmlTag (~== TagClose tag) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else mconcat <$> manyTill block closer -charsInTags :: PandocMonad m => String -> MWParser m [Char] -charsInTags tag = try $ do +textInTags :: PandocMonad m => Text -> MWParser m Text +textInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return "" - else manyTill anyChar (htmlTag (~== TagClose tag)) + else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag)) -- -- main parser @@ -192,7 +194,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks @@ -234,16 +236,16 @@ table = do else (replicate cols mempty, hdr:rows') return $ B.table caption cellspecs headers rows -parseAttrs :: PandocMonad m => MWParser m [(String,String)] +parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr -parseAttr :: PandocMonad m => MWParser m (String, String) +parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar - k <- many1 letter + k <- many1Char letter char '=' - v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) - <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') + v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) + <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) tableStart :: PandocMonad m => MWParser m () @@ -293,8 +295,8 @@ tableCell = try $ do notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition - ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> - ((snd <$> withRaw table) <|> count 1 anyChar)) + ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> + ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls let align = case lookup "align" attrs of @@ -307,48 +309,49 @@ tableCell = try $ do Nothing -> 0.0 return ((align, width), bs) -parseWidth :: String -> Maybe Double +parseWidth :: Text -> Maybe Double parseWidth s = - case reverse s of - ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) - _ -> Nothing + case T.unsnoc s of + Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds + _ -> Nothing -template :: PandocMonad m => MWParser m String +template :: PandocMonad m => MWParser m Text template = try $ do string "{{" notFollowedBy (char '{') lookAhead $ letter <|> digit <|> char ':' - let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar + let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar contents <- manyTill chunk (try $ string "}}") - return $ "{{" ++ concat contents ++ "}}" + return $ "{{" <> T.concat contents <> "}}" blockTag :: PandocMonad m => MWParser m Blocks blockTag = do (tag, _) <- lookAhead $ htmlTag isBlockTag' case tag of TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" - TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre" + TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre" TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs TagOpen "source" attrs -> syntaxhighlight "source" attrs TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> - charsInTags "haskell" + textInTags "haskell" TagOpen "gallery" _ -> blocksInTags "gallery" TagOpen "p" _ -> mempty <$ htmlTag (~== tag) TagClose "p" -> mempty <$ htmlTag (~== tag) _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) -trimCode :: String -> String -trimCode ('\n':xs) = stripTrailingNewlines xs -trimCode xs = stripTrailingNewlines xs +trimCode :: Text -> Text +trimCode t = case T.uncons t of + Just ('\n', xs) -> stripTrailingNewlines xs + _ -> stripTrailingNewlines t -syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks +syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks syntaxhighlight tag attrs = try $ do let mblang = lookup "lang" attrs let mbstart = lookup "start" attrs let mbline = lookup "line" attrs let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart - contents <- charsInTags tag + contents <- textInTags tag return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents hrule :: PandocMonad m => MWParser m Blocks @@ -362,17 +365,17 @@ preformatted = try $ do guardColumnOne char ' ' let endline' = B.linebreak <$ try (newline <* char ' ') - let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) + let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar) let spToNbsp ' ' = '\160' spToNbsp x = x let nowiki' = mconcat . intersperse B.linebreak . map B.str . - lines . fromEntities . map spToNbsp <$> try - (htmlTag (~== TagOpen "nowiki" []) *> - manyTill anyChar (htmlTag (~== TagClose "nowiki"))) + T.lines . fromEntities . T.map spToNbsp <$> try + (htmlTag (~== TagOpen ("nowiki" :: Text) []) *> + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))) let inline' = whitespace' <|> endline' <|> nowiki' <|> try (notFollowedBy newline *> inline) contents <- mconcat <$> many1 inline' - let spacesStr (Str xs) = all isSpace xs + let spacesStr (Str xs) = T.all isSpace xs spacesStr _ = False if F.all spacesStr contents then return mempty @@ -385,7 +388,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode strToCode x = x normalizeCode [] = [] normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = - normalizeCode $ Code a1 (x ++ y) : zs + normalizeCode $ Code a1 (x <> y) : zs normalizeCode (x:xs) = x : normalizeCode xs header :: PandocMonad m => MWParser m Blocks @@ -400,22 +403,22 @@ header = try $ do -- See #4731: modifyIdentifier :: Attr -> Attr modifyIdentifier (ident,cl,kv) = (ident',cl,kv) - where ident' = map (\c -> if c == '-' then '_' else c) ident + where ident' = T.map (\c -> if c == '-' then '_' else c) ident bulletList :: PandocMonad m => MWParser m Blocks bulletList = B.bulletList <$> ( many1 (listItem '*') - <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* - optional (htmlTag (~== TagClose "ul"))) ) + <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <* + optional (htmlTag (~== TagClose ("ul" :: Text)))) ) orderedList :: PandocMonad m => MWParser m Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) <|> try - (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) []) spaces items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) + optional (htmlTag (~== TagClose ("ol" :: Text))) let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) @@ -452,7 +455,7 @@ anyListStart :: PandocMonad m => MWParser m Char anyListStart = guardColumnOne >> oneOf "*#:;" li :: PandocMonad m => MWParser m Blocks -li = lookAhead (htmlTag (~== TagOpen "li" [])) *> +li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *> (firstParaToPlain <$> blocksInTags "li") <* spaces listItem :: PandocMonad m => Char -> MWParser m Blocks @@ -464,13 +467,13 @@ listItem c = try $ do else do skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ string extras *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) contents <- parseFromString (do setPosition pos' many1 $ listItem' c) - (unlines (first : rest)) + (T.unlines (first : rest)) case c of '*' -> return $ B.bulletList contents '#' -> return $ B.orderedList contents @@ -484,20 +487,20 @@ listItem c = try $ do -- }} -- * next list item -- which seems to be valid mediawiki. -listChunk :: PandocMonad m => MWParser m String -listChunk = template <|> count 1 anyChar +listChunk :: PandocMonad m => MWParser m Text +listChunk = template <|> countChar 1 anyChar listItem' :: PandocMonad m => Char -> MWParser m Blocks listItem' c = try $ do listStart c skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ char c *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) parseFromString (do setPosition pos' firstParaToPlain . mconcat <$> many1 block) - $ unlines $ first : rest + $ T.unlines $ first : rest firstParaToPlain :: Blocks -> Blocks firstParaToPlain contents = @@ -528,23 +531,23 @@ inline = whitespace <|> special str :: PandocMonad m => MWParser m Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) math :: PandocMonad m => MWParser m Inlines -math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) - <|> (B.math . trim <$> charsInTags "math") - <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) - <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) +math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math")) + <|> (B.math . trim <$> textInTags "math") + <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd)) + <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd)) where dmStart = string "\\[" dmEnd = try (string "\\]") mStart = string "\\(" mEnd = try (string "\\)") -variable :: PandocMonad m => MWParser m String +variable :: PandocMonad m => MWParser m Text variable = try $ do string "{{{" - contents <- manyTill anyChar (try $ string "}}}") - return $ "{{{" ++ contents ++ "}}}" + contents <- manyTillChar anyChar (try $ string "}}}") + return $ "{{{" <> contents <> "}}}" inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do @@ -553,11 +556,11 @@ inlineTag = do TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" TagOpen "nowiki" _ -> try $ do (_,raw) <- htmlTag (~== tag) - if '/' `elem` raw + if T.any (== '/') raw then return mempty else B.text . fromEntities <$> - manyTill anyChar (htmlTag (~== TagClose "nowiki")) - TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))) + TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too *> optional blankline) TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" @@ -570,12 +573,12 @@ inlineTag = do result <- encode <$> inlinesInTags "tt" updateState $ \st -> st{ mwInTT = inTT } return result - TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" + TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) special :: PandocMonad m => MWParser m Inlines -special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> - oneOf specialChars) +special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *> + oneOf specialChars) inlineHtml :: PandocMonad m => MWParser m Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' @@ -594,7 +597,7 @@ endline = () <$ try (newline <* notFollowedBy anyListStart) imageIdentifiers :: PandocMonad m => [MWParser m ()] -imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] +imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers] where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", "Bild"] @@ -602,9 +605,9 @@ image :: PandocMonad m => MWParser m Inlines image = try $ do sym "[[" choice imageIdentifiers - fname <- addUnderscores <$> many1 (noneOf "|]") + fname <- addUnderscores <$> many1Char (noneOf "|]") _ <- many imageOption - dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px") + dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px") <|> return [] _ <- many imageOption let kvs = case dims of @@ -614,9 +617,9 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption + return $ B.imageWith attr fname ("fig:" <> stringify caption) caption -imageOption :: PandocMonad m => MWParser m String +imageOption :: PandocMonad m => MWParser m Text imageOption = try $ char '|' *> opt where opt = try (oneOfStrings [ "border", "thumbnail", "frameless" @@ -624,30 +627,27 @@ imageOption = try $ char '|' *> opt , "center", "none", "baseline", "sub" , "super", "top", "text-top", "middle" , "bottom", "text-bottom" ]) - <|> try (string "frame") + <|> try (textStr "frame") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) -collapseUnderscores :: String -> String -collapseUnderscores [] = [] -collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) -collapseUnderscores (x:xs) = x : collapseUnderscores xs - -addUnderscores :: String -> String -addUnderscores = collapseUnderscores . intercalate "_" . words +addUnderscores :: Text -> Text +addUnderscores = T.intercalate "_" . splitTextBy sep + where + sep c = isSpace c || c == '_' internalLink :: PandocMonad m => MWParser m Inlines internalLink = try $ do sym "[[" - pagename <- unwords . words <$> many (noneOf "|]") + pagename <- T.unwords . T.words <$> manyChar (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) -- the "pipe trick" -- [[Help:Contents|] -> "Contents" - <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) ) + <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) ) sym "]]" - linktrail <- B.text <$> many letter + linktrail <- B.text <$> manyChar letter let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) - if "Category:" `isPrefixOf` pagename + if "Category:" `T.isPrefixOf` pagename then do updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } return mempty @@ -662,7 +662,7 @@ externalLink = try $ do <|> do char ']' num <- mwNextLinkNumber <$> getState updateState $ \st -> st{ mwNextLinkNumber = num + 1 } - return $ B.str $ show num + return $ B.str $ tshow num return $ B.link src "" lab url :: PandocMonad m => MWParser m Inlines diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index b8cbe2f26..4ade61294 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Muse Copyright : Copyright (C) 2017-2019 Alexander Krotov @@ -24,12 +25,12 @@ import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor import Data.Default -import Data.List (intercalate, transpose, uncons) -import Data.List.Split (splitOn) +import Data.List (transpose, uncons) import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe (fromMaybe, isNothing, maybeToList) -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) @@ -38,7 +39,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (F) -import Text.Pandoc.Shared (crFilter, trimr, underlineSpan) +import Text.Pandoc.Shared (crFilter, trimr, underlineSpan, tshow) -- | Read Muse from an input string and return a Pandoc document. readMuse :: PandocMonad m @@ -49,18 +50,18 @@ readMuse opts s = do let input = crFilter s res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type F = Future MuseState data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museOptions :: ReaderOptions - , museIdentifierList :: Set.Set String + , museIdentifierList :: Set.Set Text , museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] - , museNotes :: M.Map String (SourcePos, F Blocks) + , museNotes :: M.Map Text (SourcePos, F Blocks) } instance Default MuseState where @@ -116,22 +117,27 @@ parseMuse = do -- * Utility functions -- | Trim up to one newline from the beginning of the string. -lchop :: String -> String -lchop ('\n':xs) = xs -lchop s = s +lchop :: Text -> Text +lchop s = case T.uncons s of + Just ('\n', xs) -> xs + _ -> s -- | Trim up to one newline from the end of the string. -rchop :: String -> String -rchop = reverse . lchop . reverse +rchop :: Text -> Text +rchop s = case T.unsnoc s of + Just (xs, '\n') -> xs + _ -> s -unindent :: String -> String -unindent = rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop +unindent :: Text -> Text +unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop -dropSpacePrefix :: [String] -> [String] -dropSpacePrefix lns = drop maxIndent <$> lns +dropSpacePrefix :: [Text] -> [Text] +dropSpacePrefix lns = T.drop maxIndent <$> lns where isSpaceChar c = c == ' ' || c == '\t' - maxIndent = length $ takeWhile (isSpaceChar . head) $ takeWhile same $ transpose lns - same = and . (zipWith (==) <*> drop 1) + maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns + same t = case T.uncons t of + Just (c, cs) -> T.all (== c) cs + Nothing -> True atStart :: PandocMonad m => MuseParser m () atStart = do @@ -160,29 +166,29 @@ getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition -- ** HTML parsers -openTag :: PandocMonad m => String -> MuseParser m [(String, String)] +openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)] openTag tag = try $ - char '<' *> string tag *> manyTill attr (char '>') + char '<' *> textStr tag *> manyTill attr (char '>') where attr = try $ (,) <$ many1 spaceChar - <*> many1 (noneOf "=\n") + <*> many1Char (noneOf "=\n") <* string "=\"" - <*> manyTill (noneOf "\"") (char '"') + <*> manyTillChar (noneOf "\"") (char '"') -closeTag :: PandocMonad m => String -> MuseParser m () -closeTag tag = try $ string "</" *> string tag *> void (char '>') +closeTag :: PandocMonad m => Text -> MuseParser m () +closeTag tag = try $ string "</" *> textStr tag *> void (char '>') -- | Convert HTML attributes to Pandoc 'Attr' -htmlAttrToPandoc :: [(String, String)] -> Attr +htmlAttrToPandoc :: [(Text, Text)] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"] parseHtmlContent :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (Attr, F Blocks) parseHtmlContent tag = try $ getIndent >>= \indent -> (,) <$> fmap htmlAttrToPandoc (openTag tag) @@ -193,16 +199,16 @@ parseHtmlContent tag = try $ getIndent >>= \indent -> (,) -- ** Directive parsers -- While not documented, Emacs Muse allows "-" in directive name -parseDirectiveKey :: PandocMonad m => MuseParser m String -parseDirectiveKey = char '#' *> many (letter <|> char '-') +parseDirectiveKey :: PandocMonad m => MuseParser m Text +parseDirectiveKey = char '#' *> manyChar (letter <|> char '-') -parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseEmacsDirective = (,) <$> parseDirectiveKey <* spaceChar <*> (trimInlinesF . mconcat <$> manyTill inline' eol) -parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseAmuseDirective = (,) <$> parseDirectiveKey <* many1 spaceChar @@ -289,7 +295,7 @@ listItemContentsUntil col pre end = p parseBlock :: PandocMonad m => MuseParser m (F Blocks) parseBlock = do res <- blockElements <|> para - trace (take 60 $ show $ B.toList $ runF res def) + trace (T.take 60 $ tshow $ B.toList $ runF res def) return res where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) @@ -337,7 +343,7 @@ pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always <* string "* * * * *" <* manyTill spaceChar eol -headingStart :: PandocMonad m => MuseParser m (String, Int) +headingStart :: PandocMonad m => MuseParser m (Text, Int) headingStart = try $ (,) <$> option "" (try (parseAnchor <* manyTill spaceChar eol)) <* firstColumn @@ -371,14 +377,14 @@ example :: PandocMonad m => MuseParser m (F Blocks) example = try $ pure . B.codeBlock <$ string "{{{" <* many spaceChar - <*> (unindent <$> manyTill anyChar (string "}}}")) + <*> (unindent <$> manyTillChar anyChar (string "}}}")) -- | Parse an @\<example>@ tag. exampleTag :: PandocMonad m => MuseParser m (F Blocks) exampleTag = try $ fmap pure $ B.codeBlockWith <$ many spaceChar <*> (htmlAttrToPandoc <$> openTag "example") - <*> (unindent <$> manyTill anyChar (closeTag "example")) + <*> (unindent <$> manyTillChar anyChar (closeTag "example")) <* manyTill spaceChar eol -- | Parse a @\<literal>@ tag as a raw block. @@ -388,7 +394,7 @@ literalTag = try $ fmap pure $ B.rawBlock <$ many spaceChar <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML <* manyTill spaceChar eol - <*> (unindent <$> manyTill anyChar (closeTag "literal")) + <*> (unindent <$> manyTillChar anyChar (closeTag "literal")) <* manyTill spaceChar eol -- | Parse @\<center>@ tag. @@ -428,7 +434,7 @@ playTag = do verseLine :: PandocMonad m => MuseParser m (F Inlines) verseLine = (<>) - <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' '))) + <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' '))) <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol) -- | Parse @\<verse>@ tag. @@ -466,17 +472,17 @@ paraUntil end = do noteMarker' :: PandocMonad m => Char -> Char - -> MuseParser m String -noteMarker' l r = try $ (\x y -> l:x:y ++ [r]) + -> MuseParser m Text +noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r]) <$ char l <*> oneOf "123456789" <*> manyTill digit (char r) -noteMarker :: PandocMonad m => MuseParser m String +noteMarker :: PandocMonad m => MuseParser m Text noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}' addNote :: PandocMonad m - => String + => Text -> SourcePos -> F Blocks -> MuseParser m () @@ -674,15 +680,15 @@ museGridTableRow :: PandocMonad m -> MuseParser m (F [Blocks]) museGridTableRow indent indices = try $ do lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices) - let cols = map (unlines . map trimr) $ transpose lns + let cols = map (T.unlines . map trimr) $ transpose lns indentWith indent *> museGridTableHeader sequence <$> mapM (parseFromString' parseBlocks) cols museGridTableRawLine :: PandocMonad m => [Int] - -> MuseParser m [String] + -> MuseParser m [Text] museGridTableRawLine indices = - char '|' *> forM indices (\n -> count n anyChar <* char '|') <* manyTill spaceChar eol + char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol museGridTable :: PandocMonad m => MuseParser m (F Blocks) museGridTable = try $ do @@ -767,12 +773,12 @@ inline = endline <|> inline' endline :: PandocMonad m => MuseParser m (F Inlines) endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos -parseAnchor :: PandocMonad m => MuseParser m String -parseAnchor = try $ (:) +parseAnchor :: PandocMonad m => MuseParser m Text +parseAnchor = try $ T.cons <$ firstColumn <* char '#' <*> letter - <*> many (letter <|> digit <|> char '-') + <*> manyChar (letter <|> digit <|> char '-') anchor :: PandocMonad m => MuseParser m (F Inlines) anchor = try $ do @@ -813,7 +819,7 @@ emphasisBetween p = try $ trimInlinesF . mconcat -- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (F Inlines) inlineTag tag = try $ mconcat <$ openTag tag @@ -862,12 +868,12 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del" verbatimTag :: PandocMonad m => MuseParser m (F Inlines) verbatimTag = return . B.text <$ openTag "verbatim" - <*> manyTill anyChar (closeTag "verbatim") + <*> manyTillChar anyChar (closeTag "verbatim") -- | Parse @\<class>@ tag. classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do - classes <- maybe [] words . lookup "name" <$> openTag "class" + classes <- maybe [] T.words . lookup "name" <$> openTag "class" fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class") -- | Parse @\<\<\<RTL>>>@ text. @@ -886,43 +892,43 @@ nbsp = try $ pure (B.str "\160") <$ string "~~" -- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) -code = try $ fmap pure $ B.code . uncurry (++) +code = try $ fmap pure $ B.code . uncurry (<>) <$ atStart <* char '=' <* notFollowedBy (spaceChar <|> newline) - <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=') + <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=') <* notFollowedBy alphaNum -- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = fmap pure $ B.codeWith <$> (htmlAttrToPandoc <$> openTag "code") - <*> manyTill anyChar (closeTag "code") + <*> manyTillChar anyChar (closeTag "code") -- | Parse @\<math>@ tag. -- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ mathTag :: PandocMonad m => MuseParser m (F Inlines) mathTag = return . B.math <$ openTag "math" - <*> manyTill anyChar (closeTag "math") + <*> manyTillChar anyChar (closeTag "math") -- | Parse inline @\<literal>@ tag as a raw inline. inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) inlineLiteralTag = try $ fmap pure $ B.rawInline <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML - <*> manyTill anyChar (closeTag "literal") + <*> manyTillChar anyChar (closeTag "literal") str :: PandocMonad m => MuseParser m (F Inlines) -str = return . B.str <$> many1 alphaNum <* updateLastStrPos +str = return . B.str <$> many1Char alphaNum <* updateLastStrPos -- | Consume asterisks that were not used as emphasis opening. -- This prevents series of asterisks from being split into -- literal asterisk and emphasis opening. asterisks :: PandocMonad m => MuseParser m (F Inlines) -asterisks = pure . B.str <$> many1 (char '*') +asterisks = pure . B.str <$> many1Char (char '*') symbol :: PandocMonad m => MuseParser m (F Inlines) -symbol = pure . B.str . pure <$> nonspaceChar +symbol = pure . B.str . T.singleton <$> nonspaceChar -- | Parse a link or image. linkOrImage :: PandocMonad m => MuseParser m (F Inlines) @@ -934,12 +940,12 @@ linkContent = trimInlinesF . mconcat <*> manyTill inline (char ']') -- | Parse a link starting with (possibly null) prefix -link :: PandocMonad m => String -> MuseParser m (F Inlines) +link :: PandocMonad m => Text -> MuseParser m (F Inlines) link prefix = try $ do inLink <- asks museInLink guard $ not inLink - string $ "[[" ++ prefix - url <- manyTill anyChar $ char ']' + textStr $ "[[" <> prefix + url <- manyTillChar anyChar $ char ']' content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent) char ']' return $ B.link url "" <$> content @@ -947,27 +953,27 @@ link prefix = try $ do image :: PandocMonad m => MuseParser m (F Inlines) image = try $ do string "[[" - (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']') + (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']') content <- option mempty linkContent char ']' let widthAttr = case align of - Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")] - _ -> maybeToList (("width",) . (++ "%") <$> width) + Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")] + _ -> maybeToList (("width",) . (<> "%") <$> width) let alignClass = case align of Just 'r' -> ["align-right"] Just 'l' -> ["align-left"] Just 'f' -> [] _ -> [] - return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content + return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] - imageExtension = choice (try . string <$> imageExtensions) + imageExtension = choice (try . textStr <$> imageExtensions) imageExtensionAndOptions = do ext <- imageExtension (width, align) <- option (Nothing, Nothing) imageAttrs return (ext, width, align) imageAttrs = (,) <$ many1 spaceChar - <*> optionMaybe (many1 digit) + <*> optionMaybe (many1Char digit) <* many spaceChar <*> optionMaybe (oneOf "rlf") diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 9e3c118d8..34d3c5e8f 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Native Copyright : Copyright (C) 2011-2019 John MacFarlane @@ -19,7 +20,7 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (safeRead) import Control.Monad.Except (throwError) -import Data.Text (Text, unpack) +import Data.Text (Text) import Text.Pandoc.Class import Text.Pandoc.Error @@ -38,18 +39,18 @@ readNative :: PandocMonad m -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of Right doc -> return doc Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: Text -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s)) +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) readBlock :: Text -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s)) +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) readInlines :: Text -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s)) +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) readInline :: Text -> Either PandocError Inline -readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s)) +readInline s = maybe (Left . PandocParseError $ "Could not read: " <> s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 2c3b0367f..5330b0238 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.OPML Copyright : Copyright (C) 2013-2019 John MacFarlane @@ -18,7 +19,8 @@ import Data.Char (toUpper) import Data.Default import Data.Generics import Data.Maybe (fromMaybe) -import Data.Text (Text, pack, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad) @@ -50,7 +52,7 @@ readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT (mapM parseBlock $ normalizeTree $ - parseXML (unpack (crFilter inp))) def{ opmlOptions = opts } + parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ @@ -76,23 +78,26 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + +textContent :: Element -> Text +textContent = T.pack . strContent -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a -- exceptT = either throwError return -asHtml :: PandocMonad m => String -> OPML m Inlines +asHtml :: PandocMonad m => Text -> OPML m Inlines asHtml s = do opts <- gets opmlOptions - Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } s return $ blocksToInlines' bs -asMarkdown :: PandocMonad m => String -> OPML m Blocks +asMarkdown :: PandocMonad m => Text -> OPML m Blocks asMarkdown s = do opts <- gets opmlOptions - Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } s return $ fromList bs getBlocks :: PandocMonad m => Element -> OPML m Blocks @@ -102,11 +107,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ strContent e]}) + st{opmlDocAuthors = [text $ textContent e]}) "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ strContent e}) + st{opmlDocDate = text $ textContent e}) "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ strContent e}) + st{opmlDocTitle = text $ textContent e}) "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e @@ -115,7 +120,7 @@ parseBlock (Elem e) = modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } - let headerText' = case map toUpper (attrValue "type" e) of + let headerText' = case T.toUpper (attrValue "type" e) of "LINK" -> link (attrValue "url" e) "" headerText _ -> headerText diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index dfa019932..f9b78d5bf 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt Copyright : Copyright (C) 2015 Martin Linnemann diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index d8e5ba272..ff8cdc5fa 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -1,11 +1,12 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE Arrows #-} -{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE Arrows #-} +{-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.ContentReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -29,8 +30,9 @@ import Control.Arrow import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, intercalate, stripPrefix) +import Data.List (find, stripPrefix) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Semigroup (First(..), Option(..)) @@ -59,7 +61,7 @@ import qualified Data.Set as Set -- State -------------------------------------------------------------------------------- -type Anchor = String +type Anchor = T.Text type Media = [(FilePath, B.ByteString)] data ReaderState @@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue ( ) >>^ fst -lookupResource :: OdtReaderSafe String (FilePath, B.ByteString) +lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString) lookupResource = proc target -> do state <- getExtraState -< () case lookup target (getMediaEnv state) of Just bs -> returnV (target, bs) -<< () Nothing -> returnV ("", B.empty) -< () -type AnchorPrefix = String +type AnchorPrefix = T.Text -- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a -- unique identifier but without assuming that the id should be for a header. -- Second argument is a list of already used identifiers. uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor uniqueIdentFrom baseIdent usedIdents = - let numIdent n = baseIdent ++ "-" ++ show n + let numIdent n = baseIdent <> "-" <> T.pack (show n) in if baseIdent `elem` usedIdents then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of Just x -> numIdent x @@ -305,7 +307,7 @@ withNewStyle a = proc x -> do isCodeStyle _ = False inlineCode :: Inlines -> Inlines - inlineCode = code . intercalate "" . map stringify . toList + inlineCode = code . T.concat . map stringify . toList type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) type InlineModifier = Inlines -> Inlines @@ -535,7 +537,6 @@ matchChildContent :: (Monoid result) -> OdtReaderSafe _x result matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback - -------------------------------------------- -- Matchers -------------------------------------------- @@ -556,8 +557,8 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover ) >>?% mappend -- - extractText :: XML.Content -> Fallible String - extractText (XML.Text cData) = succeedWith (XML.cdData cData) + extractText :: XML.Content -> Fallible T.Text + extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -675,8 +676,8 @@ read_list_item = matchingElement NsText "list-item" read_link :: InlineMatcher read_link = matchingElement NsText "a" $ liftA3 link - ( findAttrWithDefault NsXLink "href" "" ) - ( findAttrWithDefault NsOffice "title" "" ) + ( findAttrTextWithDefault NsXLink "href" "" ) + ( findAttrTextWithDefault NsOffice "title" "" ) ( matchChildContent [ read_span , read_note , read_citation @@ -709,12 +710,12 @@ read_citation :: InlineMatcher read_citation = matchingElement NsText "bibliography-mark" $ liftA2 cite ( liftA2 makeCitation - ( findAttrWithDefault NsText "identifier" "" ) + ( findAttrTextWithDefault NsText "identifier" "" ) ( readAttrWithDefault NsText "number" 0 ) ) ( matchChildContent [] read_plain_text ) where - makeCitation :: String -> Int -> [Citation] + makeCitation :: T.Text -> Int -> [Citation] makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] @@ -779,17 +780,17 @@ read_frame_img = let exts = extensionsFromList [Ext_auto_identifiers] resource <- lookupResource -< src' _ <- updateMediaWithResource -< resource - w <- findAttr' NsSVG "width" -< () - h <- findAttr' NsSVG "height" -< () + w <- findAttrText' NsSVG "width" -< () + h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) -image_attributes :: Maybe String -> Maybe String -> Attr +image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr image_attributes x y = ( "", [], (dim "width" x) ++ (dim "height" y)) where @@ -806,7 +807,7 @@ read_frame_mathml = src' -> do let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" (_, mathml) <- lookupResource -< path - case readMathML (UTF8.toString $ B.toStrict mathml) of + case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps @@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do read_img_with_caption :: [Block] -> FirstMatch Inlines read_img_with_caption (Para [Image attr alt (src,title)] : _) = - firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption + firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) = - firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows + firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows read_img_with_caption ( Para (_ : xs) : ys) = read_img_with_caption (Para xs : ys) read_img_with_caption _ = @@ -829,12 +830,12 @@ read_img_with_caption _ = -- Internal links ---------------------- -_ANCHOR_PREFIX_ :: String +_ANCHOR_PREFIX_ :: T.Text _ANCHOR_PREFIX_ = "anchor" -- readAnchorAttr :: OdtReader _x Anchor -readAnchorAttr = findAttr NsText "name" +readAnchorAttr = findAttrText NsText "name" -- | Beware: may fail findAnchorName :: OdtReader AnchorPrefix Anchor @@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start" -- | Beware: may fail findAnchorRef :: OdtReader _x Anchor -findAnchorRef = ( findAttr NsText "ref-name" +findAnchorRef = ( findAttrText NsText "ref-name" >>?^ (_ANCHOR_PREFIX_,) ) >>?! getPrettyAnchor @@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do Left _ -> returnA -< inlines where toAnchorRef :: Anchor -> Inlines -> Inlines - toAnchorRef anchor = link ('#':anchor) "" -- no title + toAnchorRef anchor = link ("#" <> anchor) "" -- no title -- read_bookmark_ref :: InlineMatcher diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index ccbaf6fc4..59d1b8abd 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , lookupAttr' , lookupDefaultingAttr , findAttr' +, findAttrText' , findAttr +, findAttrText , findAttrWithDefault +, findAttrTextWithDefault , readAttr , readAttr' , readAttrWithDefault @@ -59,6 +62,7 @@ import Control.Arrow import Data.Either ( rights ) import qualified Data.Map as M +import qualified Data.Text as T import Data.Default import Data.Maybe @@ -79,6 +83,7 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible type ElementName = String type AttributeName = String type AttributeValue = String +type TextAttributeValue = T.Text -- type NameSpacePrefix = String @@ -466,6 +471,16 @@ findAttr' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr +-- | Return value as a (Maybe Text) +findAttrText' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe TextAttributeValue) +findAttrText' nsID attrName + = qualifyName nsID attrName + &&& getCurrentElement + >>% XML.findAttr + >>^ fmap T.pack + -- | Return value as string or fail findAttr :: (NameSpaceID nsID) => nsID -> AttributeName @@ -473,6 +488,15 @@ findAttr :: (NameSpaceID nsID) findAttr nsID attrName = findAttr' nsID attrName >>> maybeToChoice +-- | Return value as text or fail +findAttrText :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x TextAttributeValue +findAttrText nsID attrName + = findAttr' nsID attrName + >>^ fmap T.pack + >>> maybeToChoice + -- | Return value as string or return provided default value findAttrWithDefault :: (NameSpaceID nsID) => nsID -> AttributeName @@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt = findAttr' nsID attrName >>^ fromMaybe deflt +-- | Return value as string or return provided default value +findAttrTextWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> TextAttributeValue + -> XMLConverter nsID extraState x TextAttributeValue +findAttrTextWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ maybe deflt T.pack + -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) => nsID -> AttributeName diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 79e8d7aea..99fa05880 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) readListLevelStyle levelType = readAttr NsText "level" >>?! keepingTheValue ( liftA5 toListLevelStyle - ( returnV levelType ) - ( findAttr' NsStyle "num-prefix" ) - ( findAttr' NsStyle "num-suffix" ) - ( getAttr NsStyle "num-format" ) - ( findAttr' NsText "start-value" ) + ( returnV levelType ) + ( findAttr' NsStyle "num-prefix" ) + ( findAttr' NsStyle "num-suffix" ) + ( getAttr NsStyle "num-format" ) + ( findAttrText' NsText "start-value" ) ) where toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2c88c7776..99ece152c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -26,7 +27,6 @@ import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) import Data.Text (Text) -import qualified Data.Text as T -- | Parse org-mode string and return a Pandoc document. readOrg :: PandocMonad m @@ -36,7 +36,7 @@ readOrg :: PandocMonad m readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index 58db4f46c..b4f3cc0d8 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.BlockStarts Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts import Prelude import Control.Monad (void) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Org.Parsing -- | Horizontal Line (five -- dashes or more) @@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m () gridTableStart = try $ skipSpaces <* char '+' <* char '-' -latexEnvStart :: Monad m => OrgParser m String +latexEnvStart :: Monad m => OrgParser m Text latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" <* blankline where - latexEnvName :: Monad m => OrgParser m String - latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") + latexEnvName :: Monad m => OrgParser m Text + latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*") bulletListStart :: Monad m => OrgParser m Int bulletListStart = try $ do @@ -68,7 +71,7 @@ bulletListStart = try $ do return (ind + 1) genericListStart :: Monad m - => OrgParser m String + => OrgParser m Text -> OrgParser m Int genericListStart listMarker = try $ do ind <- length <$> many spaceChar @@ -82,11 +85,11 @@ eol = void (char '\n') orderedListStart :: Monad m => OrgParser m Int orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode - where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)" -drawerStart :: Monad m => OrgParser m String +drawerStart :: Monad m => OrgParser m Text drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline - where drawerName = char ':' *> manyTill nonspaceChar (char ':') + where drawerName = char ':' *> manyTillChar nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () metaLineStart = try $ skipSpaces <* string "#+" @@ -99,12 +102,12 @@ commentLineStart = try $ exampleLineStart :: Monad m => OrgParser m () exampleLineStart = () <$ try (skipSpaces *> string ": ") -noteMarker :: Monad m => OrgParser m String +noteMarker :: Monad m => OrgParser m Text noteMarker = try $ do char '[' - choice [ many1Till digit (char ']') - , (++) <$> string "fn:" - <*> many1Till (noneOf "\n\r\t ") (char ']') + choice [ many1TillChar digit (char ']') + , (<>) <$> textStr "fn:" + <*> many1TillChar (noneOf "\n\r\t ") (char ']') ] -- | Succeeds if the parser is at the end of a block. diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index cba876f06..de51dec3d 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Blocks, Inlines) @@ -33,11 +34,13 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) import Control.Monad (foldM, guard, mzero, void) -import Data.Char (isSpace, toLower, toUpper) +import Data.Char (isSpace) import Data.Default (Default) -import Data.List (foldl', isPrefixOf) +import Data.List (foldl') import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Walk as Walk @@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline -- | Attributes that may be added to figures (like a name or caption). data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrLabel :: Maybe String + { blockAttrName :: Maybe Text + , blockAttrLabel :: Maybe Text , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] + , blockAttrKeyValues :: [(Text, Text)] } -- | Convert BlockAttributes into pandoc Attr @@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} = ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of Nothing -> [] - Just clsStr -> words clsStr + Just clsStr -> T.words clsStr kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute :: Monad m => OrgParser m (Text, Text) stringyMetaAttribute = try $ do metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') + attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':') skipSpaces attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) @@ -129,8 +132,8 @@ blockAttributes = try $ do let label = lookup "LABEL" kv caption' <- case caption of Nothing -> return Nothing - Just s -> Just <$> parseFromString inlines (s ++ "\n") - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs + Just s -> Just <$> parseFromString inlines (s <> "\n") + kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs return BlockAttributes { blockAttrName = name , blockAttrLabel = label @@ -138,31 +141,31 @@ blockAttributes = try $ do , blockAttrKeyValues = kvAttrs' } where - isBlockAttr :: String -> Bool + isBlockAttr :: Text -> Bool isBlockAttr = flip elem [ "NAME", "LABEL", "CAPTION" , "ATTR_HTML", "ATTR_LATEX" , "RESULTS" ] - appendValues :: String -> Maybe String -> (String, String) -> Maybe String + appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text appendValues attrName accValue (key, value) = if key /= attrName then accValue else case accValue of - Just acc -> Just $ acc ++ ' ':value + Just acc -> Just $ acc <> " " <> value Nothing -> Just value -- | Parse key-value pairs for HTML attributes -keyValues :: Monad m => OrgParser m [(String, String)] +keyValues :: Monad m => OrgParser m [(Text, Text)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: Monad m => OrgParser m String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar + key :: Monad m => OrgParser m Text + key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar - value :: Monad m => OrgParser m String - value = skipSpaces *> manyTill anyChar endOfValue + value :: Monad m => OrgParser m Text + value = skipSpaces *> manyTillChar anyChar endOfValue endOfValue :: Monad m => OrgParser m () endOfValue = @@ -180,7 +183,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case map toLower blkType of + case T.toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -194,13 +197,13 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: Monad m => OrgParser m String + blockHeaderStart :: Monad m => OrgParser m Text blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord - lowercase :: String -> String - lowercase = map toLower + lowercase :: Text -> Text + lowercase = T.toLower -exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) exampleBlock blockAttrs _label = do skipSpaces (classes, kv) <- switchesAsAttributes @@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do let codeBlck = B.codeBlockWith (id', "example":classes, kv) content return . return $ codeBlck -rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) +rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks) rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) -parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks) parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType - parseFromString blocks (raw ++ "\n") + parseFromString blocks (raw <> "\n") -- | Read the raw string content of a block -rawBlockContent :: Monad m => String -> OrgParser m String +rawBlockContent :: Monad m => Text -> OrgParser m Text rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop trimP <- orgStateTrimLeadBlkIndent <$> getState - let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs - (unlines + let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs + (T.unlines . stripIndent . map (tabsToSpaces tabLen . commaEscaped) $ blkLines) <$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True }) where - rawLine :: Monad m => OrgParser m String + rawLine :: Monad m => OrgParser m Text rawLine = try $ ("" <$ blankline) <|> anyLine blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - shortestIndent :: [String] -> Int - shortestIndent = foldr (min . length . takeWhile isSpace) maxBound - . filter (not . null) - - tabsToSpaces :: Int -> String -> String - tabsToSpaces _ [] = [] - tabsToSpaces tabLen cs'@(c:cs) = - case c of - ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs - _ -> cs' - - commaEscaped :: String -> String - commaEscaped (',':cs@('*':_)) = cs - commaEscaped (',':cs@('#':'+':_)) = cs - commaEscaped (' ':cs) = ' ':commaEscaped cs - commaEscaped ('\t':cs) = '\t':commaEscaped cs - commaEscaped cs = cs + shortestIndent :: [Text] -> Int + shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound + . filter (not . T.null) + + tabsToSpaces :: Int -> Text -> Text + tabsToSpaces tabStop t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + tabNum = T.length $ T.filter (== '\n') ind + spaceNum = T.length ind - tabNum + in T.replicate (spaceNum + tabStop * tabNum) " " <> suff + + commaEscaped t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + in case T.uncons suff of + Just (',', cs) + | "*" <- T.take 1 cs -> ind <> cs + | "#+" <- T.take 2 cs -> ind <> cs + _ -> t -- | Read but ignore all remaining block headers. ignHeaders :: Monad m => OrgParser m () @@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: Monad m => String -> OrgParser m (F Blocks) +exportBlock :: Monad m => Text -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType - returnF (B.rawBlock (map toLower exportType) contents) + returnF (B.rawBlock (T.toLower exportType) contents) -verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) +verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType fmap B.lineBlock . sequence - <$> mapM parseVerseLine (lines content) + <$> mapM parseVerseLine (T.lines content) where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) + parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines) parseVerseLine cs = do - let (initialSpaces, indentedLine) = span isSpace cs - let nbspIndent = if null initialSpaces + let (initialSpaces, indentedLine) = T.span isSpace cs + let nbspIndent = if T.null initialSpaces then mempty - else B.str $ map (const '\160') initialSpaces - line <- parseFromString inlines (indentedLine ++ "\n") + else B.str $ T.map (const '\160') initialSpaces + line <- parseFromString inlines (indentedLine <> "\n") return (trimInlinesF $ pure nbspIndent <> line) -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - exportsResults :: [(String, String)] -> Bool + exportsResults :: [(Text, Text)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -- | Parse the result of an evaluated babel code block. @@ -329,7 +332,7 @@ babelResultsBlock = try $ do resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments -codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes @@ -338,14 +341,14 @@ codeHeaderArgs = try $ do , originalLang language <> switchKv <> parameters ) -switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) +switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)]) switchesAsAttributes = try $ do switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where - addToAttr :: (Char, Maybe String, SwitchPolarity) - -> ([String], [(String, String)]) - -> ([String], [(String, String)]) + addToAttr :: (Char, Maybe Text, SwitchPolarity) + -> ([Text], [(Text, Text)]) + -> ([Text], [(Text, Text)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of Just num -> ("startFrom", num):kv @@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -- | Parses a source block switch option. -switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) switch = try $ lineNumberSwitch <|> labelSwitch <|> whitespaceSwitch <|> simpleSwitch where simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter labelSwitch = genericSwitch 'l' $ - char '"' *> many1Till nonspaceChar (char '"') + char '"' *> many1TillChar nonspaceChar (char '"') -whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) whitespaceSwitch = do string "-i" updateState $ \s -> s { orgStateTrimLeadBlkIndent = False } @@ -382,8 +385,8 @@ whitespaceSwitch = do -- | Generic source block switch-option parser. genericSwitch :: Monad m => Char - -> OrgParser m String - -> OrgParser m (Char, Maybe String, SwitchPolarity) + -> OrgParser m Text + -> OrgParser m (Char, Maybe Text, SwitchPolarity) genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p @@ -391,17 +394,17 @@ genericSwitch c p = try $ do -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. -lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) -lineNumberSwitch = genericSwitch 'n' (many digit) +lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) +lineNumberSwitch = genericSwitch 'n' (manyChar digit) -blockOption :: Monad m => OrgParser m (String, String) +blockOption :: Monad m => OrgParser m (Text, Text) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: Monad m => OrgParser m String -orgParamValue = try $ +orgParamValue :: Monad m => OrgParser m Text +orgParamValue = try $ fmap T.pack $ skipSpaces *> notFollowedBy orgArgKey *> noneOf "\n\r" `many1Till` endOfValue @@ -420,7 +423,7 @@ orgParamValue = try $ -- export setting. genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do - name <- map toUpper <$> drawerStart + name <- T.toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) state <- getState -- Include drawer if it is explicitly included in or not explicitly excluded @@ -432,16 +435,16 @@ genericDrawer = try $ do Right names | name `notElem` names -> return mempty _ -> drawerDiv name <$> parseLines content where - parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) - parseLines = parseFromString blocks . (++ "\n") . unlines + parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks) + parseLines = parseFromString blocks . (<> "\n") . T.unlines - drawerDiv :: String -> F Blocks -> F Blocks + drawerDiv :: Text -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: Monad m => OrgParser m String +drawerLine :: Monad m => OrgParser m Text drawerLine = anyLine -drawerEnd :: Monad m => OrgParser m String +drawerEnd :: Monad m => OrgParser m Text drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline @@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph - case cleanLinkString src of + case cleanLinkText src of Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: PandocMonad m => OrgParser m String + selfTarget :: PandocMonad m => OrgParser m Text selfTarget = try $ char '[' *> linkTarget <* char ']' - imageBlock :: Bool -> BlockAttributes -> String -> F Blocks + imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks imageBlock isFigure figAttrs imgSrc = let figName = fromMaybe mempty $ blockAttrName figAttrs @@ -478,11 +481,11 @@ figure = try $ do in B.para . B.imageWith attr imgSrc figTitle <$> figCaption - withFigPrefix :: String -> String + withFigPrefix :: Text -> Text withFigPrefix cs = - if "fig:" `isPrefixOf` cs + if "fig:" `T.isPrefixOf` cs then cs - else "fig:" ++ cs + else "fig:" <> cs -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () @@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine where - exampleLine :: Monad m => OrgParser m String + exampleLine :: Monad m => OrgParser m Text exampleLine = try $ exampleLineStart *> anyLine -exampleCode :: String -> Blocks +exampleCode :: Text -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget - includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + includeArgs <- many (try $ skipSpaces *> many1Char alphaNum) params <- keyValues blocksParser <- case includeArgs of ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw @@ -535,10 +538,10 @@ include = try $ do char '"' manyTill (noneOf "\n\r\t") (char '"') - parseRaw :: PandocMonad m => OrgParser m String - parseRaw = many anyChar + parseRaw :: PandocMonad m => OrgParser m Text + parseRaw = manyChar anyChar - blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter :: [(Text, Text)] -> [Block] -> [Block] blockFilter params blks = let minlvl = lookup "minlevel" params in case (minlvl >>= safeRead :: Maybe Int) of @@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <$> (skipSpaces *> char '<' *> optionMaybe tableAlignFromChar) - <*> (optionMaybe (many1 digit >>= safeRead) + <*> (optionMaybe (many1Char digit >>= safeRead) <* char '>' <* emptyCell) @@ -739,10 +742,10 @@ latexFragment = try $ do , "\\end{", e, "}\n" ] -latexEnd :: Monad m => String -> OrgParser m () +latexEnd :: Monad m => Text -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") + <* textStr ("\\end{" <> envName <> "}") <* blankline @@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m -> OrgParser m (F (Inlines, [Blocks])) definitionListItem parseIndentedMarker = try $ do markerLength <- parseIndentedMarker - term <- manyTill (noneOf "\n\r") (try definitionMarker) + term <- manyTillChar (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) + cont <- T.concat <$> many (listContinuation markerLength) term' <- parseFromString inlines term - contents' <- parseFromString blocks $ line1 ++ blank ++ cont + contents' <- parseFromString blocks $ line1 <> blank <> cont return $ (,) <$> term' <*> fmap (:[]) contents' where definitionMarker = @@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do markerLength <- try parseIndentedMarker firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString blocks $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: PandocMonad m => Int -> OrgParser m String +listContinuation :: PandocMonad m => Int -> OrgParser m Text listContinuation markerLength = try $ do notFollowedBy' blankline - mappend <$> (concat <$> many1 (listContinuation' markerLength)) - <*> many blankline + mappend <$> (T.concat <$> many1 (listContinuation' markerLength)) + <*> manyChar blankline where listContinuation' indentation = blockLines indentation <|> listLine indentation @@ -853,6 +856,6 @@ listContinuation markerLength = try $ do >> blockAttributes >>= (\blockAttrs -> case attrFromBlockAttributes blockAttrs of - ("", [], []) -> count 1 anyChar + ("", [], []) -> countChar 1 anyChar _ -> indentWith indentation)) >> (snd <$> withRaw orgBlock) diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index c96087be7..09a501b68 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.DocumentTree Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree import Prelude import Control.Arrow ((***)) import Control.Monad (guard, void) -import Data.Char (toLower, toUpper) import Data.List (intersperse) import Data.Maybe (mapMaybe) +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition @@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import qualified Data.Set as Set +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- @@ -59,28 +61,28 @@ documentTree blocks inline = do } -- | Create a tag containing the given string. -toTag :: String -> Tag +toTag :: Text -> Tag toTag = Tag -- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } +newtype PropertyKey = PropertyKey { fromKey :: Text } deriving (Show, Eq, Ord) -- | Create a property key containing the given string. Org mode keys are -- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower +toPropertyKey :: Text -> PropertyKey +toPropertyKey = PropertyKey . T.toLower -- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } +newtype PropertyValue = PropertyValue { fromValue :: Text } -- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue +toPropertyValue :: Text -> PropertyValue toPropertyValue = PropertyValue -- | Check whether the property value is non-nil (i.e. truish). isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] +isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"] -- | Key/value pairs from a PROPERTIES drawer type Properties = [(PropertyKey, PropertyValue)] @@ -273,7 +275,7 @@ headlineToHeader hdln = do todoKeyword :: Monad m => OrgParser m TodoMarker todoKeyword = try $ do taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) + let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm) <* spaceChar <* updateLastPreCharPos) choice (map kwParser taskStates) @@ -281,26 +283,26 @@ todoKeyword = try $ do todoKeywordToInlines :: TodoMarker -> Inlines todoKeywordToInlines tdm = let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm + todoState = T.toLower . T.pack . show $ todoMarkerState tdm classes = [todoState, todoText] in B.spanWith (mempty, classes, mempty) (B.str todoText) propertiesToAttr :: Properties -> Attr propertiesToAttr properties = let - toStringPair = fromKey *** fromValue + toTextPair = fromKey *** fromValue customIdKey = toPropertyKey "custom_id" classKey = toPropertyKey "class" unnumberedKey = toPropertyKey "unnumbered" specialProperties = [customIdKey, classKey, unnumberedKey] id' = maybe mempty fromValue . lookup customIdKey $ properties cls = maybe mempty fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst) $ properties isUnnumbered = maybe False isNonNil . lookup unnumberedKey $ properties in - (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs') tagsToInlines :: [Tag] -> Inlines tagsToInlines [] = mempty @@ -336,15 +338,15 @@ planningToBlock planning = do <> B.emph (B.str time) -- | An Org timestamp, including repetition marks. TODO: improve -type Timestamp = String +type Timestamp = Text timestamp :: Monad m => OrgParser m Timestamp timestamp = try $ do openChar <- oneOf "<[" let isActive = openChar == '<' let closeChar = if isActive then '>' else ']' - content <- many1Till anyChar (char closeChar) - return (openChar : content ++ [closeChar]) + content <- many1TillChar anyChar (char closeChar) + return $ T.cons openChar $ content `T.snoc` closeChar -- | Planning information for a subtree/headline. data PlanningInfo = PlanningInfo @@ -374,7 +376,7 @@ planningInfo = try $ do propertiesDrawer :: Monad m => OrgParser m Properties propertiesDrawer = try $ do drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" + guard $ T.toUpper drawerType == "PROPERTIES" manyTill property (try endOfDrawer) where property :: Monad m => OrgParser m (PropertyKey, PropertyValue) @@ -382,12 +384,12 @@ propertiesDrawer = try $ do key :: Monad m => OrgParser m PropertyKey key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':') value :: Monad m => OrgParser m PropertyValue value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline) - endOfDrawer :: Monad m => OrgParser m String + endOfDrawer :: Monad m => OrgParser m Text endOfDrawer = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index f783eaa0f..f1f089273 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ExportSettings Copyright : © 2016–2019 Albert Krewinkel @@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing import Control.Monad (mzero, void) import Data.Char (toLower) import Data.Maybe (listToMaybe) +import Data.Text (Text) -- | Read and handle space separated org-mode export settings. exportSettings :: PandocMonad m => OrgParser m () @@ -70,11 +72,11 @@ exportSetting = choice genericExportSetting :: Monad m => OrgParser m a - -> String + -> Text -> ExportSettingSetter a -> OrgParser m () genericExportSetting optionParser settingIdentifier setter = try $ do - _ <- string settingIdentifier *> char ':' + _ <- textStr settingIdentifier *> char ':' value <- optionParser updateState $ modifyExportSettings value where @@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do st { orgStateExportSettings = setter val . orgStateExportSettings $ st } -- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () +booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m () booleanSetting = genericExportSetting elispBoolean -- | An integer-valued option. -integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () +integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m () integerSetting = genericExportSetting parseInt where parseInt = try $ @@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt -- | Either the string "headline" or an elisp boolean and treated as an -- @ArchivedTreesOption@. archivedTreeSetting :: Monad m - => String + => Text -> ExportSettingSetter ArchivedTreesOption -> OrgParser m () archivedTreeSetting = @@ -115,42 +117,42 @@ archivedTreeSetting = -- | A list or a complement list (i.e. a list starting with `not`). complementableListSetting :: Monad m - => String - -> ExportSettingSetter (Either [String] [String]) + => Text + -> ExportSettingSetter (Either [Text] [Text]) -> OrgParser m () complementableListSetting = genericExportSetting $ choice - [ Left <$> complementStringList + [ Left <$> complementTextList , Right <$> stringList , (\b -> if b then Left [] else Right []) <$> elispBoolean ] where -- Read a plain list of strings. - stringList :: Monad m => OrgParser m [String] + stringList :: Monad m => OrgParser m [Text] stringList = try $ char '(' - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' -- Read an emacs lisp list specifying a complement set. - complementStringList :: Monad m => OrgParser m [String] - complementStringList = try $ + complementTextList :: Monad m => OrgParser m [Text] + complementTextList = try $ string "(not " - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' - elispString :: Monad m => OrgParser m String - elispString = try $ + elispText :: Monad m => OrgParser m Text + elispText = try $ char '"' - *> manyTill alphaNum (char '"') + *> manyTillChar alphaNum (char '"') -- | Read but ignore the export setting. -ignoredSetting :: Monad m => String -> OrgParser m () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) +ignoredSetting :: Monad m => Text -> OrgParser m () +ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar) -- | Read any setting string, but ignore it and emit a warning. ignoreAndWarn :: PandocMonad m => OrgParser m () ignoreAndWarn = try $ do - opt <- many1 nonspaceChar + opt <- many1Char nonspaceChar report (UnknownOrgExportOption opt) return () diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index cae590c5f..da638f717 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -20,7 +20,7 @@ import Prelude import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Inlines) @@ -38,12 +38,14 @@ import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T import Data.Maybe (fromMaybe) -- -- Functions acting on the parser state -- -recordAnchorId :: PandocMonad m => String -> OrgParser m () +recordAnchorId :: PandocMonad m => Text -> OrgParser m () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : orgStateAnchorIds s } @@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural @@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String +orgRefCiteKey :: PandocMonad m => OrgParser m Text orgRefCiteKey = let citeKeySpecialChars = "-_:\\./," :: String isCiteKeySpecialChar c = c `elem` citeKeySpecialChars @@ -329,7 +331,7 @@ orgRefCiteKey = endOfCitation = try $ do many $ satisfy isCiteKeySpecialChar satisfy $ not . isCiteKeyChar - in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation -- | Supported citation types. Only a small subset of org-ref types is @@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" - ref <- many alphaNum + ref <- manyChar alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - unless (null ref) $ - addToNotesTable ("fn:" ++ ref, note) + unless (T.null ref) $ + addToNotesTable ("fn:" <> ref, note) return $ B.note <$> note referencedNote :: PandocMonad m => OrgParser m (F Inlines) @@ -397,7 +399,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return . B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" <> ref <> "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -420,7 +422,7 @@ explicitOrImageLink = try $ do return $ do src <- srcF title <- titleF - case cleanLinkString descr of + case cleanLinkText descr of Just imgSrc | isImageFilename imgSrc -> return . B.link src "" $ B.image imgSrc mempty mempty _ -> @@ -429,10 +431,10 @@ explicitOrImageLink = try $ do selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do target <- char '[' *> linkTarget <* char ']' - case cleanLinkString target of - Nothing -> case target of - '#':_ -> returnF $ B.link target "" (B.str target) - _ -> return $ internalLink target (B.str target) + case cleanLinkText target of + Nothing -> case T.uncons target of + Just ('#', _) -> returnF $ B.link target "" (B.str target) + _ -> return $ internalLink target (B.str target) Just nonDocTgt -> if isImageFilename nonDocTgt then returnF $ B.image nonDocTgt "" "" else returnF $ B.link nonDocTgt "" (B.str target) @@ -449,35 +451,35 @@ angleLink = try $ do char '>' return link -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]") +linkTarget :: PandocMonad m => OrgParser m Text +linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser m (F String) +applyCustomLinkFormat :: Text -> OrgParser m (F Text) applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link + let (linkType, rest) = T.break (== ':') link return $ do formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + return $ maybe link ($ T.drop 1 rest) formatter -- | Take a link and return a function which produces new inlines when given -- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF :: Text -> Inlines -> F Inlines linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - Just extTgt -> return . B.link extTgt "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines + case T.uncons linkStr of + Nothing -> pure . B.link mempty "" -- wiki link (empty by convention) + Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction + _ -> case cleanLinkText linkStr of + Just extTgt -> return . B.link extTgt "" + Nothing -> internalLink linkStr -- other internal link + +internalLink :: Text -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds if anchorB - then return $ B.link ('#':link) "" title + then return $ B.link ("#" <> link) "" title else return $ B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with @@ -493,15 +495,15 @@ anchor = try $ do returnF $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") + *> many1Char (noneOf "\t\n\r<>\"' ") <* string ">>" <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. -solidify :: String -> String -solidify = map replaceSpecialChar +solidify :: Text -> Text +solidify = T.map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c | c `elem` ("_.-:" :: String) = c @@ -511,25 +513,25 @@ solidify = map replaceSpecialChar inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" - lang <- many1 orgArgWordChar + lang <- many1Char orgArgWordChar opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r") + inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang] let attrKeyVal = originalLang lang <> opts let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode returnF $ if exportsCode opts then codeInlineBlck else mempty where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: PandocMonad m => OrgParser m String + orgInlineParamValue :: PandocMonad m => OrgParser m Text orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") + *> many1Char (noneOf "\t\n\r ]") <* skipSpaces @@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' + , mathTextBetween '$' , rawMathBetween "\\(" "\\)" ] @@ -604,7 +606,7 @@ updatePositions c = do return c symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions) emphasisBetween :: PandocMonad m => Char @@ -619,7 +621,7 @@ emphasisBetween c = try $ do verbatimBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -627,33 +629,33 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m +mathTextBetween :: PandocMonad m => Char - -> OrgParser m String -mathStringBetween c = try $ do + -> OrgParser m Text +mathTextBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines (noneOf (c:"\n\r")) (lookAhead $ mathEnd c) final <- mathEnd c - return $ body ++ [final] + return $ T.snoc body final -- | Parse a single character between @c@ using math rules math1CharBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] + return $ T.singleton res rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + => Text + -> Text + -> OrgParser m Text +rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) -- | Parses the start (opening character) of emphasis emphasisStart :: PandocMonad m => Char -> OrgParser m Char @@ -702,10 +704,10 @@ enclosedInlines start end = try $ enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b - -> OrgParser m String + -> OrgParser m Text enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end + where onSingleLine = try $ many1TillChar (noneOf "\n\r") end spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine @@ -714,7 +716,7 @@ enclosedRaw start end = try $ many1TillNOrLessNewlines :: PandocMonad m => Int -> OrgParser m Char -> OrgParser m a - -> OrgParser m String + -> OrgParser m Text many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) finalLine = try $ manyTill p end minus1 k = k - 1 - oneOrMore cs = cs <$ guard (not $ null cs) + oneOrMore cs = T.pack cs <$ guard (not $ null cs) -- Org allows customization of the way it reads emphasis. We use the defaults -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` @@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString + , simpleSubOrSuperText ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] + where enclosing (left, right) s = T.cons left $ T.snoc s right -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do +simpleSubOrSuperText :: PandocMonad m => OrgParser m Text +simpleSubOrSuperText = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum + choice [ textStr "*" + , mappend <$> option "" (T.singleton <$> oneOf "+-") + <*> many1Char alphaNum ] inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) @@ -793,28 +795,28 @@ inlineLaTeX = try $ do maybe mzero returnF $ parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils where - parseAsMath :: String -> Maybe Inlines + parseAsMath :: Text -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines) parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym :: Text -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 + where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1 state :: ParserState state = def{ stateOptions = def{ readerExtensions = enableExtension Ext_raw_tex (readerExtensions def) } } - texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc :: Text -> Maybe [Inline] texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: PandocMonad m => OrgParser m String +inlineLaTeXCommand :: PandocMonad m => OrgParser m Text inlineLaTeXCommand = try $ do rest <- getInput st <- getState @@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc + let cmdNoSpc = T.dropWhileEnd isSpace cs + let len = T.length cmdNoSpc count len anyChar return cmdNoSpc _ -> mzero --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") + format <- many1TillChar (alphaNum <|> char '-') (char ':') + snippet <- manyTillChar anyChar (try $ string "@@") returnF $ B.rawInline format snippet macro :: PandocMonad m => OrgParser m (F Inlines) @@ -845,7 +843,7 @@ macro = try $ do recursionDepth <- orgStateMacroDepth <$> getState guard $ recursionDepth < 15 string "{{{" - name <- many alphaNum + name <- manyChar alphaNum args <- ([] <$ string "}}}") <|> char '(' *> argument `sepBy` char ',' <* eoa expander <- lookupMacro name <$> getState @@ -857,7 +855,7 @@ macro = try $ do updateState $ \s -> s { orgStateMacroDepth = recursionDepth } return res where - argument = many $ notFollowedBy eoa *> noneOf "," + argument = manyChar $ notFollowedBy eoa *> noneOf "," eoa = string ")}}}" smart :: PandocMonad m => OrgParser m (F Inlines) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 0a388403e..811a5b974 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Meta Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -30,11 +31,12 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) import Control.Monad (mzero, void, when) -import Data.Char (toLower) import Data.List (intersperse) import Data.Maybe (fromMaybe) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. @@ -47,7 +49,7 @@ metaExport = do . (if exportWithEmail settings then id else removeMeta "email") <$> orgStateMeta st -removeMeta :: String -> Meta -> Meta +removeMeta :: Text -> Meta -> Meta removeMeta key meta' = let metaMap = unMeta meta' in Meta $ M.delete key metaMap @@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do - key <- map toLower <$> metaKey + key <- T.toLower <$> metaKey (key', value) <- metaValue key let addMetaValue st = st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } when (key' /= "results") $ updateState addMetaValue -metaKey :: Monad m => OrgParser m String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces +metaKey :: Monad m => OrgParser m Text +metaKey = T.toLower <$> many1Char (noneOf ": \n\r") + <* char ':' + <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) +metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -88,7 +90,7 @@ metaValue key = -- Org-mode expects class options to contain the surrounding brackets, -- pandoc does not. "latex_class_options" -> ("classoption",) <$> - metaModifiedString (filter (`notElem` "[]")) + metaModifiedString (T.filter (`notElem` ("[]" :: String))) "html_head" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString @@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' + itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ',' newline - items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs + items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList return $ MetaList . map toMetaInlines <$> sequence items metaString :: Monad m => OrgParser m (F MetaValue) metaString = metaModifiedString id -metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) +metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue) metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition -metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) +metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue) metaExportSnippet format = return . MetaInlines . B.toList . B.rawInline format <$> anyLine -- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: Monad m => String +accumulatingList :: Monad m => Text -> OrgParser m (F MetaValue) -> OrgParser m (F MetaValue) accumulatingList key p = do @@ -147,33 +149,33 @@ optionLine = try $ do "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar _ -> mzero -addLinkFormat :: Monad m => String - -> (String -> String) +addLinkFormat :: Monad m => Text + -> (Text -> Text) -> OrgParser m () addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m (String, String -> String) +parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text) parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat return (linkType, linkSubst) -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. -parseFormat :: Monad m => OrgParser m (String -> String) +parseFormat :: Monad m => OrgParser m (Text -> Text) parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) + replacePlain = try $ (\x -> T.concat . flip intersperse x) <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack) <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest + justAppend = try $ (<>) <$> rest - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:"")) tagList :: Monad m => OrgParser m [Tag] tagList = do @@ -231,41 +233,41 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where - todoKeywords :: Monad m => OrgParser m [String] + todoKeywords :: Monad m => OrgParser m [Text] todoKeywords = try $ - let keyword = many1 nonspaceChar <* skipSpaces + let keyword = many1Char nonspaceChar <* skipSpaces endOfKeywords = todoDoneSep <|> void newline in manyTill keyword (lookAhead endOfKeywords) todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 - keywordsToSequence :: [String] -> [String] -> TodoSequence + keywordsToSequence :: [Text] -> [Text] -> TodoSequence keywordsToSequence todo done = let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers -macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text) macroDefinition = try $ do - macroName <- many1 nonspaceChar <* skipSpaces + macroName <- many1Char nonspaceChar <* skipSpaces firstPart <- expansionPart (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder return (macroName, expander) where placeholder :: Monad m => OrgParser m Int - placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit + placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit - expansionPart :: Monad m => OrgParser m String - expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + expansionPart :: Monad m => OrgParser m Text + expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r") alternate :: [a] -> [a] -> [a] alternate [] ys = ys alternate xs [] = xs alternate (x:xs) (y:ys) = x : y : alternate xs ys - reorder :: [Int] -> [String] -> [String] + reorder :: [Int] -> [Text] -> [Text] reorder perm xs = let element n = take 1 $ drop (n - 1) xs in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index d6dde8b22..cf5583b76 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ParserState Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro) type F = Future OrgParserState -- | An inline note / footnote containing the note key and its (inline) value. -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (Text, F Blocks) -- | Table of footnotes type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. -type OrgLinkFormatters = M.Map String (String -> String) +type OrgLinkFormatters = M.Map Text (Text -> Text) -- | Macro expander function -type MacroExpander = [String] -> String +type MacroExpander = [Text] -> Text -- | Tag -newtype Tag = Tag { fromTag :: String } +newtype Tag = Tag { fromTag :: Text } deriving (Show, Eq, Ord) -- | The states in which a todo item can be @@ -82,7 +83,7 @@ data TodoState = Todo | Done -- | A ToDo keyword like @TODO@ or @DONE@. data TodoMarker = TodoMarker { todoMarkerState :: TodoState - , todoMarkerName :: String + , todoMarkerName :: Text } deriving (Show, Eq) @@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker] -- | Org-mode parser state data OrgParserState = OrgParserState - { orgStateAnchorIds :: [String] + { orgStateAnchorIds :: [Text] , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before -- emphasis; spaces and newlines are @@ -102,13 +103,13 @@ data OrgParserState = OrgParserState , orgStateExcludeTags :: Set.Set Tag , orgStateExcludeTagsChanged :: Bool , orgStateExportSettings :: ExportSettings - , orgStateIdentifiers :: Set.Set String - , orgStateIncludeFiles :: [String] + , orgStateIdentifiers :: Set.Set Text + , orgStateIncludeFiles :: [Text] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMacros :: M.Map String MacroExpander + , orgStateMacros :: M.Map Text MacroExpander , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable @@ -212,10 +213,10 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences -lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander lookupMacro macroName = M.lookup macroName . orgStateMacros -registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState registerMacro (name, expander) st = let curMacros = orgStateMacros st in st{ orgStateMacros = M.insert name expander curMacros } @@ -236,7 +237,7 @@ data ArchivedTreesOption = -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] + , exportDrawers :: Either [Text] [Text] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 24aa0779d..718925120 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing , orgTagWordChar -- * Re-exports from Text.Pandoc.Parser , ParserContext (..) + , textStr + , countChar + , manyChar + , many1Char + , manyTillChar , many1Till + , many1TillChar , notFollowedBy' , spaceChar , nonspaceChar @@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing ) where import Prelude +import Data.Text (Text) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline, @@ -108,14 +115,14 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities -- -- | Parse any line of text -anyLine :: Monad m => OrgParser m String +anyLine :: Monad m => OrgParser m Text anyLine = P.anyLine <* updateLastPreCharPos @@ -123,7 +130,7 @@ anyLine = -- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character -- allowed before emphasised text. -parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a +parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a parseFromString parser str' = do updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } result <- P.parseFromString parser str' @@ -142,7 +149,7 @@ newline = <* updateLastForbiddenCharPos -- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: Monad m => OrgParser m [Char] +blanklines :: Monad m => OrgParser m Text blanklines = P.blanklines <* updateLastPreCharPos @@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p -> -- -- | Read the key of a plist style key-value list. -orgArgKey :: Monad m => OrgParser m String +orgArgKey :: Monad m => OrgParser m Text orgArgKey = try $ skipSpaces *> char ':' - *> many1 orgArgWordChar + *> many1Char orgArgWordChar -- | Read the value of a plist style key-value list. -orgArgWord :: Monad m => OrgParser m String -orgArgWord = many1 orgArgWordChar +orgArgWord :: Monad m => OrgParser m Text +orgArgWord = many1Char orgArgWordChar -- | Chars treated as part of a word in plists. orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" -orgTagWord :: Monad m => OrgParser m String -orgTagWord = many1 orgTagWordChar +orgTagWord :: Monad m => OrgParser m Text +orgTagWord = many1Char orgTagWordChar orgTagWordChar :: Monad m => OrgParser m Char orgTagWordChar = alphaNum <|> oneOf "@%#_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 34f958373..be0a2068e 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -10,7 +10,7 @@ Utility functions used in other Pandoc Org modules. -} module Text.Pandoc.Readers.Org.Shared - ( cleanLinkString + ( cleanLinkText , isImageFilename , originalLang , translateLang @@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared import Prelude import Data.Char (isAlphaNum) -import Data.List (isPrefixOf) +import Data.Text (Text) +import qualified Data.Text as T import System.FilePath (isValid, takeExtension) - +import Text.Pandoc.Shared (elemText) -- | Check whether the given string looks like the path to of URL of an image. -isImageFilename :: String -> Bool -isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri) +isImageFilename :: Text -> Bool +isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri) where - hasImageExtension = takeExtension fp `elem` imageExtensions - isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols + hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions + isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ] protocols = [ "file", "http", "https" ] -- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if -- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' - _ -> if isUrl s then Just s else Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) +cleanLinkText :: Text -> Maybe Text +cleanLinkText s + | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path + | Just _ <- T.stripPrefix "./" s = Just s -- relative path + | Just _ <- T.stripPrefix "../" s = Just s -- relative path + -- Relative path or URL (file schema) + | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s' + | otherwise = if isUrl s then Just s else Nothing + where + isUrl :: Text -> Bool + isUrl cs = + let (scheme, path) = T.break (== ':') cs + in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + && not (T.null path) -- | Creates an key-value pair marking the original language name specified for -- a piece of source code. -- | Creates an key-value attributes marking the original language name -- specified for a piece of source code. -originalLang :: String -> [(String, String)] +originalLang :: Text -> [(Text, Text)] originalLang lang = let transLang = translateLang lang in if transLang == lang @@ -66,7 +66,7 @@ originalLang lang = -- | Translate from Org-mode's programming language identifiers to those used -- by Pandoc. This is useful to allow for proper syntax highlighting in -- Pandoc output. -translateLang :: String -> String +translateLang :: Text -> Text translateLang cs = case cs of "C" -> "c" @@ -79,5 +79,5 @@ translateLang cs = "sqlite" -> "sql" _ -> cs -exportsCode :: [(String, String)] -> Bool +exportsCode :: [(Text, Text)] -> Bool exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7e29caf28..d2fba4449 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.RST Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,9 +20,8 @@ import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) -import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum) -import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, - nub, sort, transpose) +import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum) +import Data.List (deleteFirstsBy, elemIndex, nub, sort, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Sequence (ViewR (..), viewr) @@ -47,16 +47,16 @@ import Text.Printf (printf) -- | Parse reStructuredText string and return Pandoc document. readRST :: PandocMonad m => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ Text to parse (assuming @'\n'@ line endings) -> m Pandoc readRST opts s = do parsed <- readWithM parseRST def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -type RSTParser m = ParserT [Char] ParserState m +type RSTParser m = ParserT Text ParserState m -- -- Constants and data structure definitions @@ -113,7 +113,7 @@ titleTransform (bs, meta) = metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta metaFromDefList ds meta = adjustAuthors $ foldr f meta ds - where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) + where f (k,v) = setMeta (T.toLower $ stringify k) (mconcat $ map fromList v) adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" @@ -136,13 +136,13 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds isSp LineBreak = True isSp _ = False splitOnSemi = splitBy (==Str ";") - factorSemi (Str []) = [] - factorSemi (Str s) = case break (==';') s of - (xs,[]) -> [Str xs] - (xs,';':ys) -> Str xs : Str ";" : - factorSemi (Str ys) - (xs,ys) -> Str xs : - factorSemi (Str ys) + factorSemi (Str "") = [] + factorSemi (Str s) = case T.break (==';') s of + (xs,"") -> [Str xs] + (xs,T.uncons -> Just (';',ys)) -> Str xs : Str ";" : + factorSemi (Str ys) + (xs,ys) -> Str xs : + factorSemi (Str ys) factorSemi x = [x] parseRST :: PandocMonad m => RSTParser m Pandoc @@ -151,7 +151,7 @@ parseRST = do startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- concat <$> + docMinusKeys <- T.concat <$> manyTill (referenceKey <|> anchorDef <|> noteBlock <|> citationBlock <|> (snd <$> withRaw comment) <|> @@ -180,7 +180,7 @@ parseRST = do return $ Pandoc meta' (blocks' ++ refBlock) parseCitation :: PandocMonad m - => (String, String) -> RSTParser m (Inlines, [Blocks]) + => (Text, Text) -> RSTParser m (Inlines, [Blocks]) parseCitation (ref, raw) = do contents <- parseFromString' parseBlocks raw return (B.spanWith (ref, ["citation-label"], []) (B.str ref), @@ -215,23 +215,23 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) +rawFieldListItem :: Monad m => Int -> RSTParser m (Text, Text) rawFieldListItem minIndent = try $ do indent <- length <$> many (char ' ') guard $ indent >= minIndent char ':' - name <- many1Till (noneOf "\n") (char ':') + name <- many1TillChar (noneOf "\n") (char ':') (() <$ lookAhead newline) <|> skipMany1 spaceChar first <- anyLine rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock - let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" + let raw = (if T.null first then "" else (first <> "\n")) <> rest <> "\n" return (name, raw) fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent - term <- parseInlineFromString name + term <- parseInlineFromText name contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) @@ -251,12 +251,12 @@ fieldList = try $ do lineBlock :: PandocMonad m => RSTParser m Blocks lineBlock = try $ do lines' <- lineBlockLines - lines'' <- mapM parseInlineFromString lines' + lines'' <- mapM parseInlineFromText lines' return $ B.lineBlock lines'' -lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks +lineBlockDirective :: PandocMonad m => Text -> RSTParser m Blocks lineBlockDirective body = do - lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body + lines' <- mapM parseInlineFromText $ T.lines $ stripTrailingNewlines body return $ B.lineBlock lines' -- @@ -271,9 +271,9 @@ para = try $ do newline blanklines case viewr (B.unMany result) of - ys :> Str xs | "::" `isSuffixOf` xs -> do + ys :> Str xs | "::" `T.isSuffixOf` xs -> do raw <- option mempty codeBlockBody - return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) + return $ B.para (B.Many ys <> B.str (T.take (T.length xs - 1) xs)) <> raw _ -> return (B.para result) @@ -349,7 +349,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT [Char] st m Blocks +hrule :: Monad m => ParserT Text st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -364,7 +364,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m [Char] + => Int -> ParserT Text st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -373,29 +373,29 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT [Char] st m [Char] + => ParserT Text st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines l <- indentedLine indents - return (b ++ l) + return (b <> l) optional blanklines - return $ unlines lns + return $ T.unlines lns -quotedBlock :: Monad m => ParserT [Char] st m [Char] +quotedBlock :: Monad m => ParserT Text st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines - return $ unlines lns + return $ T.unlines lns -codeBlockStart :: Monad m => ParserT [Char] st m Char +codeBlockStart :: Monad m => ParserT Text st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlock :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> (indentedBlock <|> quotedBlock) @@ -407,24 +407,24 @@ lhsCodeBlock = try $ do lns <- latexCodeBlock <|> birdCodeBlock blanklines return $ B.codeBlockWith ("", ["haskell","literate"], []) - $ intercalate "\n" lns + $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +latexCodeBlock :: Monad m => ParserT Text st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +birdCodeBlock :: Monad m => ParserT Text st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it - if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT [Char] st m [Char] +birdTrackLine :: Monad m => ParserT Text st m Text birdTrackLine = char '>' >> anyLine -- @@ -435,7 +435,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw <> "\n\n" return $ B.blockQuote contents {- @@ -445,12 +445,12 @@ encoding -} includeDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks includeDirective top fields body = do let f = trim top - guard $ not (null f) - guard $ null (trim body) + guard $ not (T.null f) + guard $ T.null (trim body) -- options let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead @@ -458,11 +458,11 @@ includeDirective top fields body = do oldInput <- getInput containers <- stateContainers <$> getState when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos updateState $ \s -> s{ stateContainers = f : stateContainers s } - mbContents <- readFileFromDirs ["."] f + mbContents <- readFileFromDirs ["."] $ T.unpack f contentLines <- case mbContents of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do logMessage $ CouldNotLoadIncludeFile f oldPos return [] @@ -478,23 +478,23 @@ includeDirective top fields body = do let contentLines' = drop (startLine' - 1) $ take (endLine' - 1) contentLines let contentLines'' = (case trim <$> lookup "end-before" fields of - Just patt -> takeWhile (not . (patt `isInfixOf`)) + Just patt -> takeWhile (not . (patt `T.isInfixOf`)) Nothing -> id) . (case trim <$> lookup "start-after" fields of Just patt -> drop 1 . - dropWhile (not . (patt `isInfixOf`)) + dropWhile (not . (patt `T.isInfixOf`)) Nothing -> id) $ contentLines' - let contents' = unlines contentLines'' ++ "\n" + let contents' = T.unlines contentLines'' <> "\n" case lookup "code" fields of Just lang -> do let numberLines = lookup "number-lines" fields - let classes = maybe [] words (lookup "class" fields) + let classes = maybe [] T.words (lookup "class" fields) let ident = maybe "" trimr $ lookup "name" fields codeblock ident classes numberLines (trimr lang) contents' False Nothing -> case lookup "literal" fields of Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do - setPosition $ newPos f 1 1 + setPosition $ newPos (T.unpack f) 1 1 setInput contents' bs <- optional blanklines >> (mconcat <$> many block) @@ -519,14 +519,14 @@ definitionListItem = try $ do term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw <> "\n" return (term, [contents]) definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT [Char] st m Int +bulletListStart :: Monad m => ParserT Text st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -543,7 +543,7 @@ orderedListStart style delim = try $ do return $ markerLen + length white -- parse a line of a list item -listLine :: Monad m => Int -> RSTParser m [Char] +listLine :: Monad m => Int -> RSTParser m Text listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -551,21 +551,21 @@ listLine markerLength = try $ do -- parse raw text for one list item, excluding start marker and continuations rawListItem :: Monad m => RSTParser m Int - -> RSTParser m (Int, [Char]) + -> RSTParser m (Int, Text) rawListItem start = try $ do markerLength <- start firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, firstLine ++ concat restLines) + return (markerLength, firstLine <> T.concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int -> RSTParser m [Char] +listContinuation :: Monad m => Int -> RSTParser m Text listContinuation markerLength = try $ do - blanks <- many1 blankline + blanks <- many1Char blankline result <- many1 (listLine markerLength) - return $ blanks ++ concat result + return $ blanks <> T.concat result listItem :: PandocMonad m => RSTParser m Int @@ -581,7 +581,7 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" + parsed <- parseFromString' parseBlocks $ T.concat (first:rest) <> "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of [Para xs] -> @@ -617,9 +617,9 @@ comment = try $ do optional indentedBlock return mempty -directiveLabel :: Monad m => RSTParser m String -directiveLabel = map toLower - <$> many1Till (letter <|> char '-') (try $ string "::") +directiveLabel :: Monad m => RSTParser m Text +directiveLabel = T.toLower + <$> many1TillChar (letter <|> char '-') (try $ string "::") directive :: PandocMonad m => RSTParser m Blocks directive = try $ do @@ -631,7 +631,7 @@ directive' = do skipMany1 spaceChar label <- directiveLabel skipMany spaceChar - top <- many $ satisfy (/='\n') + top <- manyChar $ satisfy (/='\n') <|> try (char '\n' <* notFollowedBy' (rawFieldListItem 1) <* many1 (char ' ') <* @@ -644,35 +644,33 @@ directive' = do else many $ rawFieldListItem fieldIndent body <- option "" $ try $ blanklines >> indentedBlock optional blanklines - let body' = body ++ "\n\n" + let body' = body <> "\n\n" name = trim $ fromMaybe "" (lookup "name" fields) - classes = words $ maybe "" trim (lookup "class" fields) + classes = T.words $ maybe "" trim (lookup "class" fields) keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"] imgAttr cl = (name, classes ++ alignClasses, widthAttr ++ heightAttr) where - alignClasses = words $ maybe "" trim (lookup cl fields) ++ - maybe "" (\x -> "align-" ++ trim x) + alignClasses = T.words $ maybe "" trim (lookup cl fields) <> + maybe "" (\x -> "align-" <> trim x) (lookup "align" fields) scale = case trim <$> lookup "scale" fields of - Just v -> case reverse v of - '%':vv -> - case safeRead (reverse vv) of - Just (percent :: Double) - -> percent / 100.0 - Nothing -> 1.0 - _ -> - case safeRead v of - Just (s :: Double) -> s - Nothing -> 1.0 - Nothing -> 1.0 + Just v -> case T.unsnoc v of + Just (vv, '%') -> case safeRead vv of + Just (percent :: Double) + -> percent / 100.0 + Nothing -> 1.0 + _ -> case safeRead v of + Just (s :: Double) -> s + Nothing -> 1.0 + Nothing -> 1.0 widthAttr = maybe [] (\x -> [("width", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "width" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) heightAttr = maybe [] (\x -> [("height", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "height" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) case label of "include" -> includeDirective top fields body' "table" -> tableDirective top fields body' @@ -682,36 +680,37 @@ directive' = do "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (second trim) fields "container" -> B.divWith - (name, "container" : words top ++ classes, []) <$> + (name, "container" : T.words top ++ classes, []) <$> parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim top) + parseInlineFromText (trim top) "unicode" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim $ unicodeTransform top) + parseInlineFromText (trim $ unicodeTransform top) "compound" -> parseFromString' parseBlocks body' "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' - "rubric" -> B.para . B.strong <$> parseInlineFromString top + "rubric" -> B.para . B.strong <$> parseInlineFromText top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning","admonition"] -> - do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + do bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' let lab = case label of - "admonition" -> mempty - (l:ls) -> B.divWith ("",["title"],[]) - (B.para (B.str (toUpper l : ls))) - [] -> mempty + "admonition" -> mempty + (T.uncons -> Just (l, ls)) + -> B.divWith ("",["title"],[]) + (B.para (B.str $ T.cons (toUpper l) ls)) + _ -> mempty return $ B.divWith (name,label:classes,keyvals) (lab <> bod) "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseInlineFromString - (trim top ++ if null subtit + tit <- B.para . B.strong <$> parseInlineFromText + (trim top <> if T.null subtit then "" - else (": " ++ subtit)) + else (": " <> subtit)) bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod "topic" -> - do tit <- B.para . B.strong <$> parseInlineFromString top + do tit <- B.para . B.strong <$> parseInlineFromText top bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> @@ -726,7 +725,7 @@ directive' = do let attribs = (name, ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath - $ toChunks $ top ++ "\n\n" ++ body + $ toChunks $ top <> "\n\n" <> body "figure" -> do (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top @@ -742,7 +741,7 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = (name, words (trim top), map (second trimr) fields) + let attrs = (name, T.words (trim top), map (second trimr) fields) -- directive content or the first immediately following element children <- case body of "" -> block @@ -750,12 +749,12 @@ directive' = do return $ B.divWith attrs children other -> do pos <- getPosition - logMessage $ SkippedContent (".. " ++ other) pos - bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + logMessage $ SkippedContent (".. " <> other) pos + bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' return $ B.divWith (name, other:classes, keyvals) bod tableDirective :: PandocMonad m - => String -> [(String, String)] -> String -> RSTParser m Blocks + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks tableDirective top fields body = do bs <- parseFromString' parseBlocks body case B.toList bs of @@ -770,7 +769,7 @@ tableDirective top fields body = do Just "grid" -> widths' Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs Nothing -> widths' -- align is not applicable since we can't represent whole table align return $ B.singleton $ Table (B.toList title) @@ -783,7 +782,7 @@ tableDirective top fields body = do -- since Pandoc doesn't support a table with multiple header rows. -- We don't need to parse :align: as it represents the whole table align. listTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks listTableDirective top fields body = do bs <- parseFromString' parseBlocks body @@ -799,7 +798,7 @@ listTableDirective top fields body = do widths = case trim <$> lookup "widths" fields of Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ - splitBy (`elem` (" ," :: String)) specs + splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -812,7 +811,7 @@ listTableDirective top fields body = do normWidths ws = map (/ max 1 (sum ws)) ws csvTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks csvTableDirective top fields rawcsv = do let explicitHeader = trim <$> lookup "header" fields @@ -820,14 +819,17 @@ csvTableDirective top fields rawcsv = do csvDelim = case trim <$> lookup "delim" fields of Just "tab" -> '\t' Just "space" -> ' ' - Just [c] -> c + Just (T.unpack -> [c]) + -> c _ -> ',' , csvQuote = case trim <$> lookup "quote" fields of - Just [c] -> c - _ -> '"' + Just (T.unpack -> [c]) + -> c + _ -> '"' , csvEscape = case trim <$> lookup "escape" fields of - Just [c] -> Just c - _ -> Nothing + Just (T.unpack -> [c]) + -> Just c + _ -> Nothing , csvKeepSpace = case trim <$> lookup "keepspace" fields of Just "true" -> True _ -> False @@ -840,16 +842,16 @@ csvTableDirective top fields rawcsv = do lookup "file" fields `mplus` lookup "url" fields of Just u -> do (bs, _) <- fetchItem u - return $ UTF8.toString bs + return $ UTF8.toText bs Nothing -> return rawcsv - let res = parseCSV opts (T.pack $ case explicitHeader of - Just h -> h ++ "\n" ++ rawcsv' - Nothing -> rawcsv') + let res = parseCSV opts (case explicitHeader of + Just h -> h <> "\n" <> rawcsv' + Nothing -> rawcsv') case res of Left e -> throwError $ PandocParsecError "csv table" e Right rawrows -> do - let parseCell = parseFromString' (plain <|> return mempty) . T.unpack + let parseCell = parseFromString' (plain <|> return mempty) let parseRow = mapM parseCell rows <- mapM parseRow rawrows let (headerRow,bodyRows,numOfCols) = @@ -865,7 +867,7 @@ csvTableDirective top fields rawcsv = do Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -876,10 +878,10 @@ csvTableDirective top fields rawcsv = do -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: PandocMonad m - => String -> [(String, String)] -> RSTParser m Blocks -addNewRole roleString fields = do + => Text -> [(Text, Text)] -> RSTParser m Blocks +addNewRole roleText fields = do pos <- getPosition - (role, parentRole) <- parseFromString' inheritedRole roleString + (role, parentRole) <- parseFromString' inheritedRole roleText customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of @@ -888,7 +890,7 @@ addNewRole roleString fields = do (baseRole, baseFmt, baseAttr) = getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt - annotate :: [String] -> [String] + annotate :: [Text] -> [Text] annotate = maybe id (:) $ if baseRole == "code" then lookup "language" fields @@ -904,7 +906,7 @@ addNewRole roleString fields = do pos "format" -> when (baseRole /= "raw") $ logMessage $ SkippedContent ":format: [because parent of role is not :raw:]" pos - _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos + _ -> logMessage $ SkippedContent (":" <> key <> ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ logMessage $ SkippedContent ":format: [after first in definition of role]" @@ -930,30 +932,29 @@ addNewRole roleString fields = do -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u -- or as XML-style hexadecimal character entities, e.g. ᨫ -- or text, which is used as-is. Comments start with .. -unicodeTransform :: String -> String -unicodeTransform t = - case t of - ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment - ('0':'x':xs) -> go "0x" xs - ('x':xs) -> go "x" xs - ('\\':'x':xs) -> go "\\x" xs - ('U':'+':xs) -> go "U+" xs - ('u':xs) -> go "u" xs - ('\\':'u':xs) -> go "\\u" xs - ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs) - -- drop semicolon - (\(c,s) -> c : unicodeTransform (drop 1 s)) - $ extractUnicodeChar xs - (x:xs) -> x : unicodeTransform xs - [] -> [] - where go pref zs = maybe (pref ++ unicodeTransform zs) - (\(c,s) -> c : unicodeTransform s) - $ extractUnicodeChar zs - -extractUnicodeChar :: String -> Maybe (Char, String) +unicodeTransform :: Text -> Text +unicodeTransform t + | Just xs <- T.stripPrefix ".." t = unicodeTransform $ T.dropWhile (/= '\n') xs -- comment + | Just xs <- T.stripPrefix "0x" t = go "0x" xs + | Just xs <- T.stripPrefix "x" t = go "x" xs + | Just xs <- T.stripPrefix "\\x" t = go "\\x" xs + | Just xs <- T.stripPrefix "U+" t = go "U+" xs + | Just xs <- T.stripPrefix "u" t = go "u" xs + | Just xs <- T.stripPrefix "\\u" t = go "\\u" xs + | Just xs <- T.stripPrefix "&#x" t = maybe ("&#x" <> unicodeTransform xs) + -- drop semicolon + (\(c,s) -> T.cons c $ unicodeTransform $ T.drop 1 s) + $ extractUnicodeChar xs + | Just (x, xs) <- T.uncons t = T.cons x $ unicodeTransform xs + | otherwise = "" + where go pref zs = maybe (pref <> unicodeTransform zs) + (\(c,s) -> T.cons c $ unicodeTransform s) + $ extractUnicodeChar zs + +extractUnicodeChar :: Text -> Maybe (Char, Text) extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc - where (ds,rest) = span isHexDigit s - mbc = safeRead ('\'':'\\':'x':ds ++ "'") + where (ds,rest) = T.span isHexDigit s + mbc = safeRead ("'\\x" <> ds <> "'") extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) extractCaption = do @@ -963,16 +964,16 @@ extractCaption = do -- divide string by blanklines, and surround with -- \begin{aligned}...\end{aligned} if needed. -toChunks :: String -> [String] -toChunks = dropWhile null - . map (addAligned . trim . unlines) - . splitBy (all (`elem` (" \t" :: String))) . lines +toChunks :: Text -> [Text] +toChunks = dropWhile T.null + . map (addAligned . trim . T.unlines) + . splitBy (T.all (`elem` (" \t" :: String))) . T.lines -- we put this in an aligned environment if it contains \\, see #4254 - where addAligned s = if "\\\\" `isInfixOf` s - then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}" + where addAligned s = if "\\\\" `T.isInfixOf` s + then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}" else s -codeblock :: String -> [String] -> Maybe String -> String -> String -> Bool +codeblock :: Text -> [Text] -> Maybe Text -> Text -> Text -> Bool -> RSTParser m Blocks codeblock ident classes numberLines lang body rmTrailingNewlines = return $ B.codeBlockWith attribs $ stripTrailingNewlines' body @@ -984,7 +985,7 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = : maybe [] (const ["numberLines"]) numberLines ++ classes kvs = maybe [] (\n -> case trimr n of - [] -> [] + "" -> [] xs -> [("startFrom", xs)]) numberLines @@ -992,25 +993,25 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = --- note block --- -noteBlock :: Monad m => RSTParser m [Char] +noteBlock :: Monad m => RSTParser m Text noteBlock = try $ do (ref, raw, replacement) <- noteBlock' noteMarker updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s } -- return blanks so line count isn't affected return replacement -citationBlock :: Monad m => RSTParser m [Char] +citationBlock :: Monad m => RSTParser m Text citationBlock = try $ do (ref, raw, replacement) <- noteBlock' citationMarker updateState $ \s -> s { stateCitations = M.insert ref raw (stateCitations s), - stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[])) + stateKeys = M.insert (toKey ref) (("#" <> ref,""), ("",["citation"],[])) (stateKeys s) } -- return blanks so line count isn't affected return replacement noteBlock' :: Monad m - => RSTParser m String -> RSTParser m (String, String, String) + => RSTParser m Text -> RSTParser m (Text, Text, Text) noteBlock' marker = try $ do startPos <- getPosition string ".." @@ -1021,24 +1022,24 @@ noteBlock' marker = try $ do blanks <- option "" blanklines rest <- option "" indentedBlock endPos <- getPosition - let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" - let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n' + let raw = first <> "\n" <> blanks <> rest <> "\n" + let replacement = T.replicate (sourceLine endPos - sourceLine startPos) "\n" return (ref, raw, replacement) -citationMarker :: Monad m => RSTParser m [Char] +citationMarker :: Monad m => RSTParser m Text citationMarker = do char '[' res <- simpleReferenceName char ']' return res -noteMarker :: Monad m => RSTParser m [Char] +noteMarker :: Monad m => RSTParser m Text noteMarker = do char '[' - res <- many1 digit + res <- many1Char digit <|> - try (char '#' >> liftM ('#':) simpleReferenceName) - <|> count 1 (oneOf "#*") + try (char '#' >> liftM ("#" <>) simpleReferenceName) + <|> countChar 1 (oneOf "#*") char ']' return res @@ -1046,47 +1047,48 @@ noteMarker = do -- reference key -- -quotedReferenceName :: PandocMonad m => RSTParser m String +quotedReferenceName :: PandocMonad m => RSTParser m Text quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - manyTill anyChar (char '`') + manyTillChar anyChar (char '`') -- Simple reference names are single words consisting of alphanumerics -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName :: Monad m => ParserT [Char] st m String +simpleReferenceName :: Monad m => ParserT Text st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum <|> try (oneOf "-_:+." <* lookAhead alphaNum) - return (x:xs) + return $ T.pack (x:xs) -referenceName :: PandocMonad m => RSTParser m String +referenceName :: PandocMonad m => RSTParser m Text referenceName = quotedReferenceName <|> simpleReferenceName -referenceKey :: PandocMonad m => RSTParser m [Char] +referenceKey :: PandocMonad m => RSTParser m Text referenceKey = do startPos <- getPosition choice [substKey, anonymousKey, regularKey] optional blanklines endPos <- getPosition -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT [Char] st m [Char] +targetURI :: Monad m => ParserT Text st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline contents <- trim <$> - many1 (satisfy (/='\n') + many1Char (satisfy (/='\n') <|> try (newline >> many1 spaceChar >> noneOf " \t\n")) blanklines - case reverse contents of - -- strip backticks - '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_") - '_':_ -> return contents - _ -> return (escapeURI contents) + return $ stripBackticks contents + where + stripBackticks t + | Just xs <- T.stripSuffix "`_" t = T.dropWhile (=='`') xs <> "_" + | Just _ <- T.stripSuffix "_" t = t + | otherwise = escapeURI t substKey :: PandocMonad m => RSTParser m () substKey = try $ do @@ -1112,21 +1114,21 @@ anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) + let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos)) updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames :: PandocMonad m => RSTParser m [Text] referenceNames = do let rn = try $ do string ".. _" ref <- quotedReferenceName - <|> many ( noneOf ":\n" - <|> try (char '\n' <* - string " " <* - notFollowedBy blankline) - <|> try (char ':' <* lookAhead alphaNum) - ) + <|> manyChar ( noneOf ":\n" + <|> try (char '\n' <* + string " " <* + notFollowedBy blankline) + <|> try (char ':' <* lookAhead alphaNum) + ) char ':' return ref first <- rn @@ -1140,18 +1142,18 @@ regularKey = try $ do -- .. _goodbye: url.com refs <- referenceNames src <- targetURI - guard $ not (null src) + guard $ not (T.null src) let keys = map toKey refs forM_ keys $ \key -> updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef :: PandocMonad m => RSTParser m Text anchorDef = try $ do (refs, raw) <- withRaw $ try (referenceNames <* blanklines) forM_ refs $ \rawkey -> updateState $ \s -> s { stateKeys = - M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + M.insert (toKey rawkey) (("#" <> rawkey,""), nullAttr) $ stateKeys s } -- keep this for 2nd round of parsing, where we'll add the divs (anchor) return raw @@ -1174,12 +1176,12 @@ anchor = try $ do -- because it hides them from promoteHeader, see #4240 _ -> return $ foldr addDiv b refs -headerBlock :: PandocMonad m => RSTParser m [Char] +headerBlock :: PandocMonad m => RSTParser m Text headerBlock = do ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') (ident,_,_) <- registerHeader nullAttr txt let key = toKey (stringify txt) - updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr) + updateState $ \s -> s { stateKeys = M.insert key (("#" <> ident,""), nullAttr) $ stateKeys s } return raw @@ -1201,13 +1203,13 @@ headerBlock = do -- Grid tables TODO: -- - column spans -dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) +dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1215,17 +1217,17 @@ simpleTableSep :: Monad m => Char -> RSTParser m Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: Monad m => RSTParser m [Char] +simpleTableFooter :: Monad m => RSTParser m Text simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine -simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLineWithEmptyCell indices = try $ do cs <- simpleTableRawLine indices - let isEmptyCell = all (\c -> c == ' ' || c == '\t') + let isEmptyCell = T.all (\c -> c == ' ' || c == '\t') guard $ any isEmptyCell cs return cs @@ -1235,15 +1237,15 @@ simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices conLines <- many $ simpleTableRawLineWithEmptyCell indices - let cols = map unlines . transpose $ firstLine : conLines ++ - [replicate (length indices) "" - | not (null conLines)] + let cols = map T.unlines . transpose $ firstLine : conLines ++ + [replicate (length indices) "" + | not (null conLines)] mapM (parseFromString' parseBlocks) cols -simpleTableSplitLine :: [Int] -> String -> [String] +simpleTableSplitLine :: [Int] -> Text -> [Text] simpleTableSplitLine indices line = map trimr - $ tail $ splitByIndices (init indices) line + $ tail $ splitTextByIndices (init indices) line simpleTableHeader :: PandocMonad m => Bool -- ^ Headerless table @@ -1322,35 +1324,35 @@ inlineContent = choice [ whitespace , escapedChar , symbol ] <?> "inline content" -parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines -parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) +parseInlineFromText :: PandocMonad m => Text -> RSTParser m Inlines +parseInlineFromText = parseFromString' (trimInlines . mconcat <$> many inline) hyphens :: Monad m => RSTParser m Inlines hyphens = do - result <- many1 (char '-') + result <- many1Char (char '-') optional endline -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT [Char] st m Inlines +escapedChar :: Monad m => ParserT Text st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST then mempty - else B.str [c] + else B.str $ T.singleton c symbol :: Monad m => RSTParser m Inlines symbol = do result <- oneOf specialChars - return $ B.str [result] + return $ B.str $ T.singleton result -- parses inline code, between codeStart and codeEnd code :: Monad m => RSTParser m Inlines code = try $ do string "``" - result <- manyTill anyChar (try (string "``")) + result <- manyTillChar anyChar (try (string "``")) return $ B.code - $ trim $ unwords $ lines result + $ trim $ T.unwords $ T.lines result -- succeeds only if we're not right after a str (ie. in middle of word) atStart :: Monad m => RSTParser m a -> RSTParser m a @@ -1382,7 +1384,7 @@ interpretedRole = try $ do renderRole contents Nothing role nullAttr renderRole :: PandocMonad m - => String -> Maybe String -> String -> Attr -> RSTParser m Inlines + => Text -> Maybe Text -> Text -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ treatAsText contents "superscript" -> return $ B.superscript $ treatAsText contents @@ -1412,36 +1414,36 @@ renderRole contents fmt role attr = case role of contents where titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref - rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) - where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" - pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) - where padNo = replicate (4 - length pepNo) '0' ++ pepNo - pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + rfcLink rfcNo = B.link rfcUrl ("RFC " <> rfcNo) $ B.str ("RFC " <> rfcNo) + where rfcUrl = "http://www.faqs.org/rfcs/rfc" <> rfcNo <> ".html" + pepLink pepNo = B.link pepUrl ("PEP " <> pepNo) $ B.str ("PEP " <> pepNo) + where padNo = T.replicate (4 - T.length pepNo) "0" <> pepNo + pepUrl = "http://www.python.org/dev/peps/pep-" <> padNo <> "/" treatAsText = B.text . handleEscapes - handleEscapes [] = [] - handleEscapes ('\\':' ':cs) = handleEscapes cs - handleEscapes ('\\':c:cs) = c : handleEscapes cs - handleEscapes (c:cs) = c : handleEscapes cs + handleEscapes = T.concat . removeSpace . T.splitOn "\\" + where headSpace t = fromMaybe t $ T.stripPrefix " " t + removeSpace (x:xs) = x : map headSpace xs + removeSpace [] = [] -roleName :: PandocMonad m => RSTParser m String -roleName = many1 (letter <|> char '-') +roleName :: PandocMonad m => RSTParser m Text +roleName = many1Char (letter <|> char '-') -roleMarker :: PandocMonad m => RSTParser m String +roleMarker :: PandocMonad m => RSTParser m Text roleMarker = char ':' *> roleName <* char ':' -roleBefore :: PandocMonad m => RSTParser m (String,String) +roleBefore :: PandocMonad m => RSTParser m (Text,Text) roleBefore = try $ do role <- roleMarker contents <- unmarkedInterpretedText return (role,contents) -roleAfter :: PandocMonad m => RSTParser m (String,String) +roleAfter :: PandocMonad m => RSTParser m (Text,Text) roleAfter = try $ do contents <- unmarkedInterpretedText role <- roleMarker <|> (stateRstDefaultRole <$> getState) return (role,contents) -unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] +unmarkedInterpretedText :: PandocMonad m => RSTParser m Text unmarkedInterpretedText = try $ do atStart (char '`') contents <- mconcat <$> (many1 @@ -1453,7 +1455,7 @@ unmarkedInterpretedText = try $ do lookAhead (satisfy isAlphaNum)) )) char '`' - return contents + return $ T.pack contents whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" @@ -1461,7 +1463,7 @@ whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" str :: Monad m => RSTParser m Inlines str = do let strChar = noneOf ("\t\n " ++ specialChars) - result <- many1 strChar + result <- many1Char strChar updateLastStrPos return $ B.str result @@ -1489,7 +1491,7 @@ explicitLink = try $ do notFollowedBy (char '`') -- `` marks start of inline code label' <- trimInlines . mconcat <$> manyTill (notFollowedBy (char '`') >> inlineContent) (char '<') - src <- trim <$> manyTill (noneOf ">\n") (char '>') + src <- trim <$> manyTillChar (noneOf ">\n") (char '>') skipSpaces string "`_" optional $ char '_' -- anonymous form @@ -1501,22 +1503,22 @@ explicitLink = try $ do if isURI src then return ((src, ""), nullAttr) else - case reverse src of - '_':xs -> lookupKey [] (toKey (reverse xs)) - _ -> return ((src, ""), nullAttr) + case T.unsnoc src of + Just (xs, '_') -> lookupKey [] (toKey xs) + _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -citationName :: PandocMonad m => RSTParser m String +citationName :: PandocMonad m => RSTParser m Text citationName = do raw <- citationMarker - return $ "[" ++ raw ++ "]" + return $ "[" <> raw <> "]" referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink = try $ do ref <- (referenceName <|> citationName) <* char '_' let label' = B.text ref - let isAnonKey (Key ('_':_)) = True - isAnonKey _ = False + let isAnonKey (Key (T.uncons -> Just ('_',_))) = True + isAnonKey _ = False state <- getState let keyTable = stateKeys state key <- option (toKey ref) $ @@ -1533,7 +1535,7 @@ referenceLink = try $ do -- We keep a list of oldkeys so we can detect lookup loops. lookupKey :: PandocMonad m - => [Key] -> Key -> RSTParser m ((String, String), Attr) + => [Key] -> Key -> RSTParser m ((Text, Text), Attr) lookupKey oldkeys key = do pos <- getPosition state <- getState @@ -1544,8 +1546,8 @@ lookupKey oldkeys key = do logMessage $ ReferenceNotFound key' pos return (("",""),nullAttr) -- check for keys of the form link_, which need to be resolved: - Just ((u@(c:_),""),_) | last u == '_', c /= '#' -> do - let rawkey = init u + Just ((u, ""),_) | T.length u > 1, T.last u == '_', T.head u /= '#' -> do + let rawkey = T.init u let newkey = toKey rawkey if newkey `elem` oldkeys then do @@ -1576,7 +1578,7 @@ subst = try $ do case M.lookup key substTable of Nothing -> do pos <- getPosition - logMessage $ ReferenceNotFound (show key) pos + logMessage $ ReferenceNotFound (tshow key) pos return mempty Just target -> return target diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 6519587c6..73122cc14 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -1,5 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Roff Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -21,7 +23,7 @@ module Text.Pandoc.Readers.Roff , TableRow , RoffToken(..) , RoffTokens(..) - , linePartsToString + , linePartsToText , lexRoff ) where @@ -40,7 +42,7 @@ import qualified Data.Text as T import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Shared (safeRead, substitute) +import Text.Pandoc.Shared (safeRead) import Text.Parsec hiding (tokenPrim) import Text.Pandoc.RoffChar (characterCodes, combiningAccents) import qualified Data.Sequence as Seq @@ -60,28 +62,28 @@ data FontSpec = FontSpec{ fontBold :: Bool defaultFontSpec :: FontSpec defaultFontSpec = FontSpec False False False -data LinePart = RoffStr String +data LinePart = RoffStr T.Text | Font FontSpec | MacroArg Int deriving Show type Arg = [LinePart] -type TableOption = (String, String) +type TableOption = (T.Text, T.Text) data CellFormat = CellFormat { columnType :: Char , pipePrefix :: Bool , pipeSuffix :: Bool - , columnSuffixes :: [String] + , columnSuffixes :: [T.Text] } deriving (Show, Eq, Ord) type TableRow = ([CellFormat], [RoffTokens]) data RoffToken = TextLine [LinePart] | EmptyLine - | ControlLine String [Arg] SourcePos + | ControlLine T.Text [Arg] SourcePos | Tbl [TableOption] [TableRow] SourcePos deriving Show @@ -95,7 +97,7 @@ data RoffMode = NormalMode | CopyMode deriving Show -data RoffState = RoffState { customMacros :: M.Map String RoffTokens +data RoffState = RoffState { customMacros :: M.Map T.Text RoffTokens , prevFont :: FontSpec , currentFont :: FontSpec , tableTabChar :: Char @@ -121,10 +123,10 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT [Char] RoffState m +type RoffLexer m = ParserT T.Text RoffState m -- --- Lexer: String -> RoffToken +-- Lexer: T.Text -> RoffToken -- eofline :: Stream s m Char => ParsecT s u m () @@ -133,11 +135,11 @@ eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") spacetab :: Stream s m Char => ParsecT s u m Char spacetab = char ' ' <|> char '\t' -characterCodeMap :: M.Map String Char +characterCodeMap :: M.Map T.Text Char characterCodeMap = M.fromList $ map (\(x,y) -> (y,x)) characterCodes -combiningAccentsMap :: M.Map String Char +combiningAccentsMap :: M.Map T.Text Char combiningAccentsMap = M.fromList $ map (\(x,y) -> (y,x)) combiningAccents @@ -151,43 +153,40 @@ escapeGlyph = do c <- lookAhead (oneOf ['[','(']) escapeArg >>= resolveGlyph c -resolveGlyph :: PandocMonad m => Char -> String -> RoffLexer m [LinePart] +resolveGlyph :: PandocMonad m => Char -> T.Text -> RoffLexer m [LinePart] resolveGlyph delimChar glyph = do - let cs = substitute "_u" " u" glyph -- unicode glyphs separated by _ - (case words cs of + let cs = T.replace "_u" " u" glyph -- unicode glyphs separated by _ + (case T.words cs of [] -> mzero [s] -> case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> mzero - Just c -> return [RoffStr [c]] + Just c -> return [RoffStr $ T.singleton c] (s:ss) -> do basechar <- case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> - case s of + case T.unpack s of [ch] | isAscii ch && isAlphaNum ch -> return ch _ -> mzero Just c -> return c - let addAccents [] xs = return $ T.unpack . - Normalize.normalize Normalize.NFC . - T.pack $ reverse xs + let addAccents [] xs = return $ Normalize.normalize Normalize.NFC $ + T.reverse xs addAccents (a:as) xs = case M.lookup a combiningAccentsMap `mplus` readUnicodeChar a of - Just x -> addAccents as (x:xs) + Just x -> addAccents as $ T.cons x xs Nothing -> mzero - addAccents ss [basechar] >>= \xs -> return [RoffStr xs]) + addAccents ss (T.singleton basechar) >>= \xs -> return [RoffStr xs]) <|> case delimChar of - '[' -> escUnknown ("\\[" ++ glyph ++ "]") - '(' -> escUnknown ("\\(" ++ glyph) - '\'' -> escUnknown ("\\C'" ++ glyph ++ "'") + '[' -> escUnknown ("\\[" <> glyph <> "]") + '(' -> escUnknown ("\\(" <> glyph) + '\'' -> escUnknown ("\\C'" <> glyph <> "'") _ -> Prelude.fail "resolveGlyph: unknown glyph delimiter" -readUnicodeChar :: String -> Maybe Char -readUnicodeChar ('u':cs@(_:_:_:_:_)) = - case safeRead ('0':'x':cs) of - Just i -> Just (chr i) - Nothing -> Nothing -readUnicodeChar _ = Nothing +readUnicodeChar :: T.Text -> Maybe Char +readUnicodeChar t = case T.uncons t of + Just ('u', cs) | T.length cs > 3 -> chr <$> safeRead ("0x" <> cs) + _ -> Nothing escapeNormal :: PandocMonad m => RoffLexer m [LinePart] escapeNormal = do @@ -218,14 +217,14 @@ escapeNormal = do NormalMode -> return [RoffStr "\\"] 'H' -> escIgnore 'H' [quoteArg] 'L' -> escIgnore 'L' [quoteArg] - 'M' -> escIgnore 'M' [escapeArg, count 1 (satisfy (/='\n'))] + 'M' -> escIgnore 'M' [escapeArg, countChar 1 (satisfy (/='\n'))] 'N' -> escIgnore 'N' [quoteArg] - 'O' -> escIgnore 'O' [count 1 (oneOf ['0','1'])] + 'O' -> escIgnore 'O' [countChar 1 (oneOf ['0','1'])] 'R' -> escIgnore 'R' [quoteArg] 'S' -> escIgnore 'S' [quoteArg] - 'V' -> escIgnore 'V' [escapeArg, count 1 alphaNum] + 'V' -> escIgnore 'V' [escapeArg, countChar 1 alphaNum] 'X' -> escIgnore 'X' [quoteArg] - 'Y' -> escIgnore 'Y' [escapeArg, count 1 (satisfy (/='\n'))] + 'Y' -> escIgnore 'Y' [escapeArg, countChar 1 (satisfy (/='\n'))] 'Z' -> escIgnore 'Z' [quoteArg] '\'' -> return [RoffStr "`"] '\n' -> return mempty -- line continuation @@ -238,12 +237,12 @@ escapeNormal = do 'd' -> escIgnore 'd' [] -- forward down 1/2em 'e' -> return [RoffStr "\\"] 'f' -> escFont - 'g' -> escIgnore 'g' [escapeArg, count 1 (satisfy (/='\n'))] + 'g' -> escIgnore 'g' [escapeArg, countChar 1 (satisfy (/='\n'))] 'h' -> escIgnore 'h' [quoteArg] - 'k' -> escIgnore 'k' [escapeArg, count 1 (satisfy (/='\n'))] + 'k' -> escIgnore 'k' [escapeArg, countChar 1 (satisfy (/='\n'))] 'l' -> escIgnore 'l' [quoteArg] - 'm' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] - 'n' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] + 'm' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] + 'n' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] 'o' -> escIgnore 'o' [quoteArg] 'p' -> escIgnore 'p' [] 'r' -> escIgnore 'r' [] @@ -253,7 +252,7 @@ escapeNormal = do 'v' -> escIgnore 'v' [quoteArg] 'w' -> escIgnore 'w' [quoteArg] 'x' -> escIgnore 'x' [quoteArg] - 'z' -> escIgnore 'z' [count 1 anyChar] + 'z' -> escIgnore 'z' [countChar 1 anyChar] '|' -> return [RoffStr "\x2006"] --1/6 em space '~' -> return [RoffStr "\160"] -- nonbreaking space '\\' -> do @@ -262,40 +261,40 @@ escapeNormal = do CopyMode -> char '\\' NormalMode -> return '\\' return [RoffStr "\\"] - _ -> return [RoffStr [c]] + _ -> return [RoffStr $ T.singleton c] -- man 7 groff: "If a backslash is followed by a character that -- does not constitute a defined escape sequence, the backslash -- is silently ignored and the character maps to itself." escIgnore :: PandocMonad m => Char - -> [RoffLexer m String] + -> [RoffLexer m T.Text] -> RoffLexer m [LinePart] escIgnore c argparsers = do pos <- getPosition arg <- snd <$> withRaw (choice argparsers) <|> return "" - report $ SkippedContent ('\\':c:arg) pos + report $ SkippedContent ("\\" <> T.cons c arg) pos return mempty -escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart] +escUnknown :: PandocMonad m => T.Text -> RoffLexer m [LinePart] escUnknown s = do pos <- getPosition report $ SkippedContent s pos return [RoffStr "\xFFFD"] -signedNumber :: PandocMonad m => RoffLexer m String +signedNumber :: PandocMonad m => RoffLexer m T.Text signedNumber = try $ do sign <- option "" ("-" <$ char '-' <|> "" <$ char '+') - ds <- many1 digit - return (sign ++ ds) + ds <- many1Char digit + return (sign <> ds) -- Parses: [..] or (.. -escapeArg :: PandocMonad m => RoffLexer m String +escapeArg :: PandocMonad m => RoffLexer m T.Text escapeArg = choice [ char '[' *> optional expandString *> - manyTill (noneOf ['\n',']']) (char ']') + manyTillChar (noneOf ['\n',']']) (char ']') , char '(' *> optional expandString *> - count 2 (satisfy (/='\n')) + countChar 2 (satisfy (/='\n')) ] expandString :: PandocMonad m => RoffLexer m () @@ -303,21 +302,21 @@ expandString = try $ do pos <- getPosition char '\\' char '*' - cs <- escapeArg <|> count 1 anyChar - s <- linePartsToString <$> resolveString cs pos - getInput >>= setInput . (s ++) + cs <- escapeArg <|> countChar 1 anyChar + s <- linePartsToText <$> resolveText cs pos + getInput >>= setInput . (s <>) return () -- Parses: '..' -quoteArg :: PandocMonad m => RoffLexer m String -quoteArg = char '\'' *> manyTill (noneOf ['\n','\'']) (char '\'') +quoteArg :: PandocMonad m => RoffLexer m T.Text +quoteArg = char '\'' *> manyTillChar (noneOf ['\n','\'']) (char '\'') escFont :: PandocMonad m => RoffLexer m [LinePart] escFont = do - font <- escapeArg <|> count 1 alphaNum - font' <- if null font || font == "P" + font <- escapeArg <|> countChar 1 alphaNum + font' <- if T.null font || font == "P" then prevFont <$> getState - else return $ foldr processFontLetter defaultFontSpec font + else return $ foldr processFontLetter defaultFontSpec $ T.unpack font modifyState $ \st -> st{ prevFont = currentFont st , currentFont = font' } return [Font font'] @@ -345,7 +344,7 @@ lexMacro = do guard $ sourceColumn pos == 1 || afterConditional st char '.' <|> char '\'' skipMany spacetab - macroName <- many (satisfy isAlphaNum) + macroName <- manyChar (satisfy isAlphaNum) case macroName of "nop" -> return mempty "ie" -> lexConditional "ie" @@ -374,8 +373,8 @@ lexTable pos = do spaces opts <- try tableOptions <|> [] <$ optional (char ';') case lookup "tab" opts of - Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c } + _ -> modifyState $ \st -> st{ tableTabChar = '\t' } spaces skipMany lexComment spaces @@ -388,7 +387,7 @@ lexTable pos = do string ".TE" skipMany spacetab eofline - return $ singleTok $ Tbl opts (rows ++ concat morerows) pos + return $ singleTok $ Tbl opts (rows <> concat morerows) pos lexTableRows :: PandocMonad m => RoffLexer m [TableRow] lexTableRows = do @@ -428,11 +427,11 @@ tableOptions = many1 tableOption <* spaces <* char ';' tableOption :: PandocMonad m => RoffLexer m TableOption tableOption = do - k <- many1 letter + k <- many1Char letter v <- option "" $ try $ do skipMany spacetab char '(' - manyTill anyChar (char ')') + manyTillChar anyChar (char ')') skipMany spacetab optional (char ',' >> skipMany spacetab) return (k,v) @@ -444,7 +443,7 @@ tableFormatSpec = do let speclines = first:rest spaces char '.' - return $ speclines ++ repeat (lastDef [] speclines) -- last line is default + return $ speclines <> repeat (lastDef [] speclines) -- last line is default tableFormatSpecLine :: PandocMonad m => RoffLexer m [CellFormat] tableFormatSpecLine = @@ -456,19 +455,19 @@ tableColFormat = do $ True <$ try (string "|" <* notFollowedBy spacetab) c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-', '=','|'] - suffixes <- many $ try (skipMany spacetab *> count 1 digit) <|> + suffixes <- many $ try (skipMany spacetab *> countChar 1 digit) <|> (do x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M', 'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z'] num <- case toLower x of 'w' -> many1 digit <|> (do char '(' xs <- manyTill anyChar (char ')') - return ("(" ++ xs ++ ")")) <|> + return ("(" <> xs <> ")")) <|> return "" 'f' -> count 1 alphaNum <* skipMany spacetab 'm' -> count 1 alphaNum <* skipMany spacetab _ -> return "" - return $ x : num) + return $ T.pack $ x : num) pipeSuffix' <- option False $ True <$ string "|" return $ CellFormat { columnType = c @@ -479,7 +478,7 @@ tableColFormat = do -- We don't fully handle the conditional. But we do -- include everything under '.ie n', which occurs commonly -- in man pages. -lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens +lexConditional :: PandocMonad m => T.Text -> RoffLexer m RoffTokens lexConditional mname = do pos <- getPosition skipMany spacetab @@ -498,7 +497,7 @@ lexConditional mname = do case mbtest of Nothing -> do putState st -- reset state, so we don't record macros in skipped section - report $ SkippedContent ('.':mname) pos + report $ SkippedContent (T.cons '.' mname) pos return mempty Just True -> return ifPart Just False -> do @@ -508,7 +507,7 @@ lexConditional mname = do expression :: PandocMonad m => RoffLexer m (Maybe Bool) expression = do raw <- charsInBalanced '(' ')' (satisfy (/= '\n')) - <|> many1 nonspaceChar + <|> many1Char nonspaceChar returnValue $ case raw of "1" -> Just True @@ -533,17 +532,17 @@ lexIncludeFile args = do pos <- getPosition case args of (f:_) -> do - let fp = linePartsToString f + let fp = linePartsToText f dirs <- getResourcePath - result <- readFileFromDirs dirs fp + result <- readFileFromDirs dirs $ T.unpack fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s ++) + Just s -> getInput >>= setInput . (s <>) return mempty [] -> return mempty resolveMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens + => T.Text -> [Arg] -> SourcePos -> RoffLexer m RoffTokens resolveMacro macroName args pos = do macros <- customMacros <$> getState case M.lookup macroName macros of @@ -552,7 +551,7 @@ resolveMacro macroName args pos = do let fillLP (MacroArg i) zs = case drop (i - 1) args of [] -> zs - (ys:_) -> ys ++ zs + (ys:_) -> ys <> zs fillLP z zs = z : zs let fillMacroArg (TextLine lineparts) = TextLine (foldr fillLP [] lineparts) @@ -565,7 +564,7 @@ lexStringDef args = do -- string definition [] -> Prelude.fail "No argument to .ds" (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) - let stringName = linePartsToString x + let stringName = linePartsToText x modifyState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } return mempty @@ -575,14 +574,14 @@ lexMacroDef args = do -- macro definition modifyState $ \st -> st{ roffMode = CopyMode } (macroName, stopMacro) <- case args of - (x : y : _) -> return (linePartsToString x, linePartsToString y) + (x : y : _) -> return (linePartsToText x, linePartsToText y) -- optional second arg - (x:_) -> return (linePartsToString x, ".") + (x:_) -> return (linePartsToText x, ".") [] -> Prelude.fail "No argument to .de" let stop = try $ do char '.' <|> char '\'' skipMany spacetab - string stopMacro + textStr stopMacro _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop @@ -628,7 +627,7 @@ lexArgs = do char '"' return [RoffStr "\""] -checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart] +checkDefined :: PandocMonad m => T.Text -> RoffLexer m [LinePart] checkDefined name = do macros <- customMacros <$> getState case M.lookup name macros of @@ -638,19 +637,19 @@ checkDefined name = do escString :: PandocMonad m => RoffLexer m [LinePart] escString = try $ do pos <- getPosition - (do cs <- escapeArg <|> count 1 anyChar - resolveString cs pos) + (do cs <- escapeArg <|> countChar 1 anyChar + resolveText cs pos) <|> mempty <$ char 'S' -- strings and macros share namespace -resolveString :: PandocMonad m - => String -> SourcePos -> RoffLexer m [LinePart] -resolveString stringname pos = do +resolveText :: PandocMonad m + => T.Text -> SourcePos -> RoffLexer m [LinePart] +resolveText stringname pos = do RoffTokens ts <- resolveMacro stringname [] pos case Foldable.toList ts of [TextLine xs] -> return xs _ -> do - report $ SkippedContent ("unknown string " ++ stringname) pos + report $ SkippedContent ("unknown string " <> stringname) pos return mempty lexLine :: PandocMonad m => RoffLexer m RoffTokens @@ -688,16 +687,16 @@ macroArg = try $ do pos <- getPosition backslash char '$' - x <- escapeArg <|> count 1 digit + x <- escapeArg <|> countChar 1 digit case safeRead x of Just i -> return [MacroArg i] Nothing -> do - report $ SkippedContent ("illegal macro argument " ++ x) pos + report $ SkippedContent ("illegal macro argument " <> x) pos return [] regularText :: PandocMonad m => RoffLexer m [LinePart] regularText = do - s <- many1 $ noneOf "\n\r\t \\\"" + s <- many1Char $ noneOf "\n\r\t \\\"" return [RoffStr s] quoteChar :: PandocMonad m => RoffLexer m [LinePart] @@ -708,7 +707,7 @@ quoteChar = do spaceTabChar :: PandocMonad m => RoffLexer m [LinePart] spaceTabChar = do c <- spacetab - return [RoffStr [c]] + return [RoffStr $ T.singleton c] lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens lexEmptyLine = newline >> return (singleTok EmptyLine) @@ -716,8 +715,8 @@ lexEmptyLine = newline >> return (singleTok EmptyLine) manToken :: PandocMonad m => RoffLexer m RoffTokens manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine -linePartsToString :: [LinePart] -> String -linePartsToString = mconcat . map go +linePartsToText :: [LinePart] -> T.Text +linePartsToText = mconcat . map go where go (RoffStr s) = s go _ = mempty @@ -726,7 +725,7 @@ linePartsToString = mconcat . map go lexRoff :: PandocMonad m => SourcePos -> T.Text -> m RoffTokens lexRoff pos txt = do eithertokens <- readWithM (do setPosition pos - mconcat <$> many manToken) def (T.unpack txt) + mconcat <$> many manToken) def txt case eithertokens of Left e -> throwError e Right tokenz -> return tokenz diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9796de4b9..d587bc41b 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | @@ -31,7 +32,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Text.Pandoc.Shared (crFilter) +import Text.Pandoc.Shared (crFilter, tshow) import Text.Pandoc.XML (fromEntities) -- | Read twiki from an input string and return a Pandoc document. @@ -41,19 +42,19 @@ readTWiki :: PandocMonad m -> m Pandoc readTWiki opts s = do res <- readWithM parseTWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TWParser = ParserT [Char] ParserState +type TWParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TWParser m a -> TWParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TWParser m a -> TWParser m a +tryMsg msg p = try p <?> T.unpack msg nested :: PandocMonad m => TWParser m a -> TWParser m a nested p = do @@ -64,25 +65,25 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) +htmlElement :: PandocMonad m => Text -> TWParser m (Attr, Text) htmlElement tag = tryMsg tag $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) - content <- manyTill anyChar (endtag <|> endofinput) + content <- T.pack <$> manyTill anyChar (endtag <|> endofinput) return (htmlAttrToPandoc attr, trim content) where endtag = void $ htmlTag (~== TagClose tag) endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof - trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + trim = T.dropAround (=='\n') -htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc :: [Attribute Text] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] parseHtmlContentWithAttrs :: PandocMonad m - => String -> TWParser m a -> TWParser m (Attr, [a]) + => Text -> TWParser m a -> TWParser m (Attr, [a]) parseHtmlContentWithAttrs tag parser = do (attr, content) <- htmlElement tag parsedContent <- try $ parseContent content @@ -91,7 +92,13 @@ parseHtmlContentWithAttrs tag parser = do parseContent = parseFromString' $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof -parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] +parseCharHtmlContentWithAttrs :: PandocMonad m + => Text -> TWParser m Char -> TWParser m (Attr, Text) +parseCharHtmlContentWithAttrs tag = fmap go . parseHtmlContentWithAttrs tag + where + go (x, y) = (x, T.pack y) + +parseHtmlContent :: PandocMonad m => Text -> TWParser m a -> TWParser m [a] parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p -- @@ -113,7 +120,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => TWParser m B.Blocks @@ -150,38 +157,38 @@ literal = rawBlock <$> htmlElement "literal" format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content -list :: PandocMonad m => String -> TWParser m B.Blocks +list :: PandocMonad m => Text -> TWParser m B.Blocks list prefix = choice [ bulletList prefix , orderedList prefix , definitionList prefix] -definitionList :: PandocMonad m => String -> TWParser m B.Blocks +definitionList :: PandocMonad m => Text -> TWParser m B.Blocks definitionList prefix = tryMsg "definitionList" $ do - indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ " - elements <- many $ parseDefinitionListItem (prefix ++ concat indent) + indent <- lookAhead $ textStr prefix *> many1 (textStr " ") <* textStr "$ " + elements <- many $ parseDefinitionListItem (prefix <> T.concat indent) return $ B.definitionList elements where parseDefinitionListItem :: PandocMonad m - => String -> TWParser m (B.Inlines, [B.Blocks]) + => Text -> TWParser m (B.Inlines, [B.Blocks]) parseDefinitionListItem indent = do - string (indent ++ "$ ") >> skipSpaces + textStr (indent <> "$ ") >> skipSpaces term <- many1Till inline $ string ": " line <- listItemLine indent $ string "$ " return (mconcat term, [line]) -bulletList :: PandocMonad m => String -> TWParser m B.Blocks +bulletList :: PandocMonad m => Text -> TWParser m B.Blocks bulletList prefix = tryMsg "bulletList" $ parseList prefix (char '*') (char ' ') -orderedList :: PandocMonad m => String -> TWParser m B.Blocks +orderedList :: PandocMonad m => Text -> TWParser m B.Blocks orderedList prefix = tryMsg "orderedList" $ parseList prefix (oneOf "1iIaA") (string ". ") parseList :: PandocMonad m - => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks parseList prefix marker delim = do - (indent, style) <- lookAhead $ string prefix *> listStyle <* delim - blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) + (indent, style) <- lookAhead $ textStr prefix *> listStyle <* delim + blocks <- many $ parseListItem (prefix <> indent) (char style <* delim) return $ case style of '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks @@ -191,24 +198,24 @@ parseList prefix marker delim = do _ -> B.bulletList blocks where listStyle = do - indent <- many1 $ string " " + indent <- many1 $ textStr " " style <- marker - return (concat indent, style) + return (T.concat indent, style) parseListItem :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker + => Text -> TWParser m a -> TWParser m B.Blocks +parseListItem prefix marker = textStr prefix >> marker >> listItemLine prefix marker listItemLine :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m a -> TWParser m B.Blocks listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent) where lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse - listContinuation = notFollowedBy (string prefix >> marker) >> + return $ filterSpaces content <> "\n" <> maybe "" (" " <>) continuation + filterSpaces = T.dropWhileEnd (== ' ') + listContinuation = notFollowedBy (textStr prefix >> marker) >> string " " >> lineContent parseContent = parseFromString' $ many1 $ nestedList <|> parseInline parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList) @@ -352,29 +359,29 @@ macroWithParameters = try $ do char '%' return $ buildSpan name kvs $ B.str content -buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines +buildSpan :: Text -> [(Text, Text)] -> B.Inlines -> B.Inlines buildSpan className kvs = B.spanWith attrs where attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) - additionalClasses = maybe [] words $ lookup "class" kvs + additionalClasses = maybe [] T.words $ lookup "class" kvs kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] -macroName :: PandocMonad m => TWParser m String +macroName :: PandocMonad m => TWParser m Text macroName = do first <- letter rest <- many $ alphaNum <|> char '_' - return (first:rest) + return $ T.pack $ first:rest -attributes :: PandocMonad m => TWParser m (String, [(String, String)]) -attributes = foldr (either mkContent mkKvs) ([], []) +attributes :: PandocMonad m => TWParser m (Text, [(Text, Text)]) +attributes = foldr (either mkContent mkKvs) ("", []) <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}') where spnl = skipMany (spaceChar <|> newline) - mkContent c ([], kvs) = (c, kvs) - mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkContent c ("", kvs) = (c, kvs) + mkContent c (rest, kvs) = (c <> " " <> rest, kvs) mkKvs kv (cont, rest) = (cont, kv : rest) -attribute :: PandocMonad m => TWParser m (Either String (String, String)) +attribute :: PandocMonad m => TWParser m (Either Text (Text, Text)) attribute = withKey <|> withoutKey where withKey = try $ do @@ -383,10 +390,10 @@ attribute = withKey <|> withoutKey curry Right key <$> parseValue False withoutKey = try $ Left <$> parseValue True parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces) - withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) + withQuotes = between (char '"') (char '"') (\_ -> countChar 1 $ noneOf ['"']) withoutQuotes allowSpaces - | allowSpaces = many1 $ noneOf "}" - | otherwise = many1 $ noneOf " }" + | allowSpaces = many1Char $ noneOf "}" + | otherwise = many1Char $ noneOf " }" nestedInlines :: (Show a, PandocMonad m) => TWParser m a -> TWParser m B.Inlines @@ -413,10 +420,10 @@ emphHtml :: PandocMonad m => TWParser m B.Inlines emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) nestedString :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar + => TWParser m a -> TWParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end boldCode :: PandocMonad m => TWParser m B.Inlines boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString @@ -429,7 +436,7 @@ code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString codeHtml :: PandocMonad m => TWParser m B.Inlines codeHtml = do - (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + (attrs, content) <- parseCharHtmlContentWithAttrs "code" anyChar return $ B.codeWith attrs $ fromEntities content autoLink :: PandocMonad m => TWParser m B.Inlines @@ -437,7 +444,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- parseLink - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where parseLink = notFollowedBy nop >> (uri <|> emailAddress) @@ -447,17 +454,17 @@ autoLink = try $ do | otherwise = isAlphaNum c str :: PandocMonad m => TWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) nop :: PandocMonad m => TWParser m B.Inlines nop = try $ (void exclamation <|> void nopTag) >> followContent where exclamation = char '!' nopTag = stringAnyCase "<nop>" - followContent = B.str . fromEntities <$> many1 nonspaceChar + followContent = B.str . fromEntities <$> many1Char nonspaceChar symbol :: PandocMonad m => TWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar smart :: PandocMonad m => TWParser m B.Inlines smart = do @@ -491,13 +498,13 @@ link = try $ do setState $ st{ stateAllowLinks = True } return $ B.link url title content -linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) +linkText :: PandocMonad m => TWParser m (Text, Text, B.Inlines) linkText = do string "[[" - url <- many1Till anyChar (char ']') + url <- T.pack <$> many1Till anyChar (char ']') content <- option (B.str url) (mconcat <$> linkContent) char ']' return (url, "", content) where - linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent + linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent . T.pack parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index a638fdf40..5e7aaf910 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier @@ -38,7 +40,7 @@ import Prelude import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import Data.Char (digitToInt, isUpper) -import Data.List (intercalate, intersperse, transpose) +import Data.List (intersperse, transpose) import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), fromAttrib) @@ -52,7 +54,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m @@ -61,21 +63,21 @@ readTextile :: PandocMonad m -> m Pandoc readTextile opts s = do parsed <- readWithM parseTextile def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc +parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc parseTextile = do many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys/notes were... let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . concat + manyTill firstPassParser eof >>= setInput . T.concat setPosition startPos st' <- getState let reversedNotes = stateNotes st' @@ -84,29 +86,29 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc nullMeta (B.toList blocks) -- FIXME -noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char] -noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') +noteMarker :: PandocMonad m => ParserT Text ParserState m Text +noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.') -noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char] +noteBlock :: PandocMonad m => ParserT Text ParserState m Text noteBlock = try $ do startPos <- getPosition ref <- noteMarker optional blankline - contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock) + contents <- T.unlines <$> many1Till anyLine (blanklines <|> noteBlock) endPos <- getPosition - let newnote = (ref, contents ++ "\n") + let newnote = (ref, contents <> "\n") st <- getState let oldnotes = stateNotes st updateState $ \s -> s { stateNotes = newnote : oldnotes } -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks +parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks] +blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -121,22 +123,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT [Char] ParserState m Blocks +block :: PandocMonad m => ParserT Text ParserState m Blocks block = do res <- choice blockParsers <?> "block" - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockBc :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockBc = try $ do string "bc." extended <- option False (True <$ char '.') @@ -150,31 +152,31 @@ codeBlockBc = try $ do rest <- many (notFollowedBy ender *> anyLine) return (f:rest) else manyTill anyLine blanklines - return $ B.codeBlock (trimTrailingNewlines (unlines contents)) + return $ B.codeBlock (trimTrailingNewlines (T.unlines contents)) -trimTrailingNewlines :: String -> String -trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse +trimTrailingNewlines :: Text -> Text +trimTrailingNewlines = T.dropWhileEnd (=='\n') -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockPre :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) - result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) + result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre"))) -- drop leading newline if any - let result'' = case result' of - '\n':xs -> xs - _ -> result' + let result'' = case T.uncons result' of + Just ('\n', xs) -> xs + _ -> result' -- drop trailing newline if any - let result''' = case reverse result'' of - '\n':_ -> init result'' - _ -> result'' - let classes = words $ fromAttrib "class" t + let result''' = case T.unsnoc result'' of + Just (xs, '\n') -> xs + _ -> result'' + let classes = T.words $ fromAttrib "class" t let ident = fromAttrib "id" t let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT [Char] ParserState m Blocks +header :: PandocMonad m => ParserT Text ParserState m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -186,14 +188,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks +blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: PandocMonad m => ParserT [Char] st m Blocks +hrule :: PandocMonad m => ParserT Text st m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -208,39 +210,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks +anyList :: PandocMonad m => ParserT Text ParserState m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> @@ -250,25 +252,25 @@ genericListItemAtDepth c depth = try $ do return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks +definitionList :: PandocMonad m => ParserT Text ParserState m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: PandocMonad m => ParserT [Char] ParserState m () +listStart :: PandocMonad m => ParserT Text ParserState m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: PandocMonad m => Char -> ParserT [Char] st m () +genericListStart :: PandocMonad m => Char -> ParserT Text st m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: PandocMonad m => ParserT [Char] ParserState m () +basicDLStart :: PandocMonad m => ParserT Text ParserState m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines +definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -281,26 +283,26 @@ definitionListStart = try $ do -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks]) definitionListItem = try $ do term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline - multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] multilineDef = try $ do optional whitespace >> newline - s <- many1Till anyChar (try (string "=:" >> newline)) - -- this ++ "\n\n" does not look very good - ds <- parseFromString' parseBlocks (s ++ "\n\n") + s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline)) + -- this <> "\n\n" does not look very good + ds <- parseFromString' parseBlocks (s <> "\n\n") return [ds] -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -308,14 +310,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT [Char] ParserState m Blocks +para :: PandocMonad m => ParserT Text ParserState m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -326,7 +328,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment) +cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -339,18 +341,18 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) cellAttributes notFollowedBy blankline - raw <- trim <$> + raw <- trim . T.pack <$> many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) content <- mconcat <$> parseFromString' (many inline) raw return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -360,7 +362,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: PandocMonad m => ParserT [Char] ParserState m Blocks +table :: PandocMonad m => ParserT Text ParserState m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -384,7 +386,7 @@ table = try $ do (map (map snd) rows) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT [Char] ParserState m () +ignorableRow :: PandocMonad m => ParserT Text ParserState m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -393,9 +395,9 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m () +explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m () explicitBlockStart name = try $ do - string name + string (T.unpack name) attributes char '.' optional whitespace @@ -404,9 +406,9 @@ explicitBlockStart name = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: PandocMonad m - => String -- ^ block tag name - -> ParserT [Char] ParserState m Blocks -- ^ implicit block - -> ParserT [Char] ParserState m Blocks + => Text -- ^ block tag name + -> ParserT Text ParserState m Blocks -- ^ implicit block + -> ParserT Text ParserState m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -419,11 +421,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: PandocMonad m => ParserT [Char] ParserState m Inlines +inline :: PandocMonad m => ParserT Text ParserState m Inlines inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines] +inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines] inlineParsers = [ str , whitespace , endline @@ -437,13 +439,13 @@ inlineParsers = [ str , link , image , mark - , (B.str . (:[])) <$> characterReference + , (B.str . T.singleton) <$> characterReference , smartPunctuation inline , symbol ] -- | Inline markups -inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -457,33 +459,33 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT [Char] st m Inlines +mark :: PandocMonad m => ParserT Text st m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: PandocMonad m => ParserT [Char] st m Inlines +reg :: PandocMonad m => ParserT Text st m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: PandocMonad m => ParserT [Char] st m Inlines +tm :: PandocMonad m => ParserT Text st m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: PandocMonad m => ParserT [Char] st m Inlines +copy :: PandocMonad m => ParserT Text st m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: PandocMonad m => ParserT [Char] ParserState m Inlines +note :: PandocMonad m => ParserT Text ParserState m Inlines note = try $ do ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState - case lookup ref notes of + case lookup (T.pack ref) notes of Nothing -> Prelude.fail "note not found" Just raw -> B.note <$> parseFromString' parseBlocks raw @@ -500,42 +502,42 @@ stringBreakers :: [Char] stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]" wordBoundaries :: [Char] -wordBoundaries = markupChars ++ stringBreakers +wordBoundaries = markupChars <> stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String +hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) - return $ intercalate "-" (x:xs) + return $ T.intercalate "-" (x:xs) -wordChunk :: PandocMonad m => ParserT [Char] ParserState m String +wordChunk :: PandocMonad m => ParserT Text ParserState m Text wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( noneOf wordBoundaries <|> try (notFollowedBy' note *> oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) - return $ hd:tl + return $ T.pack $ hd:tl -- | Any string -str :: PandocMonad m => ParserT [Char] ParserState m Inlines +str :: PandocMonad m => ParserT Text ParserState m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediately -- followed by parens, parens content is unconditionally word acronym fullStr <- option baseStr $ try $ do - guard $ all isUpper baseStr - acro <- enclosed (char '(') (char ')') anyChar' - return $ concat [baseStr, " (", acro, ")"] + guard $ T.all isUpper baseStr + acro <- T.pack <$> enclosed (char '(') (char ')') anyChar' + return $ T.concat [baseStr, " (", acro, ")"] updateLastStrPos return $ B.str fullStr -- | Some number of space chars -whitespace :: PandocMonad m => ParserT [Char] st m Inlines +whitespace :: PandocMonad m => ParserT Text st m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT [Char] ParserState m Inlines +endline :: PandocMonad m => ParserT Text ParserState m Inlines endline = try $ do newline notFollowedBy blankline @@ -543,18 +545,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: PandocMonad m => ParserT [Char] ParserState m Inlines +link :: PandocMonad m => ParserT Text ParserState m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -567,121 +569,122 @@ link = try $ do else lookAhead $ space <|> eof' <|> try (oneOf "!.,;:" *> (space <|> newline <|> eof')) - url <- many1Till nonspaceChar stop + url <- T.pack <$> many1Till nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name return $ if attr == nullAttr then B.link url "" name' else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: PandocMonad m => ParserT [Char] ParserState m Inlines +image :: PandocMonad m => ParserT Text ParserState m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes let attr = case lookup "style" kvs of Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls) Nothing -> (ident, cls, kvs) - src <- many1 (noneOf " \t\n\r!(") - alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')') + src <- T.pack <$> many1 (noneOf " \t\n\r!(") + alt <- fmap T.pack $ option "" $ try $ char '(' *> manyTill anyChar (char ')') char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedEqs = B.str <$> +escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines +escapedEqs = B.str . T.pack <$> try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedTag = B.str <$> +escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines +escapedTag = B.str . T.pack <$> try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines -symbol = B.str . singleton <$> (notFollowedBy newline *> - notFollowedBy rawHtmlBlock *> - oneOf wordBoundaries) +symbol :: PandocMonad m => ParserT Text ParserState m Inlines +symbol = B.str . T.singleton <$> (notFollowedBy newline *> + notFollowedBy rawHtmlBlock *> + oneOf wordBoundaries) -- | Inline code -code :: PandocMonad m => ParserT [Char] ParserState m Inlines +code :: PandocMonad m => ParserT Text ParserState m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char +anyChar' :: PandocMonad m => ParserT Text ParserState m Char anyChar' = satisfy (/='\n') <|> try (char '\n' <* notFollowedBy blankline) -code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines -code1 = B.code <$> surrounded (char '@') anyChar' +code1 :: PandocMonad m => ParserT Text ParserState m Inlines +code1 = B.code . T.pack <$> surrounded (char '@') anyChar' -code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines +code2 :: PandocMonad m => ParserT Text ParserState m Inlines code2 = do htmlTag (tagOpen (=="tt") null) - B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) + B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: PandocMonad m => ParserT [Char] ParserState m Attr +attributes :: PandocMonad m => ParserT Text ParserState m Attr attributes = foldl (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> ("right" <$ char '>') <|> ("left" <$ char '<') notFollowedBy spaceChar - return $ addStyle ("text-align:" ++ alignStr) + return $ addStyle $ T.pack $ "text-align:" ++ alignStr -attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' - ws <- words `fmap` manyTill anyChar' (char ')') + ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')') case reverse ws of - [] -> return $ \(_,_,keyvals) -> ("",[],keyvals) - (('#':ident'):classes') -> return $ \(_,_,keyvals) -> - (ident',classes',keyvals) - classes' -> return $ \(_,_,keyvals) -> - ("",classes',keyvals) - -styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) + [] + -> return $ \(_,_,keyvals) -> ("",[],keyvals) + ((T.uncons -> Just ('#', ident')):classes') + -> return $ \(_,_,keyvals) -> (ident',classes',keyvals) + classes' + -> return $ \(_,_,keyvals) -> ("",classes',keyvals) + +styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' - return $ addStyle style + return $ addStyle $ T.pack style -addStyle :: String -> Attr -> Attr +addStyle :: Text -> Attr -> Attr addStyle style (id',classes,keyvals) = (id',classes,keyvals') where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] - style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] + style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals] -langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum - return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) + return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals) -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT [Char] st m t -- ^ surrounding parser - -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) - -> ParserT [Char] st m [a] + => ParserT Text st m t -- ^ surrounding parser + -> ParserT Text st m a -- ^ content parser (to be used repeatedly) + -> ParserT Text st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: PandocMonad m - => ParserT [Char] ParserState m t -- ^ surrounding parser + => ParserT Text ParserState m t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly) + -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -695,7 +698,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -704,9 +707,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 --- | Create a singleton list -singleton :: a -> [a] -singleton x = [x] - -eof' :: Monad m => ParserT [Char] s m Char +eof' :: Monad m => ParserT Text s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index 5daf6b0bb..501c204f5 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -43,19 +43,19 @@ readTikiWiki :: PandocMonad m -> m Pandoc readTikiWiki opts s = do res <- readWithM parseTikiWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TikiWikiParser = ParserT [Char] ParserState +type TikiWikiParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TikiWikiParser m a -> TikiWikiParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TikiWikiParser m a -> TikiWikiParser m a +tryMsg msg p = try p <?> (T.unpack msg) skip :: TikiWikiParser m a -> TikiWikiParser m () skip parser = Control.Monad.void parser @@ -89,7 +89,7 @@ block = do <|> para skipMany blankline when (verbosity >= INFO) $ - trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) + trace (T.pack $ printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) return res blockElements :: PandocMonad m => TikiWikiParser m B.Blocks @@ -133,7 +133,7 @@ tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks] tableRow = try $ do -- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n")) -- return $ map (B.plain . mconcat) row - row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n")) + row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn . T.pack) (try $ string "|" <* notFollowedBy (oneOf "|\n")) return $ map B.plain row where parseColumn x = do @@ -342,15 +342,15 @@ listItemLine nest = lineContent >>= parseContent lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse + return $ filterSpaces content <> "\n" <> Data.Maybe.fromMaybe "" continuation + filterSpaces = T.dropWhileEnd (== ' ') listContinuation = string (replicate nest '+') >> lineContent parseContent x = do parsed <- parseFromString (many1 inline) x return $ mconcat $ dropWhileEnd (== B.space) parsed -- Turn the CODE macro attributes into Pandoc code block attributes. -mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)]) +mungeAttrs :: [(Text, Text)] -> (Text, [Text], [(Text, Text)]) mungeAttrs rawAttrs = ("", classes, rawAttrs) where -- "colors" is TikiWiki CODE macro for "name of language to do @@ -370,7 +370,7 @@ codeMacro = try $ do string "{CODE(" rawAttrs <- macroAttrs string ")}" - body <- manyTill anyChar (try (string "{CODE}")) + body <- T.pack <$> manyTill anyChar (try (string "{CODE}")) newline if not (null rawAttrs) then @@ -428,9 +428,9 @@ nbsp = try $ do htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines htmlComment = try $ do string "~hc~" - inner <- many1 $ noneOf "~" + inner <- fmap T.pack $ many1 $ noneOf "~" string "~/hc~" - return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END " + return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " <> inner <> " ~/hc~ :END " linebreak :: PandocMonad m => TikiWikiParser m B.Inlines linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) @@ -469,15 +469,15 @@ image = try $ do let title = fromMaybe src $ lookup "desc" rawAttrs let alt = fromMaybe title $ lookup "alt" rawAttrs let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs - if not (null src) + if not (T.null src) then return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt) else - return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END " + return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " <> printAttrs rawAttrs <> "} :END " where - printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs + printAttrs attrs = T.unwords $ map (\(a, b) -> a <> "=\"" <> b <> "\"") attrs -imageAttr :: PandocMonad m => TikiWikiParser m (String, String) +imageAttr :: PandocMonad m => TikiWikiParser m (Text, Text) imageAttr = try $ do key <- many1 (noneOf "=} \t\n") char '=' @@ -485,7 +485,7 @@ imageAttr = try $ do value <- many1 (noneOf "}\"\n") optional $ char '"' optional $ char ',' - return (key, value) + return (T.pack key, T.pack value) -- __strong__ @@ -500,57 +500,57 @@ emph = try $ fmap B.emph (enclosed (string "''") nestedInlines) escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines escapedChar = try $ do string "~" - mNumber <- safeRead <$> many1 digit + mNumber <- safeRead . T.pack <$> many1 digit string "~" return $ B.str $ case mNumber of - Just number -> [toEnum (number :: Int) :: Char] - Nothing -> [] + Just number -> T.singleton $ toEnum (number :: Int) + Nothing -> "" -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this centered :: PandocMonad m => TikiWikiParser m B.Inlines centered = try $ do string "::" - inner <- many1 $ noneOf ":\n" + inner <- fmap T.pack $ many1 $ noneOf ":\n" string "::" - return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END " + return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" <> inner <> ":: :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this colored :: PandocMonad m => TikiWikiParser m B.Inlines colored = try $ do string "~~" - inner <- many1 $ noneOf "~\n" + inner <- fmap T.pack $ many1 $ noneOf "~\n" string "~~" - return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END " + return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" <> inner <> "~~ :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this underlined :: PandocMonad m => TikiWikiParser m B.Inlines underlined = try $ do string "===" - inner <- many1 $ noneOf "=\n" + inner <- fmap T.pack $ many1 $ noneOf "=\n" string "===" - return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END " + return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" <> inner <> "=== :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this boxed :: PandocMonad m => TikiWikiParser m B.Inlines boxed = try $ do string "^" - inner <- many1 $ noneOf "^\n" + inner <- fmap T.pack $ many1 $ noneOf "^\n" string "^" - return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END " + return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" <> inner <> "^ :END " -- --text-- strikeout :: PandocMonad m => TikiWikiParser m B.Inlines strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines) -nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ T.pack <$> many1 spaceChar <* notFollowedBy end breakChars :: PandocMonad m => TikiWikiParser m B.Inlines breakChars = try $ string "%%%" >> return B.linebreak @@ -564,7 +564,7 @@ superMacro = try $ do string "{SUP(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUP}") - return $ B.superscript $ B.text body + return $ B.superscript $ B.text $ T.pack body -- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux subTag :: PandocMonad m => TikiWikiParser m B.Inlines @@ -575,22 +575,22 @@ subMacro = try $ do string "{SUB(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUB}") - return $ B.subscript $ B.text body + return $ B.subscript $ B.text $ T.pack body -- -+text+- code :: PandocMonad m => TikiWikiParser m B.Inlines code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString) -macroAttr :: PandocMonad m => TikiWikiParser m (String, String) +macroAttr :: PandocMonad m => TikiWikiParser m (Text, Text) macroAttr = try $ do key <- many1 (noneOf "=)") char '=' optional $ char '"' value <- many1 (noneOf " )\"") optional $ char '"' - return (key, value) + return (T.pack key, T.pack value) -macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)] +macroAttrs :: PandocMonad m => TikiWikiParser m [(Text, Text)] macroAttrs = try $ sepEndBy macroAttr spaces -- ~np~ __not bold__ ~/np~ @@ -598,13 +598,13 @@ noparse :: PandocMonad m => TikiWikiParser m B.Inlines noparse = try $ do string "~np~" body <- manyTill anyChar (string "~/np~") - return $ B.str body + return $ B.str $ T.pack body str :: PandocMonad m => TikiWikiParser m B.Inlines -str = fmap B.str (many1 alphaNum <|> count 1 characterReference) +str = fmap B.str (T.pack <$> many1 alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => TikiWikiParser m B.Inlines -symbol = fmap B.str (count 1 nonspaceChar) +symbol = fmap B.str (countChar 1 nonspaceChar) -- [[not a link] notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines @@ -612,14 +612,14 @@ notExternalLink = try $ do start <- string "[[" body <- many (noneOf "\n[]") end <- string "]" - return $ B.text (start ++ body ++ end) + return $ B.text $ T.pack $ start ++ body ++ end -- [http://www.somesite.org url|Some Site title] -- ((internal link)) -- -- The ((...)) wiki links and [...] external links are handled -- exactly the same; this abstracts that out -makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines +makeLink :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m B.Inlines makeLink start middle end = try $ do st <- getState guard $ stateAllowLinks st @@ -627,15 +627,15 @@ makeLink start middle end = try $ do (url, title, anchor) <- wikiLinkText start middle end parsedTitle <- parseFromString (many1 inline) title setState $ st{ stateAllowLinks = True } - return $ B.link (url++anchor) "" $mconcat parsedTitle + return $ B.link (url <> anchor) "" $ mconcat parsedTitle -wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String) +wikiLinkText :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m (Text, Text, Text) wikiLinkText start middle end = do - string start - url <- many1 (noneOf $ middle ++ "\n") + string (T.unpack start) + url <- T.pack <$> many1 (noneOf $ T.unpack middle ++ "\n") seg1 <- option url linkContent seg2 <- option "" linkContent - string end + string (T.unpack end) if seg2 /= "" then return (url, seg2, seg1) @@ -644,7 +644,7 @@ wikiLinkText start middle end = do where linkContent = do char '|' - many (noneOf middle) + T.pack <$> many (noneOf $ T.unpack middle) externalLink :: PandocMonad m => TikiWikiParser m B.Inlines externalLink = makeLink "[" "]|" "]" diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0af52e046..996a818fd 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Txt2Tags Copyright : Copyright (C) 2014 Matthew Pickering @@ -18,7 +19,6 @@ import Prelude import Control.Monad (guard, void, when) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader (Reader, asks, runReader) -import Data.Char (toLower) import Data.Default import Data.List (intercalate, transpose) import Data.Maybe (fromMaybe) @@ -36,13 +36,13 @@ import Text.Pandoc.Parsing hiding (space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI, underlineSpan) -type T2T = ParserT String ParserState (Reader T2TMeta) +type T2T = ParserT Text ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file data T2TMeta = T2TMeta { - date :: String -- ^ Current date - , mtime :: String -- ^ Last modification time of infile + date :: Text -- ^ Current date + , mtime :: Text -- ^ Last modification time of infile , infile :: FilePath -- ^ Input file , outfile :: FilePath -- ^ Output file } deriving Show @@ -63,7 +63,7 @@ getT2TMeta = do _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) outp + return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m @@ -74,14 +74,14 @@ readTxt2Tags opts s = do meta <- getT2TMeta let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) $ - T.unpack (crFilter s) ++ "\n\n" + crFilter s <> "\n\n" case parsed of Right result -> return result Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document --- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc +-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc @@ -106,7 +106,7 @@ parseHeader = do header :: T2T () header = titleline >> authorline >> dateline -headerline :: B.ToMetaValue a => String -> T2T a -> T2T () +headerline :: B.ToMetaValue a => Text -> T2T a -> T2T () headerline field p = (() <$ try blankline) <|> (p >>= updateState . B.setMeta field) @@ -123,15 +123,15 @@ authorline = dateline :: T2T () dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline) -type Keyword = String -type Value = String +type Keyword = Text +type Value = Text setting :: T2T (Keyword, Value) setting = do string "%!" - keyword <- ignoreSpacesCap (many1 alphaNum) + keyword <- ignoreSpacesCap (many1Char alphaNum) char ':' - value <- ignoreSpacesCap (manyTill anyChar newline) + value <- ignoreSpacesCap (manyTillChar anyChar newline) return (keyword, value) -- Blocks @@ -163,10 +163,10 @@ balancedTitle c = try $ do spaces level <- length <$> many1 (char c) guard (level <= 5) -- Max header level 5 - heading <- manyTill (noneOf "\n\r") (count level (char c)) + heading <- manyTillChar (noneOf "\n\r") (count level (char c)) label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-")) many spaceChar *> newline - let attr = maybe nullAttr (\x -> (x, [], [])) label + let attr = maybe nullAttr (\x -> (T.pack x, [], [])) label return $ B.headerWith attr level (trimInlines $ B.text heading) para :: T2T Blocks @@ -192,7 +192,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (T.intercalate "\n" rawQuote <> "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -243,17 +243,17 @@ listItem start end = try $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString' end $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString' end $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Int - -> T2T String + -> T2T Text listContinuation markerLength = try $ notFollowedBy' (blankline >> blankline) - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) + *> (mappend <$> (T.concat <$> many1 listLine) + <*> manyChar blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -- Table @@ -327,16 +327,16 @@ taggedBlock = do -- Generic -genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s -blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks blockMarkupArea p f s = try (do - string s *> blankline - f . mconcat <$> manyTill p (eof <|> void (string s *> blankline))) + textStr s *> blankline + f . mconcat <$> manyTill p (eof <|> void (textStr s *> blankline))) -blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupLine p f s = try (f <$> (string s *> space *> p)) +blockMarkupLine :: T2T a -> (a -> Blocks) -> Text -> T2T Blocks +blockMarkupLine p f s = try (f <$> (textStr s *> space *> p)) -- Can be in either block or inline position comment :: Monoid a => T2T a @@ -385,15 +385,15 @@ italic :: T2T Inlines italic = inlineMarkup inline B.emph '/' B.str code :: T2T Inlines -code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id +code = inlineMarkup (T.singleton <$> anyChar) B.code '`' id raw :: T2T Inlines -raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id +raw = inlineMarkup (T.singleton <$> anyChar) B.text '"' id tagged :: T2T Inlines tagged = do target <- getTarget - inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id + inlineMarkup (T.singleton <$> anyChar) (B.rawInline target) '\'' id -- Parser for markup indicated by a double character. -- Inline markup is greedy and glued @@ -404,33 +404,33 @@ inlineMarkup :: Monoid a => T2T a -- Content parser -> (a -> Inlines) -- Constructor -> Char -- Fence - -> (String -> a) -- Special Case to handle ****** + -> (Text -> a) -- Special Case to handle ****** -> T2T Inlines inlineMarkup p f c special = try $ do - start <- many1 (char c) - let l = length start + start <- many1Char (char c) + let l = T.length start guard (l >= 2) when (l == 2) (void $ notFollowedBy space) -- We must make sure that there is no space before the start of the -- closing tags - body <- optionMaybe (try $ manyTill (noneOf "\n\r") + body <- optionMaybe (try $ manyTillChar (noneOf "\n\r") (try $ lookAhead (noneOf " " >> string [c,c] ))) case body of Just middle -> do lastChar <- anyChar - end <- many1 (char c) + end <- many1Char (char c) let parser inp = parseFromString' (mconcat <$> many p) inp - let start' = case drop 2 start of + let start' = case T.drop 2 start of "" -> mempty xs -> special xs - body' <- parser (middle ++ [lastChar]) - let end' = case drop 2 end of + body' <- parser (middle <> T.singleton lastChar) + let end' = case T.drop 2 end of "" -> mempty xs -> special xs return $ f (start' `mappend` body' `mappend` end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) - let body' = replicate (l - 4) c + let body' = T.replicate (l - 4) $ T.singleton c return $ f (special body') link :: T2T Inlines @@ -441,12 +441,12 @@ titleLink :: T2T Inlines titleLink = try $ do char '[' notFollowedBy space - tokens <- sepBy1 (many $ noneOf " ]") space + tokens <- sepBy1 (manyChar $ noneOf " ]") space guard (length tokens >= 2) char ']' let link' = last tokens - guard $ not $ null link' - let tit = unwords (init tokens) + guard $ not $ T.null link' + let tit = T.unwords (init tokens) return $ B.link link' "" (B.text tit) -- Link with image @@ -455,7 +455,7 @@ imageLink = try $ do char '[' body <- image many1 space - l <- manyTill (noneOf "\n\r ") (char ']') + l <- manyTillChar (noneOf "\n\r ") (char ']') return (B.link l "" body) macro :: T2T Inlines @@ -466,7 +466,7 @@ macro = try $ do maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands) where commands = [ ("date", date), ("mtime", mtime) - , ("infile", infile), ("outfile", outfile)] + , ("infile", T.pack . infile), ("outfile", T.pack . outfile)] -- raw URLs in text are automatically linked url :: T2T Inlines @@ -474,7 +474,7 @@ url = try $ do (rawUrl, escapedUrl) <- try uri <|> emailAddress return $ B.link rawUrl "" (B.str escapedUrl) -uri :: T2T (String, String) +uri :: T2T (Text, Text) uri = try $ do address <- t2tURI return (address, escapeURI address) @@ -486,25 +486,25 @@ uri = try $ do --isT2TURI (parse t2tURI "" -> Right _) = True --isT2TURI _ = False -t2tURI :: T2T String +t2tURI :: T2T Text t2tURI = do - start <- try ((++) <$> proto <*> urlLogin) <|> guess - domain <- many1 chars - sep <- many (char '/') - form' <- option mempty ((:) <$> char '?' <*> many1 form) - anchor' <- option mempty ((:) <$> char '#' <*> many anchor) - return (start ++ domain ++ sep ++ form' ++ anchor') + start <- try ((<>) <$> proto <*> urlLogin) <|> guess + domain <- many1Char chars + sep <- manyChar (char '/') + form' <- option mempty (T.cons <$> char '?' <*> many1Char form) + anchor' <- option mempty (T.cons <$> char '#' <*> manyChar anchor) + return (start <> domain <> sep <> form' <> anchor') where protos = ["http", "https", "ftp", "telnet", "gopher", "wais"] - proto = (++) <$> oneOfStrings protos <*> string "://" - guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23")) - <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.') + proto = (<>) <$> oneOfStrings protos <*> textStr "://" + guess = (<>) <$> (((<>) <$> stringAnyCase "www" <*> option mempty (T.singleton <$> oneOf "23")) + <|> stringAnyCase "ftp") <*> (T.singleton <$> char '.') login = alphaNum <|> oneOf "_.-" - pass = many (noneOf " @") + pass = manyChar (noneOf " @") chars = alphaNum <|> oneOf "%._/~:,=$@&+-" anchor = alphaNum <|> oneOf "%._0" form = chars <|> oneOf ";*" - urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@') + urlLogin = option mempty $ try ((\x y z -> x <> y <> T.singleton z) <$> many1Char login <*> option mempty (T.cons <$> char ':' <*> pass) <*> char '@') image :: T2T Inlines @@ -512,12 +512,12 @@ image = try $ do -- List taken from txt2tags source let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] char '[' - (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions) + (path, ext) <- manyUntilChar (noneOf "\n\t\r ") (oneOfStrings extensions) char ']' - return $ B.image (path ++ ext) "" mempty + return $ B.image (path <> ext) "" mempty -- Characters used in markup -specialChars :: String +specialChars :: [Char] specialChars = "%*-_/|:+;" tab :: T2T Char @@ -526,8 +526,8 @@ tab = char '\t' space :: T2T Char space = char ' ' -spaces :: T2T String -spaces = many space +spaces :: T2T Text +spaces = manyChar space endline :: T2T Inlines endline = try $ do @@ -544,17 +544,17 @@ endline = try $ do return B.softbreak str :: T2T Inlines -str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = try $ B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") whitespace :: T2T Inlines whitespace = try $ B.space <$ spaceChar symbol :: T2T Inlines -symbol = B.str . (:[]) <$> oneOf specialChars +symbol = B.str . T.singleton <$> oneOf specialChars -- Utility -getTarget :: T2T String +getTarget :: T2T Text getTarget = do mv <- lookupMeta "target" . stateMeta <$> getState return $ case mv of @@ -565,5 +565,5 @@ getTarget = do atStart :: T2T () atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) -ignoreSpacesCap :: T2T String -> T2T String -ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) +ignoreSpacesCap :: T2T Text -> T2T Text +ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces) diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 27b7d7245..f7edabc48 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Readers.Vimwiki @@ -51,9 +52,10 @@ import Prelude import Control.Monad (guard) import Control.Monad.Except (throwError) import Data.Default -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) import Data.Maybe import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines) import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code, codeBlockWith, definitionList, @@ -73,12 +75,13 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, - stateOptions, uri) -import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast, - isURI) + stateOptions, uri, manyTillChar, manyChar, textStr, + many1Char, countChar, many1TillChar) +import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast, + isURI, tshow) import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, spaces, string) -import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1, +import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) @@ -128,7 +131,7 @@ block = do , definitionList , para ] - trace (take 60 $ show $ toList res) + trace (T.take 60 $ tshow $ toList res) return res blockML :: PandocMonad m => VwParser m Blocks @@ -218,32 +221,32 @@ defMarkerM = string "::" >> spaceChar defMarkerE :: PandocMonad m => VwParser m Char defMarkerE = string "::" >> newline -hasDefMarkerM :: PandocMonad m => VwParser m String -hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) +hasDefMarkerM :: PandocMonad m => VwParser m Text +hasDefMarkerM = manyTillChar (noneOf "\n") (try defMarkerM) preformatted :: PandocMonad m => VwParser m Blocks preformatted = try $ do many spaceChar >> string "{{{" - attrText <- many (noneOf "\n") + attrText <- manyChar (noneOf "\n") lookAhead newline - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}}" >> many spaceChar >> newline)) - if (contents /= "") && (head contents == '\n') - then return $ B.codeBlockWith (makeAttr attrText) (tail contents) + if (contents /= "") && (T.head contents == '\n') + then return $ B.codeBlockWith (makeAttr attrText) (T.tail contents) else return $ B.codeBlockWith (makeAttr attrText) contents -makeAttr :: String -> Attr +makeAttr :: Text -> Attr makeAttr s = - let xs = splitBy (`elem` " \t") s in + let xs = splitTextBy (`elem` (" \t" :: String)) s in ("", [], mapMaybe nameValue xs) -nameValue :: String -> Maybe (String, String) +nameValue :: Text -> Maybe (Text, Text) nameValue s = - let t = splitBy (== '=') s in + let t = splitTextBy (== '=') s in if length t /= 2 then Nothing else let (a, b) = (head t, last t) in - if (length b < 2) || ((head b, last b) /= ('"', '"')) + if (T.length b < 2) || ((T.head b, T.last b) /= ('"', '"')) then Nothing else Just (a, stripFirstAndLast b) @@ -253,16 +256,16 @@ displayMath = try $ do many spaceChar >> string "{{$" mathTag <- option "" mathTagParser many space - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}$" >> many spaceChar >> newline)) let contentsWithTags | mathTag == "" = contents - | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents - ++ "\n\\end{" ++ mathTag ++ "}" + | otherwise = "\\begin{" <> mathTag <> "}\n" <> contents + <> "\n\\end{" <> mathTag <> "}" return $ B.para $ B.displayMath contentsWithTags -mathTagLaTeX :: String -> String +mathTagLaTeX :: Text -> Text mathTagLaTeX s = case s of "equation" -> "" "equation*" -> "" @@ -360,17 +363,17 @@ combineList x [y] = case toList y of _ -> x:[y] combineList x xs = x:xs -listStart :: PandocMonad m => VwParser m (Int, String) +listStart :: PandocMonad m => VwParser m (Int, Text) listStart = try $ do s <- many spaceChar listType <- bulletListMarkers <|> orderedListMarkers spaceChar return (length s, listType) -bulletListMarkers :: PandocMonad m => VwParser m String +bulletListMarkers :: PandocMonad m => VwParser m Text bulletListMarkers = "ul" <$ (char '*' <|> char '-') -orderedListMarkers :: PandocMonad m => VwParser m String +orderedListMarkers :: PandocMonad m => VwParser m Text orderedListMarkers = ("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) <|> ("ol" <$ char '#') @@ -421,9 +424,9 @@ placeholder :: PandocMonad m => VwParser m () placeholder = try $ choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh -ph :: PandocMonad m => String -> VwParser m () +ph :: PandocMonad m => Text -> VwParser m () ph s = try $ do - many spaceChar >>string ('%':s) >> spaceChar + many spaceChar >> textStr (T.cons '%' s) >> spaceChar contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline) --use lookAhead because of placeholder in the whitespace parser let meta' = B.setMeta s contents nullMeta @@ -476,7 +479,7 @@ inlineML :: PandocMonad m => VwParser m Inlines inlineML = choice $ whitespace endlineML:inlineList str :: PandocMonad m => VwParser m Inlines -str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars) +str = B.str <$> many1Char (noneOf $ spaceChars ++ specialChars) whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines whitespace endline = B.space <$ (skipMany1 spaceChar <|> @@ -487,7 +490,7 @@ whitespace' :: PandocMonad m => VwParser m Inlines whitespace' = B.space <$ skipMany1 spaceChar special :: PandocMonad m => VwParser m Inlines -special = B.str <$> count 1 (oneOf specialChars) +special = B.str <$> countChar 1 (oneOf specialChars) bareURL :: PandocMonad m => VwParser m Inlines bareURL = try $ do @@ -505,8 +508,8 @@ strong = try $ do return $ B.spanWith (makeId contents, [], []) mempty <> B.strong contents -makeId :: Inlines -> String -makeId i = concat (stringify <$> toList i) +makeId :: Inlines -> Text +makeId i = T.concat (stringify <$> toList i) emph :: PandocMonad m => VwParser m Inlines emph = try $ do @@ -527,7 +530,7 @@ strikeout = try $ do code :: PandocMonad m => VwParser m Inlines code = try $ do char '`' - contents <- many1Till (noneOf "\n") (char '`') + contents <- many1TillChar (noneOf "\n") (char '`') return $ B.code contents superscript :: PandocMonad m => VwParser m Inlines @@ -542,8 +545,8 @@ subscript = try $ link :: PandocMonad m => VwParser m Inlines link = try $ do string "[[" - contents <- lookAhead $ manyTill anyChar (string "]]") - case '|' `elem` contents of + contents <- lookAhead $ manyTillChar anyChar (string "]]") + case T.any (== '|') contents of False -> do manyTill anyChar (string "]]") -- not using try here because [[hell]o]] is not rendered as a link in vimwiki @@ -552,7 +555,7 @@ link = try $ do else "wikilink" return $ B.link (procLink contents) tit (B.str contents) True -> do - url <- manyTill anyChar $ char '|' + url <- manyTillChar anyChar $ char '|' lab <- mconcat <$> manyTill inline (string "]]") let tit = if isURI url then "" @@ -568,52 +571,52 @@ image = try $ do images :: PandocMonad m => Int -> VwParser m Inlines images k | k == 0 = do - imgurl <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (try $ string "}}") return $ B.image (procImgurl imgurl) "" (B.str "") | k == 1 = do - imgurl <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') alt <- mconcat <$> manyTill inline (try $ string "}}") return $ B.image (procImgurl imgurl) "" alt | k == 2 = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt | otherwise = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (char '|') manyTill anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt -procLink' :: String -> String +procLink' :: Text -> Text procLink' s - | take 6 s == "local:" = "file" ++ drop 5 s - | take 6 s == "diary:" = "diary/" ++ drop 6 s - | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + | T.take 6 s == "local:" = "file" <> T.drop 5 s + | T.take 6 s == "diary:" = "diary/" <> T.drop 6 s + | or ((`T.isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ]) = s | s == "" = "" - | last s == '/' = s + | T.last s == '/' = s | otherwise = s -procLink :: String -> String -procLink s = procLink' x ++ y - where (x, y) = break (=='#') s +procLink :: Text -> Text +procLink s = procLink' x <> y + where (x, y) = T.break (=='#') s -procImgurl :: String -> String -procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s +procImgurl :: Text -> Text +procImgurl s = if T.take 6 s == "local:" then "file" <> T.drop 5 s else s inlineMath :: PandocMonad m => VwParser m Inlines inlineMath = try $ - B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$') + B.math <$ char '$' <*> many1TillChar (noneOf "\n") (char '$') tag :: PandocMonad m => VwParser m Inlines tag = try $ do char ':' - s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space)) - guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") - let ss = splitBy (==':') s + s <- manyTillChar (noneOf spaceChars) (try (char ':' >> lookAhead space)) + guard $ not $ "::" `T.isInfixOf` (":" <> s <> ":") + let ss = splitTextBy (==':') s return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss) todoMark :: PandocMonad m => VwParser m Inlines @@ -646,16 +649,16 @@ nFBTTBSB = hasDefMarker :: PandocMonad m => VwParser m () hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars) -makeTagSpan' :: String -> Inlines -makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> +makeTagSpan' :: Text -> Inlines +makeTagSpan' s = B.spanWith (T.cons '-' s, [], []) (B.str "") <> B.spanWith (s, ["tag"], []) (B.str s) -makeTagSpan :: String -> Inlines +makeTagSpan :: Text -> Inlines makeTagSpan s = B.space <> makeTagSpan' s -mathTagParser :: PandocMonad m => VwParser m String +mathTagParser :: PandocMonad m => VwParser m Text mathTagParser = do - s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars) + s <- try $ lookAhead (char '%' >> manyTillChar (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)) - char '%' >> string s >> char '%' + char '%' >> textStr s >> char '%' return $ mathTagLaTeX s |