diff options
author | despresc <christian.j.j.despres@gmail.com> | 2019-11-04 16:12:37 -0500 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2019-11-12 16:03:45 -0800 |
commit | 90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch) | |
tree | 4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers | |
parent | d3966372f5049eea56213b069fc4d70d8af9144c (diff) | |
download | pandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz |
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884.
+ Use pandoc-types 1.20 and texmath 0.12.
+ Text is now used instead of String, with a few exceptions.
+ In the MediaBag module, some of the types using Strings
were switched to use FilePath instead (not Text).
+ In the Parsing module, new parsers `manyChar`, `many1Char`,
`manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`,
`mantyUntilChar` have been added: these are like their
unsuffixed counterparts but pack some or all of their output.
+ `glob` in Text.Pandoc.Class still takes String since it seems
to be intended as an interface to Glob, which uses strings.
It seems to be used only once in the package, in the EPUB writer,
so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers')
47 files changed, 2427 insertions, 2307 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 368c86d4f..40b6f77c9 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.CommonMark Copyright : Copyright (C) 2015-2019 John MacFarlane @@ -18,9 +20,9 @@ where import Prelude import CMarkGFM import Control.Monad.State -import Data.List (groupBy) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Emoji (emojiToInline) @@ -40,24 +42,24 @@ readCommonMark opts s = return $ [ extTable | isEnabled Ext_pipe_tables opts ] ++ [ extAutolink | isEnabled Ext_autolink_bare_uris opts ] -convertEmojis :: String -> [Inline] -convertEmojis s@(':':xs) = - case break (==':') xs of - (ys,':':zs) -> +convertEmojis :: Text -> [Inline] +convertEmojis s@(T.uncons -> Just (':',xs)) = + case T.break (==':') xs of + (ys, T.uncons -> Just (':',zs)) -> case emojiToInline ys of Just em -> em : convertEmojis zs - Nothing -> Str (':' : ys) : convertEmojis (':':zs) + Nothing -> Str (":" <> ys) : convertEmojis (":" <> zs) _ -> [Str s] convertEmojis s = - case break (==':') s of + case T.break (==':') s of ("","") -> [] (_,"") -> [Str s] - (xs,ys) -> Str xs:convertEmojis ys + (xs,ys) -> Str xs : convertEmojis ys addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty -addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block +addHeaderId :: ReaderOptions -> Block -> State (Set.Set Text) Block addHeaderId opts (Header lev (_,classes,kvs) ils) = do ids <- get let ident = uniqueIdent (readerExtensions opts) ils ids @@ -82,14 +84,14 @@ addBlock _ (Node _ THEMATIC_BREAK _) = addBlock opts (Node _ BLOCK_QUOTE nodes) = (BlockQuote (addBlocks opts nodes) :) addBlock opts (Node _ (HTML_BLOCK t) _) - | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawBlock (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) = id addBlock _ (Node _ (CODE_BLOCK info t) _) = - (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :) + (CodeBlock ("", take 1 (T.words info), []) t :) addBlock opts (Node _ (HEADING lev) nodes) = (Header lev ("",[],[]) (addInlines opts nodes) :) addBlock opts (Node _ (LIST listAttrs) nodes) = @@ -176,29 +178,28 @@ addInlines opts = foldr (addInline opts) [] addInline :: ReaderOptions -> Node -> [Inline] -> [Inline] addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++) - where raw = unpack t - clumps = groupBy samekind raw + where clumps = T.groupBy samekind t samekind ' ' ' ' = True samekind ' ' _ = False samekind _ ' ' = False samekind _ _ = True - toinl (' ':_) = [Space] - toinl xs = if isEnabled Ext_emoji opts - then convertEmojis xs - else [Str xs] + toinl (T.uncons -> Just (' ', _)) = [Space] + toinl xs = if isEnabled Ext_emoji opts + then convertEmojis xs + else [Str xs] addInline _ (Node _ LINEBREAK _) = (LineBreak :) addInline opts (Node _ SOFTBREAK _) | isEnabled Ext_hard_line_breaks opts = (LineBreak :) | otherwise = (SoftBreak :) addInline opts (Node _ (HTML_INLINE t) _) - | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :) + | isEnabled Ext_raw_html opts = (RawInline (Format "html") t :) | otherwise = id -- Note: the cmark parser will never generate CUSTOM_BLOCK, -- so we don't need to handle it: addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) = id addInline _ (Node _ (CODE t) _) = - (Code ("",[],[]) (unpack t) :) + (Code ("",[],[]) t :) addInline opts (Node _ EMPH nodes) = (Emph (addInlines opts nodes) :) addInline opts (Node _ STRONG nodes) = @@ -206,7 +207,7 @@ addInline opts (Node _ STRONG nodes) = addInline opts (Node _ STRIKETHROUGH nodes) = (Strikeout (addInlines opts nodes) :) addInline opts (Node _ (LINK url title) nodes) = - (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Link nullAttr (addInlines opts nodes) (url, title) :) addInline opts (Node _ (IMAGE url title) nodes) = - (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :) + (Image nullAttr (addInlines opts nodes) (url, title) :) addInline _ _ = id diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index ceb63ac84..1aa1dfaa4 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -19,6 +19,7 @@ import Control.Monad.Except (guard, liftM2, throwError) import qualified Data.Foldable as F import Data.Maybe (fromMaybe) import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.Definition @@ -70,7 +71,6 @@ parseCreole = do eof return $ B.doc bs - -- -- block parsers -- @@ -92,9 +92,9 @@ nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart >> manyTill content nowikiEnd) where content = brackets <|> line - brackets = try $ option "" ((:[]) <$> newline) - <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol) - line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol + brackets = try $ option "" (T.singleton <$> newline) + <+> (char ' ' >> (manyChar (char ' ') <+> textStr "}}}") <* eol) + line = option "" (T.singleton <$> newline) <+> manyTillChar anyChar eol eol = lookAhead $ try $ nowikiEnd <|> newline nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline @@ -106,7 +106,7 @@ header = try $ do fmap length (many1 (char '=')) guard $ level <= 6 skipSpaces - content <- B.str <$> manyTill (noneOf "\n") headerEnd + content <- B.str <$> manyTillChar (noneOf "\n") headerEnd return $ B.header level content where headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline @@ -204,7 +204,7 @@ inline = choice [ whitespace escapedChar :: PandocMonad m => CRLParser m B.Inlines escapedChar = - fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ") + fmap (B.str . T.singleton) (try $ char '~' >> noneOf "\t\n ") escapedLink :: PandocMonad m => CRLParser m B.Inlines escapedLink = try $ do @@ -217,8 +217,8 @@ image = try $ do (orig, src) <- wikiImg return $ B.image src "" (B.str orig) where - linkSrc = many $ noneOf "|}\n\r\t" - linkDsc = char '|' >> many (noneOf "}\n\r\t") + linkSrc = manyChar $ noneOf "|}\n\r\t" + linkDsc = char '|' >> manyChar (noneOf "}\n\r\t") wikiImg = try $ do string "{{" src <- linkSrc @@ -231,11 +231,11 @@ link = try $ do (orig, src) <- uriLink <|> wikiLink return $ B.link src "" orig where - linkSrc = many $ noneOf "|]\n\r\t" - linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines + linkSrc = manyChar $ noneOf "|]\n\r\t" + linkDsc :: PandocMonad m => Text -> CRLParser m B.Inlines linkDsc otxt = B.str <$> try (option otxt - (char '|' >> many (noneOf "]\n\r\t"))) + (char '|' >> manyChar (noneOf "]\n\r\t"))) linkImg = try $ char '|' >> image wikiLink = try $ do string "[[" @@ -248,7 +248,7 @@ link = try $ do return (B.str orig, src) inlineNowiki :: PandocMonad m => CRLParser m B.Inlines -inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end) +inlineNowiki = B.code <$> (start >> manyTillChar (noneOf "\n\r") end) where start = try $ string "{{{" end = try $ string "}}}" >> lookAhead (noneOf "}") @@ -271,11 +271,11 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) innerNewline = return B.space symbol :: PandocMonad m => CRLParser m B.Inlines -symbol = fmap (B.str . (:[])) (oneOf specialChars) +symbol = fmap (B.str . T.singleton) (oneOf specialChars) str :: PandocMonad m => CRLParser m B.Inlines str = let strChar = noneOf ("\t\n " ++ specialChars) in - fmap B.str (many1 strChar) + fmap B.str (many1Char strChar) bold :: PandocMonad m => CRLParser m B.Inlines bold = B.strong . mconcat <$> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3f44f83f8..ade9d27a3 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.DocBook Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -536,20 +537,22 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions - $ T.unpack $ crFilter inp + let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) -- We treat <?asciidoc-br?> specially (issue #1236), converting it -- to <br/>, since xml-light doesn't parse the instruction correctly. -- Other xml instructions are simply removed from the input stream. -handleInstructions :: String -> String -handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs -handleInstructions xs = case break (=='<') xs of +handleInstructions :: Text -> Text +handleInstructions = T.pack . handleInstructions' . T.unpack + +handleInstructions' :: String -> String +handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs +handleInstructions' xs = case break (=='<') xs of (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs + ([], '<':zs) -> '<' : handleInstructions' zs + (ys, zs) -> ys ++ handleInstructions' zs getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -580,13 +583,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function -named :: String -> Element -> Bool -named s e = qName (elName e) == s +named :: Text -> Element -> Bool +named s e = qName (elName e) == T.unpack s -- @@ -611,7 +614,7 @@ addMetadataFromElement e = do [z] -> getInlines z >>= addMeta fieldname zs -> mapM getInlines zs >>= addMeta fieldname -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m () addMeta field val = modify (setMeta field val) instance HasMeta DBState where @@ -638,10 +641,8 @@ admonitionTags :: [String] admonitionTags = ["important","caution","note","tip","warning"] -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- meld text into beginning of first paragraph of Blocks. -- assumes Blocks start with a Para; if not, does nothing. @@ -668,7 +669,7 @@ getMediaobject e = do h = case atVal "depth" of "" -> [] d -> [("height", d)] - atr = (atVal "id", words $ atVal "role", w ++ h) + atr = (atVal "id", T.words $ atVal "role", w ++ h) in return (atVal "fileref", atr) let getCaption el = case filterChild (\x -> named "caption" x || named "textobject" x @@ -691,8 +692,8 @@ parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated @@ -740,7 +741,7 @@ parseBlock (Elem e) = "refsect2" -> sect 2 "refsect3" -> sect 3 "refsection" -> gets dbSectionLevel >>= sect . (+1) - l@_ | l `elem` admonitionTags -> parseAdmonition l + l@_ | l `elem` admonitionTags -> parseAdmonition $ T.pack l "area" -> skip "areaset" -> skip "areaspec" -> skip @@ -800,7 +801,7 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty parseMixed container conts = do @@ -818,7 +819,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ T.pack $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -871,9 +872,9 @@ parseBlock (Elem e) = _ -> AlignDefault let toWidth c = case findAttr (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> (x >= '0' && x <= '9') - || x == '.') w + || x == '.') (T.pack w) Nothing -> 0 :: Double let numrows = case bodyrows of [] -> 0 @@ -938,9 +939,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) (text . T.pack) $ lookupEntity ref parseInline (Elem e) = case qName (elName e) of "equation" -> equation e displayMath @@ -980,7 +981,7 @@ parseInline (Elem e) = "constant" -> codeWithLang "userinput" -> codeWithLang "varargs" -> return $ code "(...)" - "keycap" -> return (str $ strContent e) + "keycap" -> return (str $ T.pack $ strContent e) "keycombo" -> keycombo <$> mapM parseInline (elContent e) "menuchoice" -> menuchoice <$> @@ -992,20 +993,20 @@ parseInline (Elem e) = let title = case attrValue "endterm" e of "" -> maybe "???" xrefTitleByElem (findElementById linkend content) - endterm -> maybe "???" strContent + endterm -> maybe "???" (T.pack . strContent) (findElementById endterm content) - return $ link ('#' : linkend) "" (text title) - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + return $ link ("#" <> linkend) "" (text title) + "email" -> return $ link ("mailto:" <> T.pack (strContent e)) "" + $ str $ T.pack $ strContent e + "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e "ulink" -> link (attrValue "url" e) "" <$> innerInlines "link" -> do ils <- innerInlines let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "linkend" e + Just h -> T.pack h + _ -> "#" <> attrValue "linkend" e let ils' = if ils == mempty then str href else ils - let attr = (attrValue "id" e, words $ attrValue "role" e, []) + let attr = (attrValue "id" e, T.words $ attrValue "role" e, []) return $ linkWith attr href "" ils' "foreignphrase" -> emph <$> innerInlines "emphasis" -> case attrValue "role" e of @@ -1023,7 +1024,7 @@ parseInline (Elem e) = "br" -> return linebreak _ -> skip >> innerInlines where skip = do - lift $ report $ IgnoredElement $ qName (elName e) + lift $ report $ IgnoredElement $ T.pack $ qName (elName e) return mempty innerInlines = (trimInlines . mconcat) <$> @@ -1032,7 +1033,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -1062,8 +1063,8 @@ parseInline (Elem e) = -- if there's no such attribute, employ some heuristics based on what -- docbook-xsl does. xrefTitleByElem el - | not (null xrefLabel) = xrefLabel - | otherwise = case qName (elName el) of + | not (T.null xrefLabel) = xrefLabel + | otherwise = case qName (elName el) of "chapter" -> descendantContent "title" el "section" -> descendantContent "title" el "sect1" -> descendantContent "title" el @@ -1073,10 +1074,10 @@ parseInline (Elem e) = "sect5" -> descendantContent "title" el "cmdsynopsis" -> descendantContent "command" el "funcsynopsis" -> descendantContent "function" el - _ -> qName (elName el) ++ "_title" + _ -> T.pack $ qName (elName el) ++ "_title" where xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" strContent + descendantContent name = maybe "???" (T.pack . strContent) . filterElementName (\n -> qName n == name) -- | Extract a math equation from an element @@ -1088,20 +1089,20 @@ equation :: Monad m => Element -- ^ The element from which to extract a mathematical equation - -> (String -> Inlines) + -> (Text -> Inlines) -- ^ A constructor for some Inlines, taking the TeX code as input -> m Inlines equation e constructor = - return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations + return $ mconcat $ map constructor $ mathMLEquations <> latexEquations where - mathMLEquations :: [String] + mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") - (readMathML . showElement) + (readMathML . T.pack . showElement) - latexEquations :: [String] + latexEquations :: [Text] latexEquations = readMath (\x -> qName (elName x) == "mathphrase") - (concat . fmap showVerbatimCData . elContent) + (T.concat . fmap showVerbatimCData . elContent) readMath :: (Element -> Bool) -> (Element -> b) -> [b] readMath childPredicate fromElement = @@ -1111,9 +1112,10 @@ equation e constructor = -- | Get the actual text stored in a CData block. 'showContent' -- returns the text still surrounded by the [[CDATA]] tags. -showVerbatimCData :: Content -> String -showVerbatimCData (Text (CData _ d _)) = d -showVerbatimCData c = showContent c +showVerbatimCData :: Content -> Text +showVerbatimCData (Text (CData _ d _)) = T.pack d +showVerbatimCData c = T.pack $ showContent c + -- | Set the prefix of a name to 'Nothing' removePrefix :: QName -> QName diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 387c3c7e2..cd4ff01db 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -4,6 +4,7 @@ {-# LANGUAGE PatternGuards #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Docx Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -68,12 +69,12 @@ import Data.Default (Default) import Data.List (delete, intersect) import Data.Char (isSpace) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe (isJust, fromMaybe) import Data.Sequence (ViewL (..), viewl) import qualified Data.Sequence as Seq import qualified Data.Set as Set import Text.Pandoc.Builder --- import Text.Pandoc.Definition import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Options import Text.Pandoc.Readers.Docx.Combine @@ -101,14 +102,14 @@ readDocx opts bytes readDocx _ _ = throwError $ PandocSomeError "couldn't parse docx file" -data DState = DState { docxAnchorMap :: M.Map String String - , docxAnchorSet :: Set.Set String - , docxImmedPrevAnchor :: Maybe String +data DState = DState { docxAnchorMap :: M.Map T.Text T.Text + , docxAnchorSet :: Set.Set T.Text + , docxImmedPrevAnchor :: Maybe T.Text , docxMediaBag :: MediaBag , docxDropCap :: Inlines -- keep track of (numId, lvl) values for -- restarting - , docxListState :: M.Map (String, String) Integer + , docxListState :: M.Map (T.Text, T.Text) Integer , docxPrevPara :: Inlines } @@ -142,7 +143,7 @@ spansToKeep = [] divsToKeep :: [ParaStyleName] divsToKeep = ["Definition", "Definition Term"] -metaStyles :: M.Map ParaStyleName String +metaStyles :: M.Map ParaStyleName T.Text metaStyles = M.fromList [ ("Title", "title") , ("Subtitle", "subtitle") , ("Author", "author") @@ -167,7 +168,7 @@ isEmptyPar (Paragraph _ parParts) = isEmptyElem _ = True isEmptyPar _ = False -bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue) +bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue) bodyPartsToMeta' [] = return M.empty bodyPartsToMeta' (bp : bps) | (Paragraph pPr parParts) <- bp @@ -232,22 +233,22 @@ runElemToInlines Tab = space runElemToInlines SoftHyphen = text "\xad" runElemToInlines NoBreakHyphen = text "\x2011" -runElemToString :: RunElem -> String -runElemToString (TextRun s) = s -runElemToString LnBrk = ['\n'] -runElemToString Tab = ['\t'] -runElemToString SoftHyphen = ['\xad'] -runElemToString NoBreakHyphen = ['\x2011'] +runElemToText :: RunElem -> T.Text +runElemToText (TextRun s) = s +runElemToText LnBrk = T.singleton '\n' +runElemToText Tab = T.singleton '\t' +runElemToText SoftHyphen = T.singleton '\xad' +runElemToText NoBreakHyphen = T.singleton '\x2011' -runToString :: Run -> String -runToString (Run _ runElems) = concatMap runElemToString runElems -runToString _ = "" +runToText :: Run -> T.Text +runToText (Run _ runElems) = T.concat $ map runElemToText runElems +runToText _ = "" -parPartToString :: ParPart -> String -parPartToString (PlainRun run) = runToString run -parPartToString (InternalHyperLink _ runs) = concatMap runToString runs -parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs -parPartToString _ = "" +parPartToText :: ParPart -> T.Text +parPartToText (PlainRun run) = runToText run +parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs +parPartToText _ = "" blacklistedCharStyles :: [CharStyleName] blacklistedCharStyles = ["Hyperlink"] @@ -310,7 +311,7 @@ runToInlines :: PandocMonad m => Run -> DocxContext m Inlines runToInlines (Run rs runElems) | maybe False isCodeCharStyle $ rParentStyle rs = do rPr <- resolveDependentRunStyle rs - let codeString = code $ concatMap runElemToString runElems + let codeString = code $ T.concat $ map runElemToText runElems return $ case rVertAlign rPr of Just SupScrpt -> superscript codeString Just SubScrpt -> subscript codeString @@ -328,17 +329,17 @@ runToInlines (Endnote bps) = do return $ note blksList runToInlines (InlineDrawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" extentToAttr :: Extent -> Attr extentToAttr (Just (w, h)) = ("", [], [("width", showDim w), ("height", showDim h)] ) where - showDim d = show (d / 914400) ++ "in" + showDim d = tshow (d / 914400) <> "in" extentToAttr _ = nullAttr -blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines +blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines blocksToInlinesWarn cmtId blks = do let blkList = toList blks notParaOrPlain :: Block -> Bool @@ -347,7 +348,7 @@ blocksToInlinesWarn cmtId blks = do notParaOrPlain _ = True unless ( not (any notParaOrPlain blkList)) $ lift $ P.report $ DocxParserWarning $ - "Docx comment " ++ cmtId ++ " will not retain formatting" + "Docx comment " <> cmtId <> " will not retain formatting" return $ blocksToInlines' blkList -- The majority of work in this function is done in the primed @@ -440,12 +441,12 @@ parPartToInlines' (BookMark _ anchor) = return $ spanWith (newAnchor, ["anchor"], []) mempty parPartToInlines' (Drawing fp title alt bs ext) = do (lift . lift) $ P.insertMedia fp Nothing bs - return $ imageWith (extentToAttr ext) fp title $ text alt + return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt parPartToInlines' Chart = return $ spanWith ("", ["chart"], []) $ text "[CHART]" parPartToInlines' (InternalHyperLink anchor runs) = do ils <- smushInlines <$> mapM runToInlines runs - return $ link ('#' : anchor) "" ils + return $ link ("#" <> anchor) "" ils parPartToInlines' (ExternalHyperLink target runs) = do ils <- smushInlines <$> mapM runToInlines runs return $ link target "" ils @@ -463,7 +464,7 @@ isAnchorSpan (Span (_, classes, kvs) _) = null kvs isAnchorSpan _ = False -dummyAnchors :: [String] +dummyAnchors :: [T.Text] dummyAnchors = ["_GoBack"] makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks @@ -477,7 +478,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) , (Span (anchIdent, ["anchor"], _) cIls) <- c = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident newIls = concatMap f ils where f il | il == c = cIls @@ -490,7 +491,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) = do hdrIDMap <- gets docxAnchorMap exts <- readerExtensions <$> asks docxOptions - let newIdent = if null ident + let newIdent = if T.null ident then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap) else ident modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap} @@ -558,8 +559,8 @@ parStyleToTransform pPr else transform parStyleToTransform _ = return id -normalizeToClassName :: (FromStyleName a) => a -> String -normalizeToClassName = map go . fromStyleName +normalizeToClassName :: (FromStyleName a) => a -> T.Text +normalizeToClassName = T.map go . fromStyleName where go c | isSpace c = '-' | otherwise = c @@ -574,7 +575,8 @@ bodyPartToBlocks (Paragraph pPr parparts) return $ transform $ codeBlock $ - concatMap parPartToString parparts + T.concat $ + map parPartToText parparts | Just (style, n) <- pHeading pPr = do ils <-local (\s-> s{docxInHeaderBlock=True}) (smushInlines <$> mapM parPartToInlines parparts) @@ -646,7 +648,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do , ("num-id", numId) , ("format", fmt) , ("text", txt) - , ("start", show start) + , ("start", tshow start) ] modify $ \st -> st{ docxListState = -- expire all the continuation data for lists of level > this one: @@ -705,12 +707,12 @@ bodyPartToBlocks (OMathPara e) = -- replace targets with generated anchors. rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline -rewriteLink' l@(Link attr ils ('#':target, title)) = do +rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do anchorMap <- gets docxAnchorMap case M.lookup target anchorMap of Just newTarget -> do modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)} - return $ Link attr ils ('#':newTarget, title) + return $ Link attr ils ("#" <> newTarget, title) Nothing -> do modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)} return l diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index da40a80ea..82791d669 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE TypeSynonymInstances #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>, diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index e7a916f1c..05d9dd697 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Fields Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..) ) where import Prelude +import Data.Functor (($>)) +import qualified Data.Text as T import Text.Parsec -import Text.Parsec.String (Parser) +import Text.Parsec.Text (Parser) -type URL = String +type URL = T.Text data FieldInfo = HyperlinkField URL | UnknownField deriving (Show) -parseFieldInfo :: String -> Either ParseError FieldInfo +parseFieldInfo :: T.Text -> Either ParseError FieldInfo parseFieldInfo = parse fieldInfo "" fieldInfo :: Parser FieldInfo @@ -34,31 +37,31 @@ fieldInfo = <|> return UnknownField -escapedQuote :: Parser String -escapedQuote = string "\\\"" +escapedQuote :: Parser T.Text +escapedQuote = string "\\\"" $> "\\\"" -inQuotes :: Parser String +inQuotes :: Parser T.Text inQuotes = - (try escapedQuote) <|> (anyChar >>= (\c -> return [c])) + (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c)) -quotedString :: Parser String +quotedString :: Parser T.Text quotedString = do char '"' - concat <$> manyTill inQuotes (try (char '"')) + T.concat <$> manyTill inQuotes (try (char '"')) -unquotedString :: Parser String -unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof) +unquotedString :: Parser T.Text +unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof) -fieldArgument :: Parser String +fieldArgument :: Parser T.Text fieldArgument = quotedString <|> unquotedString -- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25 -hyperlinkSwitch :: Parser (String, String) +hyperlinkSwitch :: Parser (T.Text, T.Text) hyperlinkSwitch = do sw <- string "\\l" spaces farg <- fieldArgument - return (sw, farg) + return (T.pack sw, farg) hyperlink :: Parser URL hyperlink = do @@ -68,6 +71,6 @@ hyperlink = do farg <- fieldArgument switches <- spaces *> many hyperlinkSwitch let url = case switches of - ("\\l", s) : _ -> farg ++ ('#': s) + ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs index eb24640c5..b7b7a3835 100644 --- a/src/Text/Pandoc/Readers/Docx/Lists.hs +++ b/src/Text/Pandoc/Readers/Docx/Lists.hs @@ -22,6 +22,7 @@ import Prelude import Data.List import Data.Maybe import Data.String (fromString) +import qualified Data.Text as T import Text.Pandoc.Generic (bottomUp) import Text.Pandoc.JSON import Text.Pandoc.Readers.Docx.Parse (ParaStyleName) @@ -45,20 +46,20 @@ getNumId _ = Nothing getNumIdN :: Block -> Integer getNumIdN b = fromMaybe (-1) (getNumId b) -getText :: Block -> Maybe String +getText :: Block -> Maybe T.Text getText (Div (_, _, kvs) _) = lookup "text" kvs getText _ = Nothing data ListType = Itemized | Enumerated ListAttributes -listStyleMap :: [(String, ListNumberStyle)] +listStyleMap :: [(T.Text, ListNumberStyle)] listStyleMap = [("upperLetter", UpperAlpha), ("lowerLetter", LowerAlpha), ("upperRoman", UpperRoman), ("lowerRoman", LowerRoman), ("decimal", Decimal)] -listDelimMap :: [(String, ListNumberDelim)] +listDelimMap :: [(T.Text, ListNumberDelim)] listDelimMap = [("%1)", OneParen), ("(%1)", TwoParens), ("%1.", Period)] @@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b = _ -> Nothing getListType _ = Nothing -listParagraphDivs :: [String] +listParagraphDivs :: [T.Text] listParagraphDivs = ["list-paragraph"] listParagraphStyles :: [ParaStyleName] -listParagraphStyles = map fromString listParagraphDivs +listParagraphStyles = map (fromString . T.unpack) listParagraphDivs -- This is a first stab at going through and attaching meaning to list -- paragraphs, without an item marker, following a list item. We diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 889bd80fc..8598ada6f 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead) import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) -import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont) +import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC @@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes } deriving Show -data ReaderState = ReaderState { stateWarnings :: [String] +data ReaderState = ReaderState { stateWarnings :: [T.Text] , stateFldCharState :: FldCharState } deriving Show @@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b] concatMapM f xs = liftM concat (mapM f xs) - -- This is similar to `mapMaybe`: it maps a function returning the D -- monad over a list, and only keeps the non-erroring return values. mapD :: (a -> D b) -> [a] -> D [b] @@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle data Numbering = Numbering NameSpaces [Numb] [AbstractNumb] deriving Show -data Numb = Numb String String [LevelOverride] +data Numb = Numb T.Text T.Text [LevelOverride] deriving Show -- ilvl startOverride lvl -data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level) +data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level) deriving Show -data AbstractNumb = AbstractNumb String [Level] +data AbstractNumb = AbstractNumb T.Text [Level] deriving Show -- ilvl format string start -data Level = Level String String String (Maybe Integer) +data Level = Level T.Text T.Text T.Text (Maybe Integer) deriving Show data DocumentLocation = InDocument | InFootnote | InEndnote @@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target deriving Show data Notes = Notes NameSpaces - (Maybe (M.Map String Element)) - (Maybe (M.Map String Element)) + (Maybe (M.Map T.Text Element)) + (Maybe (M.Map T.Text Element)) deriving Show -data Comments = Comments NameSpaces (M.Map String Element) +data Comments = Comments NameSpaces (M.Map T.Text Element) deriving Show data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer @@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] - | ListItem ParagraphStyle String String (Maybe Level) [ParPart] - | Tbl String TblGrid TblLook [Row] + | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] + | Tbl T.Text TblGrid TblLook [Row] | OMathPara [Exp] deriving Show @@ -279,7 +280,7 @@ data ParPart = PlainRun Run | BookMark BookMarkId Anchor | InternalHyperLink Anchor [Run] | ExternalHyperLink URL [Run] - | Drawing FilePath String String B.ByteString Extent -- title, alt + | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | Chart -- placeholder for now | PlainOMath [Exp] | Field FieldInfo [Run] @@ -290,28 +291,28 @@ data ParPart = PlainRun Run data Run = Run RunStyle [RunElem] | Footnote [BodyPart] | Endnote [BodyPart] - | InlineDrawing FilePath String String B.ByteString Extent -- title, alt + | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt | InlineChart -- placeholder deriving Show -data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen +data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen deriving Show -type Target = String -type Anchor = String -type URL = String -type BookMarkId = String -type RelId = String -type ChangeId = String -type CommentId = String -type Author = String -type ChangeDate = String -type CommentDate = String +type Target = T.Text +type Anchor = T.Text +type URL = T.Text +type BookMarkId = T.Text +type RelId = T.Text +type ChangeId = T.Text +type CommentId = T.Text +type Author = T.Text +type ChangeDate = T.Text +type CommentDate = T.Text archiveToDocx :: Archive -> Either DocxError Docx archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive -archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String]) +archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text]) archiveToDocxWithWarnings archive = do docXmlPath <- case getDocumentXmlPath archive of Just fp -> Right fp @@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e -getDocumentXmlPath :: Archive -> Maybe String +getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry @@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle , numInfo = Nothing , psParentStyle = Nothing , pStyleName = stName - , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName + , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName } archiveToNotes :: Archive -> Notes @@ -441,8 +442,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttr (QName "Id" Nothing Nothing) element - target <- findAttr (QName "Target" Nothing Nothing) element + relId <- findAttrText (QName "Id" Nothing Nothing) element + target <- findAttrText (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -464,7 +465,7 @@ filePathIsMedia fp = in (dir == "word/media/") -lookupLevel :: String -> String -> Numbering -> Maybe Level +lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do (absNumId, ovrrides) <- lookup numId $ map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs @@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrByName ns "w" "numId" element + numId <- findAttrTextByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrByName ns "w" "abstractNumId" element + absNumId <- findAttrTextByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrByName ns "w" "ilvl" element + ilvl <- findAttrTextByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) @@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element) +elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element - | isElem ns "w" (notetype ++ "s") element = + | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -556,11 +557,11 @@ elemToNotes ns notetype element M.fromList pairs elemToNotes _ _ _ = Nothing -elemToComments :: NameSpaces -> Element -> M.Map String Element +elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrByName ns "w" "id" e >>= + (\e -> findAttrTextByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -632,7 +633,7 @@ testBitMask bitMaskS n = pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int) pHeading = getParStyleField headingLev . pStyle -pNumInfo :: ParagraphStyle -> Maybe (String, String) +pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text) pNumInfo = getParStyleField numInfo . pStyle elemToBodyPart :: NameSpaces -> Element -> D BodyPart @@ -640,7 +641,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ showElement c + expsLst <- eitherToD $ readOMML $ T.pack $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -664,7 +665,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrByName ns "w" "val" + >>= findAttrTextByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels = where pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels -expandDrawingId :: String -> D (FilePath, B.ByteString) +expandDrawingId :: T.Text -> D (FilePath, B.ByteString) expandDrawingId s = do location <- asks envLocation - target <- asks (lookupRelationship location s . envRelationships) + target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships) case target of Just filepath -> do bytes <- asks (lookup ("word/" ++ filepath) . envMedia) @@ -699,12 +700,12 @@ expandDrawingId s = do Nothing -> throwError DocxError Nothing -> throwError DocxError -getTitleAndAlt :: NameSpaces -> Element -> (String, String) +getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -716,7 +717,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrByName ns "r" "embed" + >>= findAttrTextByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -726,7 +727,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrByName ns "r" "id" + >>= findAttrTextByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -795,7 +796,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ strContent instrText + info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -816,56 +817,56 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrByName ns "w" "id" element - , Just bmName <- findAttrByName ns "w" "name" element = + , Just bmId <- findAttrTextByName ns "w" "id" element + , Just bmName <- findAttrTextByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrByName ns "r" "id" element = do + , Just relId <- findAttrTextByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrByName ns "w" "anchor" element of - Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs + case findAttrTextByName ns "w" "anchor" element of + Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrByName ns "w" "anchor" element = do + , Just anchor <- findAttrTextByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrByName ns "w" "id" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrByName ns "w" "id" element = + , Just cmtId <- findAttrTextByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrByName ns "w" "id" element - , Just cmtAuthor <- findAttrByName ns "w" "author" element - , Just cmtDate <- findAttrByName ns "w" "date" element = do + , Just cmtId <- findAttrTextByName ns "w" "id" element + , Just cmtAuthor <- findAttrTextByName ns "w" "author" element + , Just cmtDate <- findAttrTextByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem -lookupFootnote :: String -> Notes -> Maybe Element +lookupFootnote :: T.Text -> Notes -> Maybe Element lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s -lookupEndnote :: String -> Notes -> Maybe Element +lookupEndnote :: T.Text -> Notes -> Maybe Element lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s elemToExtent :: Element -> Extent @@ -876,7 +877,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead + >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack childElemToRun :: NameSpaces -> Element -> D Run @@ -887,7 +888,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -900,7 +901,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrByName ns "w" "id" element = do + , Just fnId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -908,7 +909,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrByName ns "w" "id" element = do + , Just enId <- findAttrTextByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrByName ns "w" "id" element - , Just cAuthor <- findAttrByName ns "w" "author" element - , Just cDate <- findAttrByName ns "w" "date" element = + , Just cId <- findAttrTextByName ns "w" "id" element + , Just cAuthor <- findAttrTextByName ns "w" "author" element + , Just cDate <- findAttrTextByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate) getTrackedChange _ _ = Nothing @@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrByName ns "w" "val") + (fmap ParaStyleId . findAttrTextByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1010,7 +1011,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrByName ns "w" "val" >>= + findAttrTextByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1020,12 +1021,12 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = strContent element + let str = T.pack $ strContent element font <- asks envFont case font of Nothing -> return $ TextRun str Just f -> return . TextRun $ - map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str + T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str | isElem ns "w" "br" element = return LnBrk | isElem ns "w" "tab" element = return Tab | isElem ns "w" "softHyphen" element = return SoftHyphen @@ -1043,11 +1044,11 @@ getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = case readLitChar ("\\x" ++ s) of - [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char + [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = stringToFont =<< findAttrByName ns "w" "font" element + getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element lowerFromPrivate ('F':xs) = '0':xs lowerFromPrivate xs = xs getSymChar _ _ = TextRun "" @@ -1059,7 +1060,7 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - stringToFont =<< + textToFont . T.pack =<< foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index ac2d6fa07..f81707e92 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -3,6 +3,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.Parse.Styles Copyright : Copyright (C) 2014-2019 Jesse Rosenthal @@ -46,20 +47,19 @@ import Prelude import Codec.Archive.Zip import Control.Applicative ((<|>)) import Control.Monad.Except -import Data.Char (toLower) -import Data.List import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light -newtype CharStyleId = CharStyleId String +newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) -newtype ParaStyleId = ParaStyleId String +newtype ParaStyleId = ParaStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) newtype CharStyleName = CharStyleName CIString @@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString deriving (Show, Eq, Ord, IsString, FromStyleName) -- Case-insensitive comparisons -newtype CIString = CIString String deriving (Show, IsString, FromStyleName) +newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName) class FromStyleName a where - fromStyleName :: a -> String + fromStyleName :: a -> T.Text instance FromStyleName String where + fromStyleName = T.pack + +instance FromStyleName T.Text where fromStyleName = id class FromStyleId a where - fromStyleId :: a -> String + fromStyleId :: a -> T.Text instance FromStyleId String where + fromStyleId = T.pack + +instance FromStyleId T.Text where fromStyleId = id instance Eq CIString where - (==) = (==) `on` map toLower . coerce + (==) = (==) `on` T.toCaseFold . coerce instance Ord CIString where - compare = compare `on` map toLower . coerce + compare = compare `on` T.toCaseFold . coerce data VertAlign = BaseLn | SupScrpt | SubScrpt deriving Show @@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool deriving Show data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int) - , numInfo :: Maybe (String, String) + , numInfo :: Maybe (T.Text, T.Text) , psParentStyle :: Maybe ParStyle , pStyleName :: ParaStyleName , pStyleId :: ParaStyleId @@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> (Just $ elemToRunStyle ns element parentStyle) @@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger =<< - (stripPrefix "heading " . map toLower $ + , Just n <- stringToInteger . T.unpack =<< + (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) getHeaderLevel _ _ = Nothing -getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a +getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") - <|> findAttrByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") + <|> findAttrTextByName ns "w" "styleId" el) -getNumInfo :: NameSpaces -> Element -> Maybe (String, String) +getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do let numPr = findChildByName ns "w" "pPr" element >>= findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrByName ns "w" "val") + findAttrTextByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrByName ns "w" "val" + findAttrTextByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrByName ns "w" "styleId" element + | Just styleId <- findAttrTextByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f4855efd2..0de1114bd 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName + , findAttrText , findAttrByName + , findAttrTextByName ) where import Prelude import Data.Maybe (mapMaybe) +import qualified Data.Text as T import Text.XML.Light type NameSpaces = [(String, String)] @@ -55,7 +58,13 @@ findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el +findAttrText :: QName -> Element -> Maybe T.Text +findAttrText x = fmap T.pack . findAttr x + findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el + +findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text +findAttrTextByName a b c = fmap T.pack . findAttrByName a b c diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index 60d406df1..3a92cfa19 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -2,6 +2,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.DokuWiki Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -20,8 +21,7 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isDigit) import qualified Data.Foldable as F -import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf) -import Data.List.Split (splitOn) +import Data.List (transpose) import Data.Maybe (fromMaybe, catMaybes) import Data.Text (Text) import qualified Data.Text as T @@ -31,7 +31,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Read DokuWiki from an input string and return a Pandoc document. readDokuWiki :: PandocMonad m @@ -42,7 +42,7 @@ readDokuWiki opts s = do let input = crFilter s res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (T.unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type DWParser = ParserT Text ParserState @@ -71,9 +71,9 @@ parseDokuWiki = B.doc . mconcat <$> many block <* spaces <* eof -- | Parse <code> and <file> attributes -codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)]) +codeLanguage :: PandocMonad m => DWParser m (Text, [Text], [(Text, Text)]) codeLanguage = try $ do - rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>'))) + rawLang <- option "-" (spaceChar *> manyTillChar anyChar (lookAhead (spaceChar <|> char '>'))) let attr = case rawLang of "-" -> [] l -> [l] @@ -81,16 +81,16 @@ codeLanguage = try $ do -- | Generic parser for <code> and <file> tags codeTag :: PandocMonad m - => ((String, [String], [(String, String)]) -> String -> a) - -> String + => ((Text, [Text], [(Text, Text)]) -> Text -> a) + -> Text -> DWParser m a codeTag f tag = try $ f <$ char '<' - <* string tag + <* textStr tag <*> codeLanguage <* manyTill anyChar (char '>') <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</" <* string tag <* char '>') + <*> manyTillChar anyChar (try $ string "</" <* textStr tag <* char '>') -- * Inline parsers @@ -167,19 +167,19 @@ underlined :: PandocMonad m => DWParser m B.Inlines underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines nowiki :: PandocMonad m => DWParser m B.Inlines -nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>") +nowiki = try $ B.text <$ string "<nowiki>" <*> manyTillChar anyChar (try $ string "</nowiki>") percent :: PandocMonad m => DWParser m B.Inlines -percent = try $ B.text <$> enclosed (string "%%") nestedString +percent = try $ B.text <$> enclosed (string "%%") nestedText -nestedString :: (Show a, PandocMonad m) - => DWParser m a -> DWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedText :: (Show a, PandocMonad m) + => DWParser m a -> DWParser m Text +nestedText end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end monospaced :: PandocMonad m => DWParser m B.Inlines -monospaced = try $ B.code <$> enclosed (string "''") nestedString +monospaced = try $ B.code <$> enclosed (string "''") nestedText subscript :: PandocMonad m => DWParser m B.Inlines subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines @@ -201,12 +201,12 @@ inlineFile :: PandocMonad m => DWParser m B.Inlines inlineFile = codeTag B.codeWith "file" inlineHtml :: PandocMonad m => DWParser m B.Inlines -inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>") +inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTillChar anyChar (try $ string "</html>") inlinePhp :: PandocMonad m => DWParser m B.Inlines -inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>") +inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTillChar anyChar (try $ string "</php>") -makeLink :: (String, String) -> B.Inlines +makeLink :: (Text, Text) -> B.Inlines makeLink (text, url) = B.link url "" $ B.str text autoEmail :: PandocMonad m => DWParser m B.Inlines @@ -220,7 +220,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- uri - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where checkLink c @@ -234,10 +234,10 @@ nocache :: PandocMonad m => DWParser m B.Inlines nocache = try $ mempty <$ string "~~NOCACHE~~" str :: PandocMonad m => DWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => DWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar link :: PandocMonad m => DWParser m B.Inlines link = try $ do @@ -248,77 +248,78 @@ link = try $ do setState $ st{ stateAllowLinks = True } return l -isExternalLink :: String -> Bool -isExternalLink s = - case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of - (':':'/':'/':_) -> True - _ -> False - -isAbsolutePath :: String -> Bool -isAbsolutePath ('.':_) = False -isAbsolutePath s = ':' `elem` s - -normalizeDots :: String -> String -normalizeDots path@('.':_) = - case dropWhile (== '.') path of - ':':_ -> path - _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path -normalizeDots path = path +isExternalLink :: Text -> Bool +isExternalLink s = "://" `T.isPrefixOf` sSuff + where + sSuff = T.dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s + +isAbsolutePath :: Text -> Bool +isAbsolutePath (T.uncons -> Just ('.', _)) = False +isAbsolutePath s = T.any (== ':') s + +normalizeDots :: Text -> Text +normalizeDots path + | not (T.null pref) = case T.uncons suff of + Just (':', _) -> path + _ -> pref <> ":" <> suff + | otherwise = path + where + (pref, suff) = T.span (== '.') path -normalizeInternalPath :: String -> String +normalizeInternalPath :: Text -> Text normalizeInternalPath path = if isAbsolutePath path then ensureAbsolute normalizedPath else normalizedPath where - normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path - ensureAbsolute s@('/':_) = s - ensureAbsolute s = '/':s + normalizedPath = T.intercalate "/" $ dropWhile (== ".") $ T.splitOn ":" $ normalizeDots path + ensureAbsolute s@(T.uncons -> Just ('/', _)) = s + ensureAbsolute s = "/" <> s -normalizePath :: String -> String +normalizePath :: Text -> Text normalizePath path = if isExternalLink path then path else normalizeInternalPath path -urlToText :: String -> String +urlToText :: Text -> Text urlToText url = if isExternalLink url then url - else reverse $ takeWhile (/= ':') $ reverse url + else T.takeWhileEnd (/= ':') url -- Parse link or image parseLink :: PandocMonad m - => (String -> Maybe B.Inlines -> B.Inlines) - -> String - -> String + => (Text -> Maybe B.Inlines -> B.Inlines) + -> Text + -> Text -> DWParser m B.Inlines parseLink f l r = f - <$ string l - <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r))) - <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r))) - <* string r + <$ textStr l + <*> many1TillChar anyChar (lookAhead (void (char '|') <|> try (void $ textStr r))) + <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ textStr r))) + <* textStr r -- | Split Interwiki link into left and right part -- | Return Nothing if it is not Interwiki link -splitInterwiki :: String -> Maybe (String, String) +splitInterwiki :: Text -> Maybe (Text, Text) splitInterwiki path = - case span (\c -> isAlphaNum c || c == '.') path of - (l, '>':r) -> Just (l, r) + case T.span (\c -> isAlphaNum c || c == '.') path of + (l, T.uncons -> Just ('>', r)) -> Just (l, r) _ -> Nothing -interwikiToUrl :: String -> String -> String -interwikiToUrl "callto" page = "callto://" ++ page -interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page -interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page -interwikiToUrl "tel" page = "tel:" ++ page -interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page -interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page -interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page -interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky" +interwikiToUrl :: Text -> Text -> Text +interwikiToUrl "callto" page = "callto://" <> page +interwikiToUrl "doku" page = "https://www.dokuwiki.org/" <> page +interwikiToUrl "phpfn" page = "https://secure.php.net/" <> page +interwikiToUrl "tel" page = "tel:" <> page +interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" <> page +interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" <> page +interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page +interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page +interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page +interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page +interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky" linkText :: PandocMonad m => DWParser m B.Inlines linkText = parseLink fromRaw "[[" "]]" @@ -338,23 +339,23 @@ linkText = parseLink fromRaw "[[" "]]" Just (_, r) -> r -- Matches strings like "100x100" (width x height) and "50" (width) -isWidthHeightParameter :: String -> Bool +isWidthHeightParameter :: Text -> Bool isWidthHeightParameter s = - case s of - (x:xs) -> - isDigit x && case dropWhile isDigit xs of - ('x':ys@(_:_)) -> all isDigit ys - "" -> True + case T.uncons s of + Just (x, xs) -> + isDigit x && case T.uncons $ T.dropWhile isDigit xs of + Just ('x', ys) | not (T.null ys) -> T.all isDigit ys + Nothing -> True _ -> False _ -> False -parseWidthHeight :: String -> (Maybe String, Maybe String) +parseWidthHeight :: Text -> (Maybe Text, Maybe Text) parseWidthHeight s = (width, height) where - width = Just $ takeWhile isDigit s + width = Just $ T.takeWhile isDigit s height = - case dropWhile isDigit s of - ('x':xs) -> Just xs + case T.uncons $ T.dropWhile isDigit s of + Just ('x', xs) -> Just xs _ -> Nothing image :: PandocMonad m => DWParser m B.Inlines @@ -365,17 +366,17 @@ image = try $ parseLink fromRaw "{{" "}}" then B.link normalizedPath "" (fromMaybe defaultDescription description) else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description) where - (path', parameters) = span (/= '?') $ trim path + (path', parameters) = T.span (/= '?') $ trim path normalizedPath = normalizePath path' - leftPadding = " " `isPrefixOf` path - rightPadding = " " `isSuffixOf` path + leftPadding = " " `T.isPrefixOf` path + rightPadding = " " `T.isSuffixOf` path classes = case (leftPadding, rightPadding) of (False, False) -> [] (False, True) -> ["align-left"] (True, False) -> ["align-right"] (True, True) -> ["align-center"] - parameterList = splitOn "&" $ drop 1 parameters + parameterList = T.splitOn "&" $ T.drop 1 parameters linkOnly = "linkonly" `elem` parameterList (width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList) attributes = catMaybes [fmap ("width",) width, fmap ("height",) height] @@ -389,7 +390,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => DWParser m B.Blocks @@ -417,30 +418,30 @@ header = try $ do attr <- registerHeader nullAttr contents return $ B.headerWith attr (7 - lev) contents -list :: PandocMonad m => String -> DWParser m B.Blocks +list :: PandocMonad m => Text -> DWParser m B.Blocks list prefix = bulletList prefix <|> orderedList prefix -bulletList :: PandocMonad m => String -> DWParser m B.Blocks +bulletList :: PandocMonad m => Text -> DWParser m B.Blocks bulletList prefix = try $ B.bulletList <$> parseList prefix '*' -orderedList :: PandocMonad m => String -> DWParser m B.Blocks +orderedList :: PandocMonad m => Text -> DWParser m B.Blocks orderedList prefix = try $ B.orderedList <$> parseList prefix '-' parseList :: PandocMonad m - => String + => Text -> Char -> DWParser m [B.Blocks] parseList prefix marker = many1 ((<>) <$> item <*> fmap mconcat (many continuation)) where - continuation = try $ list (" " ++ prefix) - item = try $ string prefix *> char marker *> char ' ' *> itemContents + continuation = try $ list (" " <> prefix) + item = try $ textStr prefix *> char marker *> char ' ' *> itemContents itemContents = B.plain . mconcat <$> many1Till inline' eol indentedCode :: PandocMonad m => DWParser m B.Blocks -indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine +indentedCode = try $ B.codeBlock . T.unlines <$> many1 indentedLine where - indentedLine = try $ string " " *> manyTill anyChar eol + indentedLine = try $ string " " *> manyTillChar anyChar eol quote :: PandocMonad m => DWParser m B.Blocks quote = try $ nestedQuote 0 @@ -456,13 +457,13 @@ blockHtml :: PandocMonad m => DWParser m B.Blocks blockHtml = try $ B.rawBlock "html" <$ string "<HTML>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</HTML>") + <*> manyTillChar anyChar (try $ string "</HTML>") blockPhp :: PandocMonad m => DWParser m B.Blocks blockPhp = try $ B.codeBlockWith ("", ["php"], []) <$ string "<PHP>" <* optional (manyTill spaceChar eol) - <*> manyTill anyChar (try $ string "</PHP>") + <*> manyTillChar anyChar (try $ string "</PHP>") table :: PandocMonad m => DWParser m B.Blocks table = do diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 8e9746090..93ddeb9ee 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.EPUB Copyright : Copyright (C) 2014-2019 Matthew Pickering @@ -24,7 +25,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) +import qualified Data.Text as T import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.Text.Lazy as TL @@ -67,9 +69,9 @@ archiveToEPUB os archive = do -- No need to collapse here as the image path is from the manifest file let coverDoc = fromMaybe mempty (imageToPandoc <$> cover) spine <- parseSpine items content - let escapedSpine = map (escapeURI . takeFileName . fst) spine + let escapedSpine = map (escapeURI . T.pack . takeFileName . fst) spine Pandoc _ bs <- - foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine)) + foldM' (\a b -> ((a <>) . walk (prependHash $ escapedSpine)) `liftM` parseSpineElem root b) mempty spine let ast = coverDoc <> Pandoc meta bs fetchImages (M.elems items) root archive ast @@ -79,7 +81,7 @@ archiveToEPUB os archive = do parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc parseSpineElem (normalise -> r) (normalise -> path, mime) = do doc <- mimeToReader mime r path - let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty + let docSpan = B.doc $ B.para $ B.spanWith (T.pack $ takeFileName path, [], []) mempty return $ docSpan <> doc mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc mimeToReader "application/xhtml+xml" (unEscapeString -> root) @@ -108,18 +110,19 @@ fetchImages mimes root arc (query iq -> links) = <$> findEntryByPath abslink arc iq :: Inline -> [FilePath] -iq (Image _ _ (url, _)) = [url] +iq (Image _ _ (url, _)) = [T.unpack url] iq _ = [] -- Remove relative paths renameImages :: FilePath -> Inline -> Inline renameImages root img@(Image attr a (url, b)) - | "data:" `isPrefixOf` url = img - | otherwise = Image attr a (collapseFilePath (root </> url), b) + | "data:" `T.isPrefixOf` url = img + | otherwise = Image attr a ( T.pack $ collapseFilePath (root </> T.unpack url) + , b) renameImages _ x = x imageToPandoc :: FilePath -> Pandoc -imageToPandoc s = B.doc . B.para $ B.image s "" mempty +imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] @@ -144,7 +147,7 @@ parseManifest content coverId = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e mime <- findAttrE (emptyName "media-type") e - return (uid, (href, mime)) + return (uid, (href, T.pack mime)) parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do @@ -172,11 +175,11 @@ parseMeta content = do -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ strContent e) meta + addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta -renameMeta :: String -> String +renameMeta :: String -> T.Text renameMeta "creator" = "author" -renameMeta s = s +renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do @@ -197,26 +200,26 @@ getManifest archive = do fixInternalReferences :: FilePath -> Pandoc -> Pandoc fixInternalReferences pathToFile = walk (renameImages root) - . walk (fixBlockIRs filename) + . walk (fixBlockIRs filename) . walk (fixInlineIRs filename) where - (root, escapeURI -> filename) = splitFileName pathToFile + (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile fixInlineIRs :: String -> Inline -> Inline fixInlineIRs s (Span as v) = Span (fixAttrs s as) v fixInlineIRs s (Code as code) = Code (fixAttrs s as) code -fixInlineIRs s (Link as is ('#':url, tit)) = +fixInlineIRs s (Link as is (T.uncons -> Just ('#', url), tit)) = Link (fixAttrs s as) is (addHash s url, tit) fixInlineIRs s (Link as is t) = Link (fixAttrs s as) is t fixInlineIRs _ v = v -prependHash :: [String] -> Inline -> Inline +prependHash :: [T.Text] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) - | or [s `isPrefixOf` url | s <- ps] = - Link attr is ('#':url, tit) + | or [s `T.isPrefixOf` url | s <- ps] = + Link attr is ("#" <> url, tit) | otherwise = l prependHash _ i = i @@ -230,17 +233,17 @@ fixBlockIRs s (CodeBlock as code) = fixBlockIRs _ b = b fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs) +fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) -addHash :: String -> String -> String +addHash :: String -> T.Text -> T.Text addHash _ "" = "" -addHash s ident = takeFileName s ++ "#" ++ ident +addHash s ident = T.pack (takeFileName s) <> "#" <> ident -removeEPUBAttrs :: [(String, String)] -> [(String, String)] +removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs -isEPUBAttr :: (String, String) -> Bool -isEPUBAttr (k, _) = "epub:" `isPrefixOf` k +isEPUBAttr :: (T.Text, a) -> Bool +isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k -- Library @@ -291,4 +294,4 @@ findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ s) return +mkE s = maybe (throwError . PandocParseError $ T.pack $ s) return diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 0b25b9fed..6eed3c104 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -1,5 +1,6 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.FB2 Copyright : Copyright (C) 2018-2019 Alexander Krotov @@ -27,12 +28,11 @@ import Control.Monad.Except (throwError) import Control.Monad.State.Strict import Data.ByteString.Lazy.Char8 ( pack ) import Data.ByteString.Base64.Lazy -import Data.Char (isSpace, toUpper) import Data.Functor -import Data.List (dropWhileEnd, intersperse) -import Data.List.Split (splitOn) +import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) +import qualified Data.Text as T import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -48,8 +48,8 @@ type FB2 m = StateT FB2State m data FB2State = FB2State{ fb2SectionLevel :: Int , fb2Meta :: Meta - , fb2Authors :: [String] - , fb2Notes :: M.Map String Blocks + , fb2Authors :: [Text] + , fb2Notes :: M.Map Text Blocks } deriving Show instance Default FB2State where @@ -76,19 +76,20 @@ readFB2 _ inp = -- * Utility functions -trim :: String -> String -trim = dropWhileEnd isSpace . dropWhile isSpace +trim :: Text -> Text +trim = T.strip -removeHash :: String -> String -removeHash ('#':xs) = xs -removeHash xs = xs +removeHash :: Text -> Text +removeHash t = case T.uncons t of + Just ('#', xs) -> xs + _ -> t -convertEntity :: String -> String -convertEntity e = fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: String -> Text +convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = - case qName $ elName e of + case T.pack $ qName $ elName e of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -96,12 +97,12 @@ parseInline (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement name pure mempty -parseInline (Text x) = pure $ text $ cdData x +parseInline (Text x) = pure $ text $ T.pack $ cdData x parseInline (CRef r) = pure $ str $ convertEntity r parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks @@ -111,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel < parseRootElement :: PandocMonad m => Element -> FB2 m Blocks parseRootElement e = - case qName $ elName e of + case T.pack $ qName $ elName e of "FictionBook" -> do -- Parse notes before parsing the rest of the content. case filterChild isNotesBody e of @@ -144,7 +145,7 @@ parseNote e = Just sectionId -> do content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e) oldNotes <- gets fb2Notes - modify $ \s -> s { fb2Notes = M.insert ("#" ++ sectionId) content oldNotes } + modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes } pure () where isTitle x = qName (elName x) == "title" @@ -156,7 +157,7 @@ parseNote e = -- | Parse a child of @\<FictionBook>@ element. parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks parseFictionBookChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "stylesheet" -> pure mempty -- stylesheet is ignored "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) "body" -> if isNotesBody e @@ -168,7 +169,7 @@ parseFictionBookChild e = -- | Parse a child of @\<description>@ element. parseDescriptionChild :: PandocMonad m => Element -> FB2 m () parseDescriptionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title-info" -> mapM_ parseTitleInfoChild (elChildren e) "src-title-info" -> pure () -- ignore "document-info" -> pure () @@ -176,13 +177,13 @@ parseDescriptionChild e = "custom-info" -> pure () "output" -> pure () name -> do - report $ IgnoredElement $ name ++ " in description" + report $ IgnoredElement $ name <> " in description" pure mempty -- | Parse a child of @\<body>@ element. parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks parseBodyChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "image" -> parseImageElement e "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) "epigraph" -> parseEpigraph e @@ -196,25 +197,25 @@ parseBinaryElement e = (Nothing, _) -> report $ IgnoredElement "binary without id attribute" (Just _, Nothing) -> report $ IgnoredElement "binary without content-type attribute" - (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e))) + (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e))) -- * Type parsers -- | Parse @authorType@ -parseAuthor :: PandocMonad m => Element -> FB2 m String -parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) +parseAuthor :: PandocMonad m => Element -> FB2 m Text +parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) -parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String) +parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text) parseAuthorChild e = - case qName $ elName e of - "first-name" -> pure $ Just $ strContent e - "middle-name" -> pure $ Just $ strContent e - "last-name" -> pure $ Just $ strContent e - "nickname" -> pure $ Just $ strContent e - "home-page" -> pure $ Just $ strContent e - "email" -> pure $ Just $ strContent e + case T.pack $ qName $ elName e of + "first-name" -> pure $ Just $ T.pack $ strContent e + "middle-name" -> pure $ Just $ T.pack $ strContent e + "last-name" -> pure $ Just $ T.pack $ strContent e + "nickname" -> pure $ Just $ T.pack $ strContent e + "home-page" -> pure $ Just $ T.pack $ strContent e + "email" -> pure $ Just $ T.pack $ strContent e name -> do - report $ IgnoredElement $ name ++ " in author" + report $ IgnoredElement $ name <> " in author" pure Nothing -- | Parse @titleType@ @@ -236,13 +237,13 @@ parseTitleContent _ = pure Nothing parseImageElement :: PandocMonad m => Element -> FB2 m Blocks parseImageElement e = case href of - Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt Nothing -> do report $ IgnoredElement " image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - title = fromMaybe "" $ findAttr (unqual "title") e - imgId = fromMaybe "" $ findAttr (unqual "id") e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + title = maybe "" T.pack $ findAttr (unqual "title") e + imgId = maybe "" T.pack $ findAttr (unqual "id") e href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e -- | Parse @pType@ @@ -256,7 +257,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) -- | Parse @citeType@ child parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks parseCiteChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "empty-line" -> pure horizontalRule @@ -271,13 +272,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks parsePoemChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "epigraph" -> parseEpigraph e "stanza" -> parseStanza e "text-author" -> para <$> parsePType e - "date" -> pure $ para $ text $ strContent e + "date" -> pure $ para $ text $ T.pack $ strContent e name -> report (UnexpectedXmlElement name "poem") $> mempty parseStanza :: PandocMonad m => Element -> FB2 m Blocks @@ -290,7 +291,7 @@ joinLineBlocks [] = [] parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks parseStanzaChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "v" -> lineBlock . (:[]) <$> parsePType e @@ -300,11 +301,11 @@ parseStanzaChild e = parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks parseEpigraph e = divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) - where divId = fromMaybe "" $ findAttr (unqual "id") e + where divId = maybe "" T.pack $ findAttr (unqual "id") e parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks parseEpigraphChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -318,7 +319,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks parseAnnotationChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -332,14 +333,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks parseSection e = do n <- gets fb2SectionLevel modify $ \st -> st{ fb2SectionLevel = n + 1 } - let sectionId = fromMaybe "" $ findAttr (unqual "id") e + let sectionId = maybe "" T.pack $ findAttr (unqual "id") e bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) modify $ \st -> st{ fb2SectionLevel = n } pure bs parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks parseSectionChild e = - case qName $ elName e of + case T.pack $ qName $ elName e of "title" -> parseBodyChild e "epigraph" -> parseEpigraph e "image" -> parseImageElement e @@ -361,16 +362,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e) parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines parseNamedStyle e = do content <- mconcat <$> mapM parseNamedStyleChild (elContent e) - let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e + let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e case findAttr (unqual "name") e of - Just name -> pure $ spanWith ("", [name], lang) content + Just name -> pure $ spanWith ("", [T.pack name], lang) content Nothing -> do report $ IgnoredElement "link without required name" pure mempty parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines parseNamedStyleChild (Elem e) = - case qName (elName e) of + case T.pack $ qName (elName e) of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -378,10 +379,10 @@ parseNamedStyleChild (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ strContent e + "code" -> pure $ code $ T.pack $ strContent e "image" -> parseInlineImageElement e name -> do - report $ IgnoredElement $ name ++ " in style" + report $ IgnoredElement $ name <> " in style" pure mempty parseNamedStyleChild x = parseInline x @@ -390,7 +391,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines parseLinkType e = do content <- mconcat <$> mapM parseStyleLinkType (elContent e) notes <- gets fb2Notes - case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just href -> case findAttr (QName "type" Nothing Nothing) e of Just "note" -> case M.lookup href notes of Nothing -> pure $ link href "" content @@ -417,19 +418,21 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet -- | Parse @title-infoType@ parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () parseTitleInfoChild e = - case qName (elName e) of + case T.pack $ qName (elName e) of "genre" -> pure () "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) - "book-title" -> modify (setMeta "title" (text $ strContent e)) + "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e)) "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" - "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e)) - "date" -> modify (setMeta "date" (text $ strContent e)) + "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn "," + $ T.pack + $ strContent e)) + "date" -> modify (setMeta "date" (text $ T.pack $ strContent e)) "coverpage" -> parseCoverPage e "lang" -> pure () "src-lang" -> pure () "translator" -> pure () "sequence" -> pure () - name -> report $ IgnoredElement $ name ++ " in title-info" + name -> report $ IgnoredElement $ name <> " in title-info" parseCoverPage :: PandocMonad m => Element -> FB2 m () parseCoverPage e = @@ -437,7 +440,7 @@ parseCoverPage e = Just img -> case href of Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) Nothing -> pure () - where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img Nothing -> pure () -- | Parse @inlineImageType@ element @@ -450,5 +453,5 @@ parseInlineImageElement e = Nothing -> do report $ IgnoredElement "inline image without href" pure mempty - where alt = maybe mempty str $ findAttr (unqual "alt") e - href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e + href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index e03ac6a97..1c2892d6a 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -1,5 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -35,8 +35,7 @@ import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT) import Data.Char (isAlphaNum, isLetter) import Data.Default (Default (..), def) import Data.Foldable (for_) -import Data.List (isPrefixOf) -import Data.List.Split (wordsBy, splitWhen) +import Data.List.Split (splitWhen) import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid (First (..)) @@ -62,8 +61,8 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, - extractSpaces, htmlSpanLikeElements, - onlySimpleTableCells, safeRead, underlineSpan) + extractSpaces, htmlSpanLikeElements, elemText, splitTextBy, + onlySimpleTableCells, safeRead, underlineSpan, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) @@ -93,14 +92,14 @@ readHtml opts inp = do "source" tags case result of Right doc -> return doc - Left err -> throwError $ PandocParseError $ getError err + Left err -> throwError $ PandocParseError $ T.pack $ getError err replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do st <- getState return $ walk (replaceNotes' (noteTable st)) bs -replaceNotes' :: [(String, Blocks)] -> Inline -> Inline +replaceNotes' :: [(Text, Blocks)] -> Inline -> Inline replaceNotes' noteTbl (RawInline (Format "noteref") ref) = maybe (Str "") (Note . B.toList) $ lookup ref noteTbl replaceNotes' _ x = x @@ -108,9 +107,9 @@ replaceNotes' _ x = x data HTMLState = HTMLState { parserState :: ParserState, - noteTable :: [(String, Blocks)], + noteTable :: [(Text, Blocks)], baseHref :: Maybe URI, - identifiers :: Set.Set String, + identifiers :: Set.Set Text, logMessages :: [LogMessage], macros :: M.Map Text Macro } @@ -134,7 +133,7 @@ pHtml :: PandocMonad m => TagParser m Blocks pHtml = try $ do (TagOpen "html" attr) <- lookAhead pAny for_ (lookup "lang" attr) $ - updateState . B.setMeta "lang" . B.text . T.unpack + updateState . B.setMeta "lang" . B.text pInTags "html" block pBody :: PandocMonad m => TagParser m Blocks @@ -146,11 +145,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny) setTitle t = mempty <$ updateState (B.setMeta "title" t) pMetaTag = do mt <- pSatisfy (matchTagOpen "meta" []) - let name = T.unpack $ fromAttrib "name" mt - if null name + let name = fromAttrib "name" mt + if T.null name then return mempty else do - let content = T.unpack $ fromAttrib "content" mt + let content = fromAttrib "content" mt updateState $ \s -> let ps = parserState s in s{ parserState = ps{ @@ -187,13 +186,13 @@ block = do , pFigure , pRawHtmlBlock ] - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -namespaces :: PandocMonad m => [(String, TagParser m Inlines)] +namespaces :: PandocMonad m => [(Text, TagParser m Inlines)] namespaces = [(mathMLNamespace, pMath True)] -mathMLNamespace :: String +mathMLNamespace :: Text mathMLNamespace = "http://www.w3.org/1998/Math/MathML" eSwitch :: (PandocMonad m, Monoid a) @@ -233,7 +232,7 @@ eFootnote = try $ do content <- pInTags tag block addNote ident content -addNote :: PandocMonad m => String -> Blocks -> TagParser m () +addNote :: PandocMonad m => Text -> Blocks -> TagParser m () addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s}) eNoteref :: PandocMonad m => TagParser m Inlines @@ -245,8 +244,8 @@ eNoteref = try $ do -> (lookup "type" as <|> lookup "epub:type" as) == Just "noteref" _ -> False) - ident <- case T.unpack <$> lookup "href" attr of - Just ('#':rest) -> return rest + ident <- case lookup "href" attr >>= T.uncons of + Just ('#', rest) -> return rest _ -> mzero _ <- manyTill pAny (pSatisfy (\case TagClose t -> t == tag @@ -287,7 +286,7 @@ pListItem nonItem = do maybe id addId (lookup "id" attr) <$> pInTags "li" block <* skipMany nonItem -parseListStyleType :: String -> ListNumberStyle +parseListStyleType :: Text -> ListNumberStyle parseListStyleType "lower-roman" = LowerRoman parseListStyleType "upper-roman" = UpperRoman parseListStyleType "lower-alpha" = LowerAlpha @@ -295,7 +294,7 @@ parseListStyleType "upper-alpha" = UpperAlpha parseListStyleType "decimal" = Decimal parseListStyleType _ = DefaultStyle -parseTypeAttr :: String -> ListNumberStyle +parseTypeAttr :: Text -> ListNumberStyle parseTypeAttr "i" = LowerRoman parseTypeAttr "I" = UpperRoman parseTypeAttr "a" = LowerAlpha @@ -404,20 +403,19 @@ pDiv = try $ do pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do - raw <- T.unpack <$> - (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" - <|> pRawTag) + raw <- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea" + <|> pRawTag) exts <- getOption readerExtensions - if extensionEnabled Ext_raw_html exts && not (null raw) + if extensionEnabled Ext_raw_html exts && not (T.null raw) then return $ B.rawBlock "html" raw else ignore raw -ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a +ignore :: (Monoid a, PandocMonad m) => Text -> TagParser m a ignore raw = do pos <- getPosition -- raw can be null for tags like <!DOCTYPE>; see paRawTag -- in this case we don't want a warning: - unless (null raw) $ + unless (T.null raw) $ logMessage $ SkippedContent raw pos return mempty @@ -438,7 +436,7 @@ eSection = try $ do headerLevel :: Text -> TagParser m Int headerLevel tagtype = - case safeRead (T.unpack (T.drop 1 tagtype)) of + case safeRead (T.drop 1 tagtype) of Just level -> -- try (do -- guardEnabled Ext_epub_html_exts @@ -468,7 +466,7 @@ pHeader = try $ do level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr - let classes = maybe [] words $ lookup "class" attr + let classes = maybe [] T.words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] attr'' <- registerHeader (ident, classes, keyvals) contents return $ if bodyTitle @@ -529,14 +527,14 @@ pCol = try $ do optional $ pSatisfy (matchTagClose "col") skipMany pBlank let width = case lookup "width" attribs of - Nothing -> case lookup "style" attribs of - Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs -> - fromMaybe 0.0 $ safeRead (filter - (`notElem` (" \t\r\n%'\";" :: [Char])) xs) - _ -> 0.0 - Just x | not (null x) && last x == '%' -> - fromMaybe 0.0 $ safeRead (init x) - _ -> 0.0 + Nothing -> case lookup "style" attribs of + Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> + fromMaybe 0.0 $ safeRead (T.filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) + _ -> 0.0 + Just (T.unsnoc -> Just (xs, '%')) -> + fromMaybe 0.0 $ safeRead xs + _ -> 0.0 if width > 0.0 then return $ width / 100.0 else return 0.0 @@ -562,7 +560,7 @@ pCell celltype = try $ do let extractAlign' [] = "" extractAlign' ("text-align":x:_) = x extractAlign' (_:xs) = extractAlign' xs - let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':']) + let extractAlign = extractAlign' . splitTextBy (`elemText` " \t;:") let align = case maybeFromAttrib "align" tag `mplus` (extractAlign <$> maybeFromAttrib "style" tag) of Just "left" -> AlignLeft @@ -610,7 +608,7 @@ pFigure = try $ do let caption = fromMaybe mempty mbcap case B.toList <$> mbimg of Just [Image attr _ (url, tit)] -> - return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption + return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption _ -> mzero pCodeBlock :: PandocMonad m => TagParser m Blocks @@ -618,21 +616,21 @@ pCodeBlock = try $ do TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) let attr = toStringAttr attr' contents <- manyTill pAny (pCloses "pre" <|> eof) - let rawText = concatMap tagToString contents + let rawText = T.concat $ map tagToText contents -- drop leading newline if any - let result' = case rawText of - '\n':xs -> xs - _ -> rawText + let result' = case T.uncons rawText of + Just ('\n', xs) -> xs + _ -> rawText -- drop trailing newline if any - let result = case reverse result' of - '\n':_ -> init result' - _ -> result' + let result = case T.unsnoc result' of + Just (result'', '\n') -> result'' + _ -> result' return $ B.codeBlockWith (mkAttr attr) result -tagToString :: Tag Text -> String -tagToString (TagText s) = T.unpack s -tagToString (TagOpen "br" _) = "\n" -tagToString _ = "" +tagToText :: Tag Text -> Text +tagToText (TagText s) = s +tagToText (TagOpen "br" _) = "\n" +tagToText _ = "" inline :: PandocMonad m => TagParser m Inlines inline = choice @@ -667,7 +665,7 @@ pLocation = do pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSat f = do pos <- getPosition - token show (const pos) (\x -> if f x then Just x else Nothing) + token tshow (const pos) (\x -> if f x then Just x else Nothing) pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text) pSatisfy f = try $ optional pLocation >> pSat f @@ -688,10 +686,10 @@ pQ = choice $ map try [citedQuote, normalQuote] where citedQuote = do tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst)) - url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + url <- canonicalizeUrl $ fromAttrib "cite" tag + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)]) normalQuote = do @@ -729,7 +727,7 @@ pSpanLike = TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True) let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs content <- mconcat <$> manyTill inline (pCloses tagName <|> eof) - return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content + return $ B.spanWith (ids, tagName : cs, kvs) content pSmall :: PandocMonad m => TagParser m Inlines pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[])) @@ -753,19 +751,18 @@ pLineBreak = do -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. -maybeFromAttrib :: String -> Tag Text -> Maybe String -maybeFromAttrib name (TagOpen _ attrs) = - T.unpack <$> lookup (T.pack name) attrs +maybeFromAttrib :: Text -> Tag Text -> Maybe Text +maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs maybeFromAttrib _ _ = Nothing pLink :: PandocMonad m => TagParser m Inlines pLink = try $ do tag <- pSatisfy $ tagOpenLit "a" (const True) - let title = T.unpack $ fromAttrib "title" tag + let title = fromAttrib "title" tag -- take id from id attribute if present, otherwise name - let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $ + let uid = fromMaybe (fromAttrib "name" tag) $ maybeFromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + let cls = T.words $ fromAttrib "class" tag lab <- mconcat <$> manyTill inline (pCloses "a") -- check for href; if href, then a link, otherwise a span case maybeFromAttrib "href" tag of @@ -778,34 +775,33 @@ pLink = try $ do pImage :: PandocMonad m => TagParser m Inlines pImage = do tag <- pSelfClosing (=="img") (isJust . lookup "src") - url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag - let title = T.unpack $ fromAttrib "title" tag - let alt = T.unpack $ fromAttrib "alt" tag - let uid = T.unpack $ fromAttrib "id" tag - let cls = words $ T.unpack $ fromAttrib "class" tag + url <- canonicalizeUrl $ fromAttrib "src" tag + let title = fromAttrib "title" tag + let alt = fromAttrib "alt" tag + let uid = fromAttrib "id" tag + let cls = T.words $ fromAttrib "class" tag let getAtt k = case fromAttrib k tag of "" -> [] - v -> [(T.unpack k, T.unpack v)] + v -> [(k, v)] let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"] return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) -pCodeWithClass :: PandocMonad m => [(T.Text,String)] -> TagParser m Inlines -pCodeWithClass elemToClass = try $ do +pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines +pCodeWithClass elemToClass = try $ do let tagTest = flip elem . fmap fst $ elemToClass TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True) result <- manyTill pAny (pCloses open) let (ids,cs,kvs) = mkAttr . toStringAttr $ attr' cs' = maybe cs (:cs) . lookup open $ elemToClass return . B.codeWith (ids,cs',kvs) . - unwords . lines . T.unpack . innerText $ result + T.unwords . T.lines . innerText $ result pCode :: PandocMonad m => TagParser m Inlines pCode = try $ do (TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True) let attr = toStringAttr attr' result <- manyTill pAny (pCloses open) - return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $ - innerText result + return $ B.codeWith (mkAttr attr) $ T.unwords $ T.lines $ innerText result pSpan :: PandocMonad m => TagParser m Inlines pSpan = try $ do @@ -817,7 +813,7 @@ pSpan = try $ do where styleAttr = fromMaybe "" $ lookup "style" attr fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr classes = fromMaybe [] $ - words <$> lookup "class" attr + T.words <$> lookup "class" attr let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr) return $ tag contents @@ -829,18 +825,17 @@ pRawHtmlInline = do then pSatisfy (not . isBlockTag) else pSatisfy isInlineTag exts <- getOption readerExtensions - let raw = T.unpack $ renderTags' [result] + let raw = renderTags' [result] if extensionEnabled Ext_raw_html exts then return $ B.rawInline "html" raw else ignore raw -mathMLToTeXMath :: String -> Either String String +mathMLToTeXMath :: Text -> Either Text Text mathMLToTeXMath s = writeTeX <$> readMathML s -toStringAttr :: [(Text, Text)] -> [(String, String)] +toStringAttr :: [(Text, Text)] -> [(Text, Text)] toStringAttr = map go - where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x), - T.unpack y) + where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y) pScriptMath :: PandocMonad m => TagParser m Inlines pScriptMath = try $ do @@ -849,8 +844,7 @@ pScriptMath = try $ do Just x | "math/tex" `T.isPrefixOf` x -> return $ "display" `T.isSuffixOf` x _ -> mzero - contents <- T.unpack . innerText <$> - manyTill pAny (pSatisfy (matchTagClose "script")) + contents <- innerText <$> manyTill pAny (pSatisfy (matchTagClose "script")) return $ (if isdisplay then B.displayMath else B.math) contents pMath :: PandocMonad m => Bool -> TagParser m Inlines @@ -862,11 +856,11 @@ pMath inCase = try $ do unless inCase $ guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr)) contents <- manyTill pAny (pSatisfy (matchTagClose "math")) - case mathMLToTeXMath (T.unpack $ renderTags $ + case mathMLToTeXMath (renderTags $ [open] <> contents <> [TagClose "math"]) of Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $ - T.unpack $ innerText contents - Right [] -> return mempty + innerText contents + Right "" -> return mempty Right x -> return $ case lookup "display" attr of Just "block" -> B.displayMath x _ -> B.math x @@ -925,7 +919,7 @@ pTagText = try $ do parsed <- lift $ lift $ flip runReaderT qu $ runParserT (many pTagContents) st "text" str case parsed of - Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'" + Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'" Right result -> return $ mconcat result pBlank :: PandocMonad m => TagParser m () @@ -954,11 +948,11 @@ pRawTeX = do guardEnabled Ext_raw_tex inp <- getInput st <- getState - res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp) + res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" inp case res of Left _ -> mzero Right (contents, raw) -> do - _ <- count (length raw) anyChar + _ <- count (T.length raw) anyChar return $ B.rawInline "tex" contents pStr :: PandocMonad m => InlinesParser m Inlines @@ -966,7 +960,7 @@ pStr = do result <- many1 $ satisfy $ \c -> not (isSpace c) && not (isSpecial c) && not (isBad c) updateLastStrPos - return $ B.str result + return $ B.str $ T.pack result isSpecial :: Char -> Bool isSpecial '"' = True @@ -982,7 +976,7 @@ isSpecial '\8221' = True isSpecial _ = False pSymbol :: PandocMonad m => InlinesParser m Inlines -pSymbol = satisfy isSpecial >>= return . B.str . (:[]) +pSymbol = satisfy isSpecial >>= return . B.str . T.singleton isBad :: Char -> Bool isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML @@ -1019,7 +1013,7 @@ pBad = do '\158' -> '\382' '\159' -> '\376' _ -> '?' - return $ B.str [c'] + return $ B.str $ T.singleton c' pSpace :: PandocMonad m => InlinesParser m Inlines pSpace = many1 (satisfy isSpace) >>= \xs -> @@ -1156,8 +1150,8 @@ _ `closes` _ = False -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m - => (Tag String -> Bool) - -> ParserT String st m String + => (Tag Text -> Bool) + -> ParserT Text st m Text htmlInBalanced f = try $ do lookAhead (char '<') inp <- getInput @@ -1174,21 +1168,21 @@ htmlInBalanced f = try $ do (TagClose _ : TagPosition er ec : _) -> do let ls = er - sr let cs = ec - sc - lscontents <- unlines <$> count ls anyLine + lscontents <- T.unlines <$> count ls anyLine cscontents <- count cs anyChar closetag <- do x <- many (satisfy (/='>')) char '>' return (x <> ">") - return (lscontents <> cscontents <> closetag) + return $ lscontents <> T.pack cscontents <> T.pack closetag _ -> mzero _ -> mzero -htmlInBalanced' :: String - -> [Tag String] - -> [Tag String] +htmlInBalanced' :: Text + -> [Tag Text] + -> [Tag Text] htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts - where go :: Int -> [Tag String] -> Maybe [Tag String] + where go :: Int -> [Tag Text] -> Maybe [Tag Text] go n (t@(TagOpen tn' _):rest) | tn' == tagname = (t :) <$> go (n + 1) rest go 1 (t@(TagClose tn'):_) | tn' == tagname = @@ -1204,8 +1198,8 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) - => (Tag String -> Bool) - -> ParserT [Char] st m (Tag String, String) + => (Tag Text -> Bool) + -> ParserT Text st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition @@ -1213,7 +1207,7 @@ htmlTag f = try $ do let ts = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = False , optTagPosition = True } - (inp ++ " ") -- add space to ensure that + (inp <> " ") -- add space to ensure that -- we get a TagPosition after the tag (next, ln, col) <- case ts of (TagPosition{} : next : TagPosition ln col : _) @@ -1225,13 +1219,12 @@ htmlTag f = try $ do -- so we exclude . even though it's a valid character -- in XML element names let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_' - let isName s = case s of - [] -> False - (c:cs) -> isLetter c && all isNameChar cs - let isPI s = case s of - ('?':_) -> True -- processing instruction - _ -> False - + let isName s = case T.uncons s of + Nothing -> False + Just (c, cs) -> isLetter c && T.all isNameChar cs + let isPI s = case T.uncons s of + Just ('?', _) -> True -- processing instruction + _ -> False let endpos = if ln == 1 then setSourceColumn startpos (sourceColumn startpos + (col - 1)) @@ -1247,18 +1240,18 @@ htmlTag f = try $ do -- basic sanity check, since the parser is very forgiving -- and finds tags in stuff like x<y) guard $ isName tagname || isPI tagname - guard $ not $ null tagname + guard $ not $ T.null tagname -- <https://example.org> should NOT be a tag either. -- tagsoup will parse it as TagOpen "https:" [("example.org","")] - guard $ last tagname /= ':' + guard $ T.last tagname /= ':' char '<' rendered <- manyTill anyChar endAngle - return (next, "<" ++ rendered ++ ">") + return (next, T.pack $ "<" ++ rendered ++ ">") case next of TagComment s - | "<!--" `isPrefixOf` inp -> do + | "<!--" `T.isPrefixOf` inp -> do string "<!--" - count (length s) anyChar + count (T.length s) anyChar string "-->" stripComments <- getOption readerStripComments if stripComments @@ -1272,12 +1265,12 @@ htmlTag f = try $ do handleTag tagname _ -> mzero -mkAttr :: [(String, String)] -> Attr +mkAttr :: [(Text, Text)] -> Attr mkAttr attr = (attribsId, attribsClasses, attribsKV) where attribsId = fromMaybe "" $ lookup "id" attr - attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes + attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr - epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr + epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr -- Strip namespace prefixes stripPrefixes :: [Tag Text] -> [Tag Text] @@ -1304,11 +1297,11 @@ isSpace _ = False -- Utilities -- | Adjusts a url according to the document's base URL. -canonicalizeUrl :: PandocMonad m => String -> TagParser m String +canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text canonicalizeUrl url = do mbBaseHref <- baseHref <$> getState - return $ case (parseURIReference url, mbBaseHref) of - (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs) + return $ case (parseURIReference (T.unpack url), mbBaseHref) of + (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs) _ -> url diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 0a048b6e6..3fc2f9715 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE CPP #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Haddock Copyright : Copyright (C) 2013 David Lazar @@ -17,9 +18,10 @@ module Text.Pandoc.Readers.Haddock import Prelude import Control.Monad.Except (throwError) -import Data.List (intersperse, stripPrefix) +import Data.List (intersperse) import Data.Maybe (fromMaybe) import Data.Text (Text, unpack) +import qualified Data.Text as T import Documentation.Haddock.Parser import Documentation.Haddock.Types as H import Text.Pandoc.Builder (Blocks, Inlines) @@ -28,7 +30,7 @@ import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, splitBy, trim) +import Text.Pandoc.Shared (crFilter, splitTextBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. @@ -51,7 +53,7 @@ docHToBlocks d' = case d' of DocEmpty -> mempty DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) -> - B.headerWith (ident,[],[]) (headerLevel h) + B.headerWith (T.pack ident,[],[]) (headerLevel h) (docHToInlines False $ headerTitle h) DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2) DocString _ -> inlineFallback @@ -73,12 +75,12 @@ docHToBlocks d' = DocDefList items -> B.definitionList (map (\(d,t) -> (docHToInlines False d, [consolidatePlains $ docHToBlocks t])) items) - DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s + DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) $ T.pack s DocCodeBlock d -> B.para $ docHToInlines True d DocHyperlink _ -> inlineFallback DocPic _ -> inlineFallback DocAName _ -> inlineFallback - DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s) + DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim $ T.pack s) DocExamples es -> mconcat $ map (\e -> makeExample ">>>" (exampleExpression e) (exampleResult e)) es DocTable H.Table{ tableHeaderRows = headerRows @@ -114,58 +116,58 @@ docHToInlines isCode d' = (docHToInlines isCode d2) DocString s | isCode -> mconcat $ intersperse B.linebreak - $ map B.code $ splitBy (=='\n') s - | otherwise -> B.text s + $ map B.code $ splitTextBy (=='\n') $ T.pack s + | otherwise -> B.text $ T.pack s DocParagraph _ -> mempty DocIdentifier ident -> case toRegular (DocIdentifier ident) of - DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) s + DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s _ -> mempty - DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s - DocModule s -> B.codeWith ("",["haskell","module"],[]) s + DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s + DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s DocWarning _ -> mempty -- TODO DocEmphasis d -> B.emph (docHToInlines isCode d) - DocMonospaced (DocString s) -> B.code s + DocMonospaced (DocString s) -> B.code $ T.pack s DocMonospaced d -> docHToInlines True d DocBold d -> B.strong (docHToInlines isCode d) - DocMathInline s -> B.math s - DocMathDisplay s -> B.displayMath s + DocMathInline s -> B.math $ T.pack s + DocMathDisplay s -> B.displayMath $ T.pack s DocHeader _ -> mempty DocUnorderedList _ -> mempty DocOrderedList _ -> mempty DocDefList _ -> mempty DocCodeBlock _ -> mempty - DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h) - (maybe (B.text $ hyperlinkUrl h) (docHToInlines isCode) + DocHyperlink h -> B.link (T.pack $ hyperlinkUrl h) (T.pack $ hyperlinkUrl h) + (maybe (B.text $ T.pack $ hyperlinkUrl h) (docHToInlines isCode) (hyperlinkLabel h)) - DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p) - (maybe mempty B.text $ pictureTitle p) - DocAName s -> B.spanWith (s,["anchor"],[]) mempty + DocPic p -> B.image (T.pack $ pictureUri p) (T.pack $ fromMaybe (pictureUri p) $ pictureTitle p) + (maybe mempty (B.text . T.pack) $ pictureTitle p) + DocAName s -> B.spanWith (T.pack s,["anchor"],[]) mempty DocProperty _ -> mempty DocExamples _ -> mempty DocTable _ -> mempty -- | Create an 'Example', stripping superfluous characters as appropriate -makeExample :: String -> String -> [String] -> Blocks +makeExample :: T.Text -> String -> [String] -> Blocks makeExample prompt expression result = B.para $ B.codeWith ("",["prompt"],[]) prompt <> B.space - <> B.codeWith ([], ["haskell","expr"], []) (trim expression) + <> B.codeWith ("", ["haskell","expr"], []) (trim $ T.pack expression) <> B.linebreak <> mconcat (intersperse B.linebreak $ map coder result') where -- 1. drop trailing whitespace from the prompt, remember the prefix - prefix = takeWhile (`elem` " \t") prompt + prefix = T.takeWhile (`elem` (" \t" :: String)) prompt -- 2. drop, if possible, the exact same sequence of whitespace -- characters from each result line -- -- 3. interpret lines that only contain the string "<BLANKLINE>" as an -- empty line - result' = map (substituteBlankLine . tryStripPrefix prefix) result + result' = map (substituteBlankLine . tryStripPrefix prefix . T.pack) result where - tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys + tryStripPrefix xs ys = fromMaybe ys $ T.stripPrefix xs ys substituteBlankLine "<BLANKLINE>" = "" substituteBlankLine line = line - coder = B.codeWith ([], ["result"], []) + coder = B.codeWith ("", ["result"], []) diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index dbca5a59f..8efc230cc 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -4,6 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Ipynb Copyright : Copyright (C) 2019 John MacFarlane @@ -19,7 +20,6 @@ module Text.Pandoc.Readers.Ipynb ( readIpynb ) where import Prelude import Data.Char (isDigit) -import Data.List (isPrefixOf) import Data.Maybe (fromMaybe) import Data.Digest.Pure.SHA (sha1, showDigest) import Text.Pandoc.Options @@ -30,6 +30,7 @@ import Text.Pandoc.Definition import Data.Ipynb as Ipynb import Text.Pandoc.Class import Text.Pandoc.MIME (extensionFromMimeType) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.UTF8 import Text.Pandoc.Walk (walk) import Text.Pandoc.Error @@ -51,15 +52,15 @@ readIpynb opts t = do Left _ -> case eitherDecode src of Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3 - Left err -> throwError $ PandocIpynbDecodingError err + Left err -> throwError $ PandocIpynbDecodingError $ T.pack err notebookToPandoc :: PandocMonad m => ReaderOptions -> Notebook a -> m Pandoc notebookToPandoc opts notebook = do let cells = notebookCells notebook let (fmt,fmtminor) = notebookFormat notebook - let m = M.insert "nbformat" (MetaString $ show fmt) $ - M.insert "nbformat_minor" (MetaString $ show fmtminor) $ + let m = M.insert "nbformat" (MetaString $ tshow fmt) $ + M.insert "nbformat_minor" (MetaString $ tshow fmtminor) $ jsonMetaToMeta (notebookMetadata notebook) let lang = case M.lookup "kernelspec" m of Just (MetaMap ks) -> @@ -72,7 +73,7 @@ notebookToPandoc opts notebook = do return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks cellToBlocks :: PandocMonad m - => ReaderOptions -> String -> Cell a -> m B.Blocks + => ReaderOptions -> Text -> Cell a -> m B.Blocks cellToBlocks opts lang c = do let Source ts = cellSource c let source = mconcat ts @@ -100,19 +101,18 @@ cellToBlocks opts lang c = do "text/markdown" -> "markdown" "text/x-rsrt" -> "rst" _ -> format - return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' - $ T.unpack source + return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do outputBlocks <- mconcat <$> mapM outputToBlock outputs - let kvs' = maybe kvs (\x -> ("execution_count", show x):kvs) ec + let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec return $ B.divWith ("",["cell","code"],kvs') $ - B.codeBlockWith ("",[lang],[]) (T.unpack source) + B.codeBlockWith ("",[lang],[]) source <> outputBlocks -- Remove attachment: prefix from images... fixImage :: Inline -> Inline fixImage (Image attr lab (src,tit)) - | "attachment:" `isPrefixOf` src = Image attr lab (drop 11 src, tit) + | "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit) fixImage x = x addAttachment :: PandocMonad m => (Text, MimeBundle) -> m () @@ -120,19 +120,19 @@ addAttachment (fname, mimeBundle) = do let fp = T.unpack fname case M.toList (unMimeBundle mimeBundle) of (mimeType, BinaryData bs):_ -> - insertMedia fp (Just $ T.unpack mimeType) (BL.fromStrict bs) + insertMedia fp (Just mimeType) (BL.fromStrict bs) (mimeType, TextualData t):_ -> - insertMedia fp (Just $ T.unpack mimeType) + insertMedia fp (Just mimeType) (BL.fromStrict $ TE.encodeUtf8 t) (mimeType, JsonData v):_ -> - insertMedia fp (Just $ T.unpack mimeType) (encode v) - [] -> report $ CouldNotFetchResource fp "no attachment" + insertMedia fp (Just mimeType) (encode v) + [] -> report $ CouldNotFetchResource fname "no attachment" outputToBlock :: PandocMonad m => Output a -> m B.Blocks outputToBlock Stream{ streamName = sName, streamText = Source text } = do - return $ B.divWith ("",["output","stream",T.unpack sName],[]) - $ B.codeBlock $ T.unpack . mconcat $ text + return $ B.divWith ("",["output","stream",sName],[]) + $ B.codeBlock $ T.concat $ text outputToBlock DisplayData{ displayData = data', displayMetadata = metadata' } = B.divWith ("",["output", "display_data"],[]) <$> @@ -140,15 +140,15 @@ outputToBlock DisplayData{ displayData = data', outputToBlock ExecuteResult{ executeCount = ec, executeData = data', executeMetadata = metadata' } = - B.divWith ("",["output", "execute_result"],[("execution_count",show ec)]) + B.divWith ("",["output", "execute_result"],[("execution_count",tshow ec)]) <$> handleData metadata' data' outputToBlock Err{ errName = ename, errValue = evalue, errTraceback = traceback } = do return $ B.divWith ("",["output","error"], - [("ename",T.unpack ename), - ("evalue",T.unpack evalue)]) - $ B.codeBlock $ T.unpack . T.unlines $ traceback + [("ename",ename), + ("evalue",evalue)]) + $ B.codeBlock $ T.unlines $ traceback -- We want to display the richest output possible given -- the output format. @@ -174,54 +174,53 @@ handleData metadata (MimeBundle mb) = let metaPairs = jsonMetaToPairs meta let bl = BL.fromStrict bs -- SHA1 hash for filename - let mt' = T.unpack mt - let fname = showDigest (sha1 bl) ++ - case extensionFromMimeType mt' of + let fname = T.pack (showDigest (sha1 bl)) <> + case extensionFromMimeType mt of Nothing -> "" - Just ext -> '.':ext - insertMedia fname (Just mt') bl + Just ext -> "." <> ext + insertMedia (T.unpack fname) (Just mt) bl return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty | otherwise = return mempty dataBlock ("text/html", TextualData t) - = return $ B.rawBlock "html" $ T.unpack t + = return $ B.rawBlock "html" $ t dataBlock ("text/latex", TextualData t) - = return $ B.rawBlock "latex" $ T.unpack t + = return $ B.rawBlock "latex" $ t dataBlock ("text/plain", TextualData t) = - return $ B.codeBlock $ T.unpack t + return $ B.codeBlock $ t dataBlock (_, JsonData v) = - return $ B.codeBlockWith ("",["json"],[]) $ toStringLazy $ encode v + return $ B.codeBlockWith ("",["json"],[]) $ T.pack $ toStringLazy $ encode v dataBlock _ = return mempty -jsonMetaToMeta :: JSONMeta -> M.Map String MetaValue -jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue +jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue +jsonMetaToMeta = M.map valueToMetaValue where valueToMetaValue :: Value -> MetaValue valueToMetaValue x@(Object{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success jm' -> MetaMap $ jsonMetaToMeta jm' valueToMetaValue x@(Array{}) = case fromJSON x of - Error s -> MetaString s + Error s -> MetaString $ T.pack s Success xs -> MetaList $ map valueToMetaValue xs valueToMetaValue (Bool b) = MetaBool b - valueToMetaValue (String t) = MetaString (T.unpack t) + valueToMetaValue (String t) = MetaString t valueToMetaValue (Number n) - | Scientific.isInteger n = MetaString (show (floor n :: Integer)) - | otherwise = MetaString (show n) + | Scientific.isInteger n = MetaString (tshow (floor n :: Integer)) + | otherwise = MetaString (tshow n) valueToMetaValue Aeson.Null = MetaString "" -jsonMetaToPairs :: JSONMeta -> [(String, String)] -jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map +jsonMetaToPairs :: JSONMeta -> [(Text, Text)] +jsonMetaToPairs = M.toList . M.map (\case String t | not (T.all isDigit t) , t /= "true" , t /= "false" - -> T.unpack t - x -> UTF8.toStringLazy $ Aeson.encode x) + -> t + x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index e074599eb..320b9c1dd 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.JATS Copyright : Copyright (C) 2017-2019 Hamish Mackenzie @@ -76,13 +77,13 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr = fromMaybe "" . maybeAttrValue attr -maybeAttrValue :: String -> Element -> Maybe String +maybeAttrValue :: String -> Element -> Maybe Text maybeAttrValue attr elt = - lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt) -- convenience function named :: String -> Element -> Bool @@ -90,7 +91,7 @@ named s e = qName (elName e) == s -- -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> JATS m () addMeta field val = modify (setMeta field val) instance HasMeta JATSState where @@ -126,15 +127,13 @@ isBlockElement (Elem e) = qName (elName e) `S.member` blocktags isBlockElement _ = False -- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs +trimNl :: Text -> Text +trimNl = T.dropAround (== '\n') -- function that is used by both graphic (in parseBlock) -- and inline-graphic (in parseInline) getGraphic :: PandocMonad m - => Maybe (Inlines, String) -> Element -> JATS m Inlines + => Maybe (Inlines, Text) -> Element -> JATS m Inlines getGraphic mbfigdata e = do let atVal a = attrValue a e (ident, title, caption) = @@ -142,7 +141,7 @@ getGraphic mbfigdata e = do Just (capt, i) -> (i, "fig:" <> atVal "title", capt) Nothing -> (atVal "id", atVal "title", text (atVal "alt-text")) - attr = (ident, words $ atVal "role", []) + attr = (ident, T.words $ atVal "role", []) imageUrl = atVal "href" return $ imageWith attr imageUrl title caption @@ -155,8 +154,8 @@ parseBlock :: PandocMonad m => Content -> JATS m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x + else return $ plain $ trimInlines $ text $ T.pack s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x parseBlock (Elem e) = case qName (elName e) of "p" -> parseMixed para (elContent e) @@ -167,7 +166,7 @@ parseBlock (Elem e) = "bullet" -> bulletList <$> listitems listType -> do let start = fromMaybe 1 $ - (strContent <$> (filterElement (named "list-item") e + (textContent <$> (filterElement (named "list-item") e >>= filterElement (named "label"))) >>= safeRead orderedListWith (start, parseListStyleType listType, DefaultDelim) @@ -204,7 +203,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e + $ trimNl $ textContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -267,9 +266,9 @@ parseBlock (Elem e) = Just "right" -> AlignRight Just "center" -> AlignCenter _ -> AlignDefault - let toWidth c = case findAttr (unqual "colwidth") c of + let toWidth c = case findAttrText (unqual "colwidth") c of Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> + $ safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w Nothing -> 0 :: Double let numrows = foldl' max 0 $ map length bodyrows @@ -363,7 +362,7 @@ parseRefList e = do return mempty parseRef :: PandocMonad m - => Element -> JATS m (Map.Map String MetaValue) + => Element -> JATS m (Map.Map Text MetaValue) parseRef e = do let refId = text $ attrValue "id" e let getInlineText n = maybe (return mempty) getInlines . filterChild (named n) @@ -396,7 +395,7 @@ parseRef e = do family <- maybe (return mempty) getInlines $ filterChild (named "surname") nm return $ toMetaValue $ Map.fromList [ - ("given", given) + ("given" :: Text, given) , ("family", family) ] personGroups <- mapM (\pg -> @@ -406,7 +405,7 @@ parseRef e = do toMetaValue names)) personGroups' return $ Map.fromList $ - [ ("id", toMetaValue refId) + [ ("id" :: Text, toMetaValue refId) , ("type", toMetaValue refType) , ("title", toMetaValue refTitle) , ("container-title", toMetaValue refContainerTitle) @@ -415,7 +414,7 @@ parseRef e = do , ("title", toMetaValue refTitle) , ("issued", toMetaValue $ Map.fromList [ - ("year", refYear) + ("year" :: Text, refYear) ]) , ("volume", toMetaValue refVolume) , ("page", toMetaValue refPages) @@ -424,6 +423,15 @@ parseRef e = do Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty -- TODO handle mixed-citation +findAttrText :: QName -> Element -> Maybe Text +findAttrText x = fmap T.pack . findAttr x + +textContent :: Element -> Text +textContent = T.pack . strContent + +textContentRecursive :: Element -> Text +textContentRecursive = T.pack . strContentRecursive + strContentRecursive :: Element -> String strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -433,9 +441,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines -parseInline (Text (CData _ s _)) = return $ text s +parseInline (Text (CData _ s _)) = return $ text $ T.pack s parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) text $ lookupEntity ref + return $ maybe (text $ T.toUpper $ T.pack ref) text $ T.pack <$> lookupEntity ref parseInline (Elem e) = case qName (elName e) of "italic" -> emph <$> innerInlines @@ -464,7 +472,7 @@ parseInline (Elem e) = "xref" -> do ils <- innerInlines let rid = attrValue "rid" e - let rids = words rid + let rids = T.words rid let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e let attr = (attrValue "id" e, [], maybeToList refType) return $ if refType == Just ("ref-type","bibr") @@ -477,13 +485,13 @@ parseInline (Elem e) = , citationNoteNum = 0 , citationHash = 0}) rids) ils - else linkWith attr ('#' : rid) "" ils + else linkWith attr ("#" <> rid) "" ils "ext-link" -> do ils <- innerInlines - let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> '#' : attrValue "rid" e + Just h -> T.pack h + _ -> "#" <> attrValue "rid" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, [], []) return $ linkWith attr href title ils' @@ -491,23 +499,23 @@ parseInline (Elem e) = "disp-formula" -> formula displayMath "inline-formula" -> formula math "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e - "tex-math" -> return . math $ strContent e + "tex-math" -> return . math $ textContent e - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e + "email" -> return $ link ("mailto:" <> textContent e) "" + $ str $ textContent e + "uri" -> return $ link (textContent e) "" $ str $ textContent e "fn" -> (note . mconcat) <$> mapM parseBlock (elContent e) _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> mapM parseInline (elContent e) mathML x = - case readMathML . showElement $ everywhere (mkT removePrefix) x of + case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of Left _ -> mempty Right m -> writeTeX m formula constructor = do let whereToLook = fromMaybe e $ filterElement (named "alternatives") e - texMaths = map strContent $ + texMaths = map textContent $ filterChildren (named "tex-math") whereToLook mathMLs = map mathML $ filterChildren isMathML whereToLook @@ -520,4 +528,4 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index be19964a4..5c9a3e69c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,9 +1,10 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.LaTeX Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -29,9 +30,9 @@ import Prelude import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (isDigit, isLetter, toLower, toUpper, chr) +import Data.Char (isDigit, isLetter, toUpper, chr) import Data.Default -import Data.List (intercalate, isPrefixOf) +import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set @@ -44,7 +45,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv, readFileFromDirs, report, setResourcePath, setTranslations, translateTerm, trace, fileExists) -import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError)) +import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -77,7 +78,7 @@ readLaTeX opts ltx = do (tokenize "source" (crFilter ltx)) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (T.unpack ltx) e + Left e -> throwError $ PandocParsecError ltx e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -99,13 +100,13 @@ parseLaTeX = do walk (resolveRefs (sLabels st)) doc' return $ Pandoc meta bs' -resolveRefs :: M.Map String [Inline] -> Inline -> Inline +resolveRefs :: M.Map Text [Inline] -> Inline -> Inline resolveRefs labels x@(Link (ident,classes,kvs) _ _) = case (lookup "reference-type" kvs, lookup "reference" kvs) of (Just "ref", Just lab) -> case M.lookup lab labels of - Just txt -> Link (ident,classes,kvs) txt ('#':lab, "") + Just txt -> Link (ident,classes,kvs) txt ("#" <> lab, "") Nothing -> x _ -> x resolveRefs _ x = x @@ -123,11 +124,11 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> (rawLaTeXParser toks True (do choice (map controlSeq @@ -151,14 +152,14 @@ beginOrEndCommand = try $ do (inlineEnvironments :: M.Map Text (LP PandocPure Inlines)) then mzero else return $ rawBlock "latex" - (T.unpack (txt <> untokenize rawargs)) + (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT String s m String + => ParserT Text s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany opt >> braced)) @@ -167,23 +168,23 @@ rawLaTeXInline = do inlines ) finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 - return $ raw <> finalbraces + return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" $ T.pack inp + let toks = tokenize "source" inp fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines -- inline elements: word :: PandocMonad m => LP m Inlines -word = (str . T.unpack . untoken) <$> satisfyTok isWordTok +word = (str . untoken) <$> satisfyTok isWordTok regularSymbol :: PandocMonad m => LP m Inlines -regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol +regularSymbol = (str . untoken) <$> satisfyTok isRegularSymbol where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t isRegularSymbol _ = False isSpecial c = c `Set.member` specialChars @@ -199,14 +200,14 @@ inlineGroup = do doLHSverb :: PandocMonad m => LP m Inlines doLHSverb = - (codeWith ("",["haskell"],[]) . T.unpack . untokenize) + (codeWith ("",["haskell"],[]) . untokenize) <$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|') -mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines -mkImage options src = do +mkImage :: PandocMonad m => [(Text, Text)] -> Text -> LP m Inlines +mkImage options (T.unpack -> src) = do let replaceTextwidth (k,v) = case numUnit v of - Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%") + Just (num, "\\textwidth") -> (k, showFl (num * 100) <> "%") _ -> (k, v) let kvs = map replaceTextwidth $ filter (\(k,_) -> k `elem` ["width", "height"]) options @@ -223,10 +224,10 @@ mkImage options src = do then return s' else findFile s es src' <- case takeExtension src of - "" | not (null defaultExt) -> return $ addExtension src defaultExt + "" | not (T.null defaultExt) -> return $ addExtension src $ T.unpack defaultExt | otherwise -> findFile src exts _ -> return src - return $ imageWith attr src' "" alt + return $ imageWith attr (T.pack src') "" alt doxspace :: PandocMonad m => LP m Inlines doxspace = @@ -435,7 +436,7 @@ siUnitMap = M.fromList , ("zetta", str "Z") ] -lit :: String -> LP m Inlines +lit :: Text -> LP m Inlines lit = pure . str removeDoubleQuotes :: Text -> Text @@ -471,7 +472,7 @@ quoted' :: PandocMonad m -> LP m () -> LP m Inlines quoted' f starter ender = do - startchs <- (T.unpack . untokenize) <$> starter + startchs <- untokenize <$> starter smart <- extensionEnabled Ext_smart <$> getOption readerExtensions if smart then do @@ -487,7 +488,7 @@ quoted' f starter ender = do enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines enquote starred mblang = do skipopts - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langspan = case lang of Nothing -> id Just l -> spanWith ("",[],[("lang", renderLang l)]) @@ -503,27 +504,27 @@ blockquote citations mblang = do cs <- cites NormalCitation False return $ para (cite cs mempty) else return mempty - let lang = (T.unpack <$> mblang) >>= babelLangToBCP47 + let lang = mblang >>= babelLangToBCP47 let langdiv = case lang of Nothing -> id Just l -> divWith ("",[],[("lang", renderLang l)]) bs <- grouped block return $ blockQuote . langdiv $ (bs <> citePar) -doAcronym :: PandocMonad m => String -> LP m Inlines +doAcronym :: PandocMonad m => Text -> LP m Inlines doAcronym form = do acro <- braced - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "singular+" ++ form)]) - $ str $ toksToString acro] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "singular+" <> form)]) + $ str $ untokenize acro] -doAcronymPlural :: PandocMonad m => String -> LP m Inlines +doAcronymPlural :: PandocMonad m => Text -> LP m Inlines doAcronymPlural form = do acro <- braced plural <- lit "s" - return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro), - ("acronym-form", "plural+" ++ form)]) $ - mconcat [str $ toksToString acro, plural]] + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "plural+" <> form)]) $ + mconcat [str $ untokenize acro, plural]] doverb :: PandocMonad m => LP m Inlines doverb = do @@ -532,7 +533,7 @@ doverb = do Just (c, ts) | T.null ts -> return c _ -> mzero withVerbatimMode $ - (code . T.unpack . untokenize) <$> + (code . untokenize) <$> manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) verbTok :: PandocMonad m => Char -> LP m Tok @@ -547,7 +548,7 @@ verbTok stopchar = do : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp return $ Tok pos toktype t1 -listingsLanguage :: [(String, String)] -> Maybe String +listingsLanguage :: [(Text, Text)] -> Maybe Text listingsLanguage opts = case lookup "language" opts of Nothing -> Nothing @@ -562,10 +563,10 @@ dolstinline = do domintinline :: PandocMonad m => LP m Inlines domintinline = do skipopts - cls <- toksToString <$> braced + cls <- untokenize <$> braced doinlinecode [cls] -doinlinecode :: PandocMonad m => [String] -> LP m Inlines +doinlinecode :: PandocMonad m => [Text] -> LP m Inlines doinlinecode classes = do Tok _ Symbol t <- anySymbol marker <- case T.uncons t of @@ -573,14 +574,14 @@ doinlinecode classes = do _ -> mzero let stopchar = if marker == '{' then '}' else marker withVerbatimMode $ - (codeWith ("",classes,[]) . map nlToSpace . T.unpack . untokenize) <$> + (codeWith ("",classes,[]) . T.map nlToSpace . untokenize) <$> manyTill (verbTok stopchar) (symbol stopchar) nlToSpace :: Char -> Char nlToSpace '\n' = ' ' nlToSpace x = x -keyval :: PandocMonad m => LP m (String, String) +keyval :: PandocMonad m => LP m (Text, Text) keyval = try $ do Tok _ Word key <- satisfyTok isWordTok optional sp @@ -601,35 +602,34 @@ keyval = try $ do _ -> True)))))) optional (symbol ',') optional sp - return (T.unpack key, T.unpack $ T.strip val) + return (key, T.strip val) -keyvals :: PandocMonad m => LP m [(String, String)] +keyvals :: PandocMonad m => LP m [(Text, Text)] keyvals = try $ symbol '[' >> manyTill keyval (symbol ']') accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines accent combiningAccent fallBack = try $ do ils <- tok case toList ils of - (Str (x:xs) : ys) -> return $ fromList $ + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ -- try to normalize to the combined character: - Str (T.unpack - (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent])) ++ xs) : ys - [Space] -> return $ str [fromMaybe combiningAccent fallBack] - [] -> return $ str [fromMaybe combiningAccent fallBack] + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack _ -> return ils -mathDisplay :: String -> Inlines +mathDisplay :: Text -> Inlines mathDisplay = displayMath . trimMath -mathInline :: String -> Inlines +mathInline :: Text -> Inlines mathInline = math . trimMath dollarsMath :: PandocMonad m => LP m Inlines dollarsMath = do symbol '$' display <- option False (True <$ symbol '$') - (do contents <- try $ T.unpack . untokenize <$> pDollarsMath 0 + (do contents <- try $ untokenize <$> pDollarsMath 0 if display then (mathDisplay contents <$ symbol '$') else return $ mathInline contents) @@ -682,10 +682,10 @@ simpleCiteArgs = try $ do } return $ addPrefix pre $ addSuffix suf $ map conv keys -citationLabel :: PandocMonad m => LP m String +citationLabel :: PandocMonad m => LP m Text citationLabel = do optional spaces - toksToString <$> + untokenize <$> (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) <* optional spaces <* optional (symbol ',') @@ -729,10 +729,10 @@ cites mode multi = try $ do addMprenote _ _ = [] addMpostnote = addSuffix . mpostnote -citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines +citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines citation name mode multi = do (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw) + return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) handleCitationPart :: Inlines -> [Citation] handleCitationPart ils = @@ -756,7 +756,7 @@ complexNatbibCitation mode = try $ do case cs of [] -> mzero (c:cits) -> return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" ++ toksToString raw) + (rawInline "latex" $ "\\citetext" <> untokenize raw) inNote :: Inlines -> Inlines inNote ils = @@ -780,10 +780,10 @@ tok :: PandocMonad m => LP m Inlines tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' where singleChar' = do Tok _ _ t <- singleChar - return (str (T.unpack t)) + return $ str t opt :: PandocMonad m => LP m Inlines -opt = bracketed inline <|> (str . T.unpack <$> rawopt) +opt = bracketed inline <|> (str <$> rawopt) paropt :: PandocMonad m => LP m Inlines paropt = parenWrapped inline @@ -822,26 +822,31 @@ overlayTok = inBrackets :: Inlines -> Inlines inBrackets x = str "[" <> x <> str "]" -unescapeURL :: String -> String -unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs - where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String) -unescapeURL (x:xs) = x:unescapeURL xs -unescapeURL [] = "" +unescapeURL :: Text -> Text +unescapeURL = T.concat . go . T.splitOn "\\" + where + isEscapable c = c `elemText` "#$%&~_^\\{}" + go (x:xs) = x : map unescapeInterior xs + go [] = [] + unescapeInterior t + | Just (c, _) <- T.uncons t + , isEscapable c = t + | otherwise = "\\" <> t mathEnvWith :: PandocMonad m => (Inlines -> a) -> Maybe Text -> Text -> LP m a mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name where inner x = case innerEnv of Nothing -> x - Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++ - "\\end{" ++ T.unpack y ++ "}" + Just y -> "\\begin{" <> y <> "}\n" <> x <> + "\\end{" <> y <> "}" -mathEnv :: PandocMonad m => Text -> LP m String +mathEnv :: PandocMonad m => Text -> LP m Text mathEnv name = do skipopts optional blankline res <- manyTill anyTok (end_ name) - return $ stripTrailingNewlines $ T.unpack $ untokenize res + return $ stripTrailingNewlines $ untokenize res inlineEnvironment :: PandocMonad m => LP m Inlines inlineEnvironment = try $ do @@ -914,9 +919,9 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) , ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok) - , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")")) - , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]")) - , ("ensuremath", mathInline . toksToString <$> braced) + , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . untokenize <$> braced) , ("texorpdfstring", const <$> tok <*> tok) , ("P", lit "¶") , ("S", lit "§") @@ -1008,16 +1013,15 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("lstinline", dolstinline) , ("mintinline", domintinline) , ("Verb", doverb) - , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("url", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (link url "" (str url))) - , ("nolinkurl", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url -> + , ("nolinkurl", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url -> pure (code url)) - , ("href", (unescapeURL . toksToString <$> + , ("href", (unescapeURL . untokenize <$> bracedUrl <* optional sp) >>= \url -> tok >>= \lab -> pure (link url "" lab)) , ("includegraphics", do options <- option [] keyvals - src <- unescapeURL . T.unpack . - removeDoubleQuotes . untokenize <$> braced + src <- unescapeURL . removeDoubleQuotes . untokenize <$> braced mkImage options src) , ("enquote*", enquote True Nothing) , ("enquote", enquote False Nothing) @@ -1172,22 +1176,21 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList ifdim :: PandocMonad m => LP m Inlines ifdim = do contents <- manyTill anyTok (controlSeq "fi") - return $ rawInline "latex" $ T.unpack $ - "\\ifdim" <> untokenize contents <> "\\fi" + return $ rawInline "latex" $ "\\ifdim" <> untokenize contents <> "\\fi" makeUppercase :: Inlines -> Inlines -makeUppercase = fromList . walk (alterStr (map toUpper)) . toList +makeUppercase = fromList . walk (alterStr T.toUpper) . toList makeLowercase :: Inlines -> Inlines -makeLowercase = fromList . walk (alterStr (map toLower)) . toList +makeLowercase = fromList . walk (alterStr T.toLower) . toList -alterStr :: (String -> String) -> Inline -> Inline +alterStr :: (Text -> Text) -> Inline -> Inline alterStr f (Str xs) = Str (f xs) alterStr _ x = x foreignlanguage :: PandocMonad m => LP m Inlines foreignlanguage = do - babelLang <- T.unpack . untokenize <$> braced + babelLang <- untokenize <$> braced case babelLangToBCP47 babelLang of Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok _ -> tok @@ -1196,24 +1199,24 @@ inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47 where mk (polyglossia, bcp47Func) = - ("text" <> T.pack polyglossia, inlineLanguage bcp47Func) + ("text" <> polyglossia, inlineLanguage bcp47Func) -inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines +inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines inlineLanguage bcp47Func = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt let lang = renderLang $ bcp47Func o extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok hyperlink :: PandocMonad m => LP m Inlines hyperlink = try $ do - src <- toksToString <$> braced + src <- untokenize <$> braced lab <- tok - return $ link ('#':src) "" lab + return $ link ("#" <> src) "" lab hypertargetBlock :: PandocMonad m => LP m Blocks hypertargetBlock = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced bs <- grouped block case toList bs of [Header 1 (ident,_,_) _] | ident == ref -> return bs @@ -1221,7 +1224,7 @@ hypertargetBlock = try $ do hypertargetInline :: PandocMonad m => LP m Inlines hypertargetInline = try $ do - ref <- toksToString <$> braced + ref <- untokenize <$> braced ils <- grouped inline return $ spanWith (ref, [], []) ils @@ -1231,7 +1234,7 @@ romanNumeralUpper = romanNumeralLower :: (PandocMonad m) => LP m Inlines romanNumeralLower = - str . map toLower . toRomanNumeral <$> romanNumeralArg + str . T.toLower . toRomanNumeral <$> romanNumeralArg romanNumeralArg :: (PandocMonad m) => LP m Int romanNumeralArg = spaces *> (parser <|> inBraces) @@ -1248,18 +1251,18 @@ romanNumeralArg = spaces *> (parser <|> inBraces) let (digits, rest) = T.span isDigit s unless (T.null rest) $ Prelude.fail "Non-digits in argument to \\Rn or \\RN" - safeRead $ T.unpack digits + safeRead digits newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a newToggle name = do updateState $ \st -> - st{ sToggles = M.insert (toksToString name) False (sToggles st) } + st{ sToggles = M.insert (untokenize name) False (sToggles st) } return mempty setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a setToggle on name = do updateState $ \st -> - st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) } + st{ sToggles = M.adjust (const on) (untokenize name) (sToggles st) } return mempty ifToggle :: PandocMonad m => LP m () @@ -1271,7 +1274,7 @@ ifToggle = do no <- braced toggles <- sToggles <$> getState inp <- getInput - let name' = toksToString name + let name' = untokenize name case M.lookup name' toggles of Just True -> setInput (yes ++ inp) Just False -> setInput (no ++ inp) @@ -1294,11 +1297,11 @@ ifstrequal = do else getInput >>= setInput . (ifnotequal ++) return mempty -coloredInline :: PandocMonad m => String -> LP m Inlines +coloredInline :: PandocMonad m => Text -> LP m Inlines coloredInline stylename = do skipopts color <- braced - spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok + spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok ttfamily :: PandocMonad m => LP m Inlines ttfamily = (code . stringify . toList) <$> tok @@ -1313,12 +1316,12 @@ rawInlineOr name' fallback = do processHBox :: Inlines -> Inlines processHBox = walk convert where - convert Space = Str [chr 160] -- non-breakable space - convert SoftBreak = Str [chr 160] -- non-breakable space + convert Space = Str $ T.singleton $ chr 160 -- non-breakable space + convert SoftBreak = Str $ T.singleton $ chr 160 -- non-breakable space convert LineBreak = Str "" convert x = x -getRawCommand :: PandocMonad m => Text -> Text -> LP m String +getRawCommand :: PandocMonad m => Text -> Text -> LP m Text getRawCommand name txt = do (_, rawargs) <- withRaw $ case name of @@ -1336,7 +1339,7 @@ getRawCommand name txt = do skipopts option "" (try dimenarg) void $ many braced - return $ T.unpack (txt <> untokenize rawargs) + return $ txt <> untokenize rawargs isFontSizeCommand :: Text -> Bool isFontSizeCommand "tiny" = True @@ -1396,17 +1399,17 @@ treatAsInline = Set.fromList dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ spanWith (refstr,[],[("label", refstr)]) - $ inBrackets $ str $ toksToString v + $ inBrackets $ str $ untokenize v -doref :: PandocMonad m => String -> LP m Inlines +doref :: PandocMonad m => Text -> LP m Inlines doref cls = do v <- braced - let refstr = toksToString v + let refstr = untokenize v return $ linkWith ("",[],[ ("reference-type", cls) , ("reference", refstr)]) - ('#':refstr) + ("#" <> refstr) "" (inBrackets $ str refstr) @@ -1435,11 +1438,11 @@ inline = (mempty <$ comment) <|> (str "\160" <$ symbol '~') <|> dollarsMath <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) - <|> (str . (:[]) <$> primEscape) + <|> (str . T.singleton <$> primEscape) <|> regularSymbol <|> (do res <- symbolIn "#^'`\"[]&" pos <- getPosition - let s = T.unpack (untoken res) + let s = untoken res report $ ParsingUnescaped s pos return $ str s) @@ -1498,7 +1501,7 @@ include name = do -- note, we can have cc_by_4.0 for example... _ | name == "usepackage" -> addExtension f ".sty" | otherwise -> addExtension f ".tex" - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" mapM_ (insertIncluded dirs) (map addExt fs) return mempty @@ -1509,19 +1512,19 @@ insertIncluded :: PandocMonad m insertIncluded dirs f = do pos <- getPosition containers <- getIncludeFiles <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show pos - updateState $ addIncludeFile f + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos + updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of Just s -> return s Nothing -> do - report $ CouldNotLoadIncludeFile f pos + report $ CouldNotLoadIncludeFile (T.pack f) pos return "" - getInput >>= setInput . (tokenize f (T.pack contents) ++) + getInput >>= setInput . (tokenize f contents ++) updateState dropLatestIncludeFile -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m () +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () addMeta field val = updateState $ \st -> st{ sMeta = addMetaField field val $ sMeta st } @@ -1536,10 +1539,10 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) -macroDef :: (PandocMonad m, Monoid a) => (String -> a) -> LP m a +macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a macroDef constructor = do (_, s) <- withRaw (commandDef <|> environmentDef) - (constructor (T.unpack $ untokenize s) <$ + (constructor (untokenize s) <$ guardDisabled Ext_latex_macros) <|> return mempty where commandDef = do @@ -1632,7 +1635,7 @@ newcommand = do case M.lookup name macros of Just macro | mtype == "newcommand" -> do - report $ MacroAlreadyDefined (T.unpack txt) pos + report $ MacroAlreadyDefined txt pos return (name, macro) | mtype == "providecommand" -> return (name, macro) _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) @@ -1658,7 +1661,7 @@ newenvironment = do case M.lookup name macros of Just _ | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined (T.unpack name) pos + report $ MacroAlreadyDefined name pos return Nothing | mtype == "provideenvironment" -> do return Nothing @@ -1669,7 +1672,7 @@ newenvironment = do bracketedNum :: PandocMonad m => LP m Int bracketedNum = do ds <- untokenize <$> bracketedToks - case safeRead (T.unpack ds) of + case safeRead ds of Just i -> return i _ -> return 0 @@ -1709,7 +1712,7 @@ section (ident, classes, kvs) lvl = do contents <- grouped inline lab <- option ident $ try (spaces >> controlSeq "label" - >> spaces >> toksToString <$> braced) + >> spaces >> untokenize <$> braced) when (lvl == 0) $ updateState $ \st -> st{ sHasChapters = True } unless ("unnumbered" `elem` classes) $ do @@ -1836,9 +1839,9 @@ blockCommands = M.fromList , ("centerline", (para . trimInlines) <$> (skipopts *> tok)) , ("caption", skipopts *> setCaption) , ("bibliography", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= - addMeta "bibliography" . splitBibs . toksToString)) + addMeta "bibliography" . splitBibs . untokenize)) , ("endinput", mempty <$ skipMany tok) -- includes , ("lstinputlisting", inputListing) @@ -1941,18 +1944,18 @@ rawEnv name = do (bs, raw) <- withRaw $ env name blocks if parseRaw then return $ rawBlock "latex" - $ T.unpack $ beginCommand <> untokenize raw + $ beginCommand <> untokenize raw else do - report $ SkippedContent (T.unpack beginCommand) pos1 + report $ SkippedContent beginCommand pos1 pos2 <- getPosition - report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2 + report $ SkippedContent ("\\end{" <> name <> "}") pos2 return bs rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do pos <- getPosition (_, raw) <- withRaw $ verbEnv name - let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw + let raw' = "\\begin{" <> name <> "}" <> untokenize raw exts <- getOption readerExtensions let parseRaw = extensionEnabled Ext_raw_tex exts if parseRaw @@ -1961,12 +1964,11 @@ rawVerbEnv name = do report $ SkippedContent raw' pos return mempty -verbEnv :: PandocMonad m => Text -> LP m String +verbEnv :: PandocMonad m => Text -> LP m Text verbEnv name = withVerbatimMode $ do optional blankline res <- manyTill anyTok (end_ name) - return $ T.unpack - $ stripTrailingNewline + return $ stripTrailingNewline $ untokenize $ res @@ -2010,11 +2012,11 @@ minted = do mintedAttr :: PandocMonad m => LP m Attr mintedAttr = do options <- option [] keyvals - lang <- toksToString <$> braced + lang <- untokenize <$> braced let kvs = [ (if k == "firstnumber" then "startFrom" else k, v) | (k,v) <- options ] - let classes = [ lang | not (null lang) ] ++ + let classes = [ lang | not (T.null lang) ] ++ [ "numberLines" | lookup "linenos" options == Just "true" ] return ("",classes,kvs) @@ -2023,14 +2025,14 @@ inputMinted :: PandocMonad m => LP m Blocks inputMinted = do pos <- getPosition attr <- mintedAttr - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) rawcode <- case mbCode of Just s -> return s Nothing -> do report $ CouldNotLoadIncludeFile f pos - return [] + return "" return $ B.codeBlockWith attr rawcode letterContents :: PandocMonad m => LP m Blocks @@ -2052,10 +2054,10 @@ figure = try $ do addImageCaption :: PandocMonad m => Blocks -> LP m Blocks addImageCaption = walkM go where go (Image attr@(_, cls, kvs) alt (src,tit)) - | not ("fig:" `isPrefixOf` tit) = do + | not ("fig:" `T.isPrefixOf` tit) = do (mbcapt, mblab) <- sCaption <$> getState let (alt', tit') = case mbcapt of - Just ils -> (toList ils, "fig:" ++ tit) + Just ils -> (toList ils, "fig:" <> tit) Nothing -> (alt, tit) attr' = case mblab of Just lab -> (lab, cls, kvs) @@ -2090,23 +2092,23 @@ addImageCaption = walkM go return $ Image attr' alt' (src, tit') go x = return x -coloredBlock :: PandocMonad m => String -> LP m Blocks +coloredBlock :: PandocMonad m => Text -> LP m Blocks coloredBlock stylename = try $ do skipopts color <- braced notFollowedBy (grouped inline) - let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) + let constructor = divWith ("",[],[("style",stylename <> ": " <> untokenize color)]) constructor <$> grouped block graphicsPath :: PandocMonad m => LP m Blocks graphicsPath = do - ps <- map toksToString <$> + ps <- map (T.unpack . untokenize) <$> (bgroup *> spaces *> manyTill (braced <* spaces) egroup) - getResourcePath >>= setResourcePath . (++ ps) + getResourcePath >>= setResourcePath . (<> ps) return mempty -splitBibs :: String -> [Inlines] -splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',') +splitBibs :: Text -> [Inlines] +splitBibs = map (str . T.pack . flip replaceExtension "bib" . T.unpack . trim) . splitTextBy (==',') alltt :: Blocks -> Blocks alltt = walk strToCode @@ -2115,7 +2117,7 @@ alltt = walk strToCode strToCode SoftBreak = LineBreak strToCode x = x -parseListingsOptions :: [(String, String)] -> Attr +parseListingsOptions :: [(Text, Text)] -> Attr parseListingsOptions options = let kvs = [ (if k == "firstnumber" then "startFrom" @@ -2129,23 +2131,23 @@ inputListing :: PandocMonad m => LP m Blocks inputListing = do pos <- getPosition options <- option [] keyvals - f <- filter (/='"') . toksToString <$> braced - dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs f + f <- T.filter (/='"') . untokenize <$> braced + dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS" + mbCode <- readFileFromDirs dirs (T.unpack f) codeLines <- case mbCode of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do report $ CouldNotLoadIncludeFile f pos return [] let (ident,classes,kvs) = parseListingsOptions options let classes' = (case listingsLanguage options of - Nothing -> (take 1 (languagesByExtension (takeExtension f)) ++) + Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>) Just _ -> id) classes let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead let lastline = fromMaybe (length codeLines) $ lookup "lastline" options >>= safeRead - let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $ + let codeContents = T.intercalate "\n" $ take (1 + lastline - firstline) $ drop (firstline - 1) codeLines return $ codeBlockWith (ident,classes',kvs) codeContents @@ -2176,12 +2178,12 @@ orderedList' = try $ do spaces let markerSpec = do symbol '[' - ts <- toksToString <$> manyTill anyTok (symbol ']') + ts <- untokenize <$> manyTill anyTok (symbol ']') case runParser anyOrderedListMarker def "option" ts of Right r -> return r Left _ -> do pos <- getPosition - report $ SkippedContent ("[" ++ ts ++ "]") pos + report $ SkippedContent ("[" <> ts <> "]") pos return (1, DefaultStyle, DefaultDelim) (_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec spaces @@ -2191,17 +2193,17 @@ orderedList' = try $ do spaces start <- option 1 $ try $ do pos <- getPosition controlSeq "setcounter" - ctr <- toksToString <$> braced - guard $ "enum" `isPrefixOf` ctr - guard $ all (`elem` ['i','v']) (drop 4 ctr) + ctr <- untokenize <$> braced + guard $ "enum" `T.isPrefixOf` ctr + guard $ T.all (`elem` ['i','v']) (T.drop 4 ctr) optional sp - num <- toksToString <$> braced + num <- untokenize <$> braced case safeRead num of Just i -> return (i + 1 :: Int) Nothing -> do report $ SkippedContent - ("\\setcounter{" ++ ctr ++ - "}{" ++ num ++ "}") pos + ("\\setcounter{" <> ctr <> + "}{" <> num <> "}") pos return 1 bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs @@ -2235,7 +2237,7 @@ splitWordTok = do inp <- getInput case inp of (Tok spos Word t : rest) -> - setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest _ -> return () parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))] @@ -2256,7 +2258,7 @@ parseAligns = try $ do let alignSuffix = symbol '<' >> braced let colWidth = try $ do symbol '{' - ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth") + ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") spaces symbol '}' case safeRead ds of @@ -2266,7 +2268,7 @@ parseAligns = try $ do pref <- option [] alignPrefix spaces al <- alignChar - width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced + width <- colWidth <|> option 0.0 (do s <- untokenize <$> braced pos <- getPosition report $ SkippedContent s pos return 0.0) @@ -2276,13 +2278,13 @@ parseAligns = try $ do let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro symbol '*' spaces - ds <- trim . toksToString <$> braced + ds <- trim . untokenize <$> braced spaces spec <- braced case safeRead ds of Just n -> getInput >>= setInput . (mconcat (replicate n spec) ++) - Nothing -> Prelude.fail $ "Could not parse " ++ ds ++ " as number" + Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" bgroup spaces maybeBar @@ -2379,7 +2381,7 @@ block = do <|> blockCommand <|> paragraph <|> grouped block - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blocks :: PandocMonad m => LP m Blocks @@ -2387,9 +2389,9 @@ blocks = mconcat <$> many block setDefaultLanguage :: PandocMonad m => LP m Blocks setDefaultLanguage = do - o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']')) + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') <$> rawopt - polylang <- toksToString <$> braced + polylang <- untokenize <$> braced case M.lookup polylang polyglossiaLangToBCP47 of Nothing -> return mempty -- TODO mzero? warning? Just langFunc -> do diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index b21398f93..7ec432a4a 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.LaTeX.Lang Copyright : Copyright (C) 2018-2019 John MacFarlane @@ -18,11 +19,12 @@ module Text.Pandoc.Readers.LaTeX.Lang where import Prelude import qualified Data.Map as M +import qualified Data.Text as T import Text.Pandoc.BCP47 (Lang(..)) -polyglossiaLangToBCP47 :: M.Map String (String -> Lang) +polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList - [ ("arabic", \o -> case filter (/=' ') o of + [ ("arabic", \o -> case T.filter (/=' ') o of "locale=algeria" -> Lang "ar" "" "DZ" [] "locale=mashriq" -> Lang "ar" "" "SY" [] "locale=libya" -> Lang "ar" "" "LY" [] @@ -30,7 +32,7 @@ polyglossiaLangToBCP47 = M.fromList "locale=mauritania" -> Lang "ar" "" "MR" [] "locale=tunisia" -> Lang "ar" "" "TN" [] _ -> Lang "ar" "" "" []) - , ("german", \o -> case filter (/=' ') o of + , ("german", \o -> case T.filter (/=' ') o of "spelling=old" -> Lang "de" "" "DE" ["1901"] "variant=austrian,spelling=old" -> Lang "de" "" "AT" ["1901"] @@ -40,11 +42,11 @@ polyglossiaLangToBCP47 = M.fromList "variant=swiss" -> Lang "de" "" "CH" [] _ -> Lang "de" "" "" []) , ("lsorbian", \_ -> Lang "dsb" "" "" []) - , ("greek", \o -> case filter (/=' ') o of + , ("greek", \o -> case T.filter (/=' ') o of "variant=poly" -> Lang "el" "" "polyton" [] "variant=ancient" -> Lang "grc" "" "" [] _ -> Lang "el" "" "" []) - , ("english", \o -> case filter (/=' ') o of + , ("english", \o -> case T.filter (/=' ') o of "variant=australian" -> Lang "en" "" "AU" [] "variant=canadian" -> Lang "en" "" "CA" [] "variant=british" -> Lang "en" "" "GB" [] @@ -52,7 +54,7 @@ polyglossiaLangToBCP47 = M.fromList "variant=american" -> Lang "en" "" "US" [] _ -> Lang "en" "" "" []) , ("usorbian", \_ -> Lang "hsb" "" "" []) - , ("latin", \o -> case filter (/=' ') o of + , ("latin", \o -> case T.filter (/=' ') o of "variant=classic" -> Lang "la" "" "" ["x-classic"] _ -> Lang "la" "" "" []) , ("slovenian", \_ -> Lang "sl" "" "" []) @@ -133,7 +135,7 @@ polyglossiaLangToBCP47 = M.fromList , ("vietnamese", \_ -> Lang "vi" "" "" []) ] -babelLangToBCP47 :: String -> Maybe Lang +babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of "austrian" -> Just $ Lang "de" "" "AT" ["1901"] diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 14cb408b0..a01abda46 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -97,8 +97,8 @@ import Text.Parsec.Pos newtype DottedNum = DottedNum [Int] deriving (Show) -renderDottedNum :: DottedNum -> String -renderDottedNum (DottedNum xs) = +renderDottedNum :: DottedNum -> T.Text +renderDottedNum (DottedNum xs) = T.pack $ intercalate "." (map show xs) incrementDottedNum :: Int -> DottedNum -> DottedNum @@ -111,18 +111,18 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sMeta :: Meta , sQuoteContext :: QuoteContext , sMacros :: M.Map Text Macro - , sContainers :: [String] + , sContainers :: [Text] , sLogMessages :: [LogMessage] - , sIdentifiers :: Set.Set String + , sIdentifiers :: Set.Set Text , sVerbatimMode :: Bool - , sCaption :: (Maybe Inlines, Maybe String) + , sCaption :: (Maybe Inlines, Maybe Text) , sInListItem :: Bool , sInTableCell :: Bool , sLastHeaderNum :: DottedNum , sLastFigureNum :: DottedNum - , sLabels :: M.Map String [Inline] + , sLabels :: M.Map Text [Inline] , sHasChapters :: Bool - , sToggles :: M.Map String Bool + , sToggles :: M.Map Text Bool , sExpanded :: Bool } deriving Show @@ -202,7 +202,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT String s m (a, String) + -> ParserT Text s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -233,16 +233,16 @@ rawLaTeXParser toks retokenize parser valParser = do , not (" " `T.isSuffixOf` result) -> result <> " " _ -> result - return (val, T.unpack result') + return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => String -> ParserT String s m String + => Text -> ParserT Text s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> - do let retokenize = toksToString <$> many (satisfyTok (const True)) + do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate , sMacros = extractMacros pstate } - res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s)) + res <- runParserT retokenize lstate "math" (tokenize "math" s) case res of Left e -> Prelude.fail (show e) Right s' -> return s' @@ -307,7 +307,7 @@ totoks pos t = : totoks (incSourceColumn pos 2) rest' | c == '#' -> let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest - in case safeRead (T.unpack t1) of + in case safeRead t1 of Just i -> Tok pos (Arg i) ("#" <> t1) : totoks (incSourceColumn pos (1 + T.length t1)) t2 @@ -447,7 +447,7 @@ doMacros' n inp = do handleMacros n' spos name ts = do when (n' > 20) -- detect macro expansion loops - $ throwError $ PandocMacroLoop (T.unpack name) + $ throwError $ PandocMacroLoop name macros <- sMacros <$> getState case M.lookup name macros of Nothing -> mzero @@ -588,7 +588,7 @@ primEscape = do | c >= '\64' && c <= '\127' -> return (chr (ord c - 64)) | otherwise -> return (chr (ord c + 64)) Nothing -> Prelude.fail "Empty content of Esc1" - Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of + Esc2 -> case safeRead ("0x" <> T.drop 2 t) of Just x -> return (chr x) Nothing -> Prelude.fail $ "Could not read: " ++ T.unpack t _ -> Prelude.fail "Expected an Esc1 or Esc2 token" -- should not happen @@ -677,7 +677,7 @@ dimenarg = try $ do guard $ rest `elem` ["", "pt","pc","in","bp","cm","mm","dd","cc","sp"] return $ T.pack ['=' | ch] <> minus <> s -ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a +ignore :: (Monoid a, PandocMonad m) => Text -> ParserT s u m a ignore raw = do pos <- getPosition report $ SkippedContent raw pos diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index ddf469222..feacb8450 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -1,4 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Man Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -63,7 +65,7 @@ readWithMTokens :: PandocMonad m -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = - let leftF = PandocParsecError . intercalate "\n" $ show <$> input + let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input in mapLeft leftF `liftM` runParserT parser state "source" input parseMan :: PandocMonad m => ManParser m Pandoc @@ -141,7 +143,7 @@ parseTable = do isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] isHrule (_, [RoffTokens ss]) = case Foldable.toList ss of - [TextLine [RoffStr [c]]] -> c `elem` ['_','-','='] + [TextLine [RoffStr (T.unpack -> [c])]] -> c `elem` ['_','-','='] _ -> False isHrule _ = False @@ -191,7 +193,7 @@ memptyLine = msatisfy isEmptyLine where isEmptyLine EmptyLine = True isEmptyLine _ = False -mmacro :: PandocMonad m => String -> ManParser m RoffToken +mmacro :: PandocMonad m => T.Text -> ManParser m RoffToken mmacro mk = msatisfy isControlLine where isControlLine (ControlLine mk' _ _) | mk == mk' = True | otherwise = False @@ -284,7 +286,7 @@ parseInline = try $ do _ -> mzero handleInlineMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> ManParser m Inlines + => T.Text -> [Arg] -> SourcePos -> ManParser m Inlines handleInlineMacro mname args _pos = do case mname of "UR" -> parseLink args @@ -339,7 +341,7 @@ bareIP = msatisfy isBareIP where isBareIP (ControlLine "IP" [] _) = True isBareIP _ = False -endmacro :: PandocMonad m => String -> ManParser m () +endmacro :: PandocMonad m => T.Text -> ManParser m () endmacro name = void (mmacro name) <|> lookAhead (void newBlockMacro) <|> lookAhead eof @@ -356,7 +358,7 @@ parseCodeBlock = try $ do toks <- (mmacro "nf" *> manyTill codeline (endmacro "fi")) <|> (mmacro "EX" *> manyTill codeline (endmacro "EE")) optional (mmacro "in") - return $ codeBlock (intercalate "\n" $ catMaybes toks) + return $ codeBlock (T.intercalate "\n" $ catMaybes toks) where @@ -366,7 +368,7 @@ parseCodeBlock = try $ do ControlLine "PP" _ _ -> return $ Just "" -- .PP sometimes used for blank line ControlLine mname args pos -> do (Just . query getText <$> handleInlineMacro mname args pos) <|> - do report $ SkippedContent ('.':mname) pos + do report $ SkippedContent ("." <> mname) pos return Nothing Tbl _ _ pos -> do report $ SkippedContent "TABLE" pos @@ -375,12 +377,12 @@ parseCodeBlock = try $ do TextLine ss | not (null ss) , all isFontToken ss -> return Nothing - | otherwise -> return $ Just $ linePartsToString ss + | otherwise -> return $ Just $ linePartsToText ss isFontToken Font{} = True isFontToken _ = False - getText :: Inline -> String + getText :: Inline -> T.Text getText (Str s) = s getText Space = " " getText (Code _ s) = s @@ -416,8 +418,8 @@ listItem mbListType = try $ do (ControlLine _ args _) <- mmacro "IP" case args of (arg1 : _) -> do - let cs = linePartsToString arg1 - let cs' = if not ('.' `elem` cs || ')' `elem` cs) then cs ++ "." else cs + let cs = linePartsToText arg1 + let cs' = if not (T.any (== '.') cs || T.any (== ')') cs) then cs <> "." else cs let lt = case Parsec.runParser anyOrderedListMarker defaultParserState "list marker" cs' of Right (start, listtype, listdelim) @@ -467,7 +469,7 @@ parseLink args = do ControlLine _ endargs _ <- mmacro "UE" let url = case args of [] -> "" - (x:_) -> linePartsToString x + (x:_) -> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -479,7 +481,7 @@ parseEmailLink args = do ControlLine _ endargs _ <- mmacro "ME" let url = case args of [] -> "" - (x:_) -> "mailto:" ++ linePartsToString x + (x:_) -> "mailto:" <> linePartsToText x return $ link url "" contents <> case endargs of [] -> mempty @@ -490,6 +492,6 @@ skipUnknownMacro = do tok <- mmacroAny case tok of ControlLine mkind _ pos -> do - report $ SkippedContent ('.':mkind) pos + report $ SkippedContent ("." <> mkind) pos return mempty _ -> Prelude.fail "the impossible happened" diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4807baada..f8349ea99 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1,7 +1,9 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Markdown Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,14 +21,15 @@ import Prelude import Control.Monad import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BS -import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower) -import Data.List (intercalate, sortBy, transpose, elemIndex) +import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Data.List (sortBy, transpose, elemIndex) import qualified Data.Map as M import Data.Maybe import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.YAML as YAML import qualified Data.YAML.Event as YE import System.FilePath (addExtension, takeExtension) @@ -47,7 +50,7 @@ import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) -type MarkdownParser m = ParserT [Char] ParserState m +type MarkdownParser m = ParserT Text ParserState m -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: PandocMonad m @@ -56,7 +59,7 @@ readMarkdown :: PandocMonad m -> m Pandoc readMarkdown opts s = do parsed <- readWithM parseMarkdown def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e @@ -77,7 +80,7 @@ isHruleChar '-' = True isHruleChar '_' = True isHruleChar _ = False -setextHChars :: String +setextHChars :: [Char] setextHChars = "=-" isBlank :: Char -> Bool @@ -96,30 +99,30 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT [Char] st m () +spnl :: PandocMonad m => ParserT Text st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT [Char] st m String +spnl' :: PandocMonad m => ParserT Text st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline <*> (many spaceChar <* notFollowedBy (char '\n')) - return (xs ++ ys) + return $ T.pack $ xs ++ ys -indentSpaces :: PandocMonad m => MarkdownParser m String +indentSpaces :: PandocMonad m => MarkdownParser m Text indentSpaces = try $ do tabStop <- getOption readerTabStop - count tabStop (char ' ') <|> - string "\t" <?> "indentation" + countChar tabStop (char ' ') <|> + textStr "\t" <?> "indentation" -nonindentSpaces :: PandocMonad m => MarkdownParser m String +nonindentSpaces :: PandocMonad m => MarkdownParser m Text nonindentSpaces = do n <- skipNonindentSpaces - return $ replicate n ' ' + return $ T.replicate n " " -- returns number of spaces parsed skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int @@ -139,8 +142,9 @@ inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines) inlinesInBalancedBrackets = try $ char '[' >> withRaw (go 1) >>= parseFromString inlines . stripBracket . snd - where stripBracket [] = [] - stripBracket xs = if last xs == ']' then init xs else xs + where stripBracket t = case T.unsnoc t of + Just (t', ']') -> t' + _ -> t go :: PandocMonad m => Int -> MarkdownParser m () go 0 = return () go openBrackets = @@ -160,7 +164,7 @@ inlinesInBalancedBrackets = -- document structure -- -rawTitleBlockLine :: PandocMonad m => MarkdownParser m String +rawTitleBlockLine :: PandocMonad m => MarkdownParser m Text rawTitleBlockLine = do char '%' skipSpaces @@ -169,7 +173,7 @@ rawTitleBlockLine = do notFollowedBy blankline skipSpaces anyLine - return $ trim $ unlines (first:rest) + return $ trim $ T.unlines (first:rest) titleLine :: PandocMonad m => MarkdownParser m (F Inlines) titleLine = try $ do @@ -222,9 +226,9 @@ yamlMetaBlock = try $ do notFollowedBy blankline -- if --- is followed by a blank it's an HRULE rawYamlLines <- manyTill anyLine stopLine -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."])) + let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml + newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -- Since `<>` is left-biased, existing values are not touched: updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } return mempty @@ -255,7 +259,7 @@ yamlBsToMeta bstr = do return . return $ mempty Left (_pos, err') -> do logMessage $ CouldNotParseYamlMetadata - err' pos + (T.pack err') pos return . return $ mempty nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text @@ -270,11 +274,11 @@ toMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with -- `|` or `>` will. - if (T.pack "\n") `T.isSuffixOf` x - then parseFromString' (asBlocks <$> parseBlocks) (xstring <> "\n") + if "\n" `T.isSuffixOf` x + then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n") else parseFromString' ((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks)) - xstring + x where pInlines = trimInlinesF . mconcat <$> manyTill inline eof asBlocks p = do p' <- p @@ -282,7 +286,6 @@ toMetaValue x = asInlines p = do p' <- p return $ MetaInlines (B.toList p') - xstring = T.unpack x checkBoolean :: Text -> Maybe Bool checkBoolean t = @@ -298,8 +301,8 @@ yamlToMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> toMetaValue t YAML.SBool b -> return $ return $ MetaBool b - YAML.SFloat d -> return $ return $ MetaString (show d) - YAML.SInt i -> return $ return $ MetaString (show i) + YAML.SFloat d -> return $ return $ MetaString $ tshow d + YAML.SInt i -> return $ return $ MetaString $ tshow i YAML.SUnknown _ t -> case checkBoolean t of Just b -> return $ return $ MetaBool b @@ -315,7 +318,7 @@ yamlToMetaValue _ = return $ return $ MetaString "" yamlMap :: PandocMonad m => M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> MarkdownParser m (F (M.Map String MetaValue)) + -> MarkdownParser m (F (M.Map Text MetaValue)) yamlMap o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- nodeToKey key @@ -323,12 +326,12 @@ yamlMap o = do let kvs' = filter (not . ignorable . fst) kvs (fmap M.fromList . sequence) <$> mapM toMeta kvs' where - ignorable t = (T.pack "_") `T.isSuffixOf` t + ignorable t = "_" `T.isSuffixOf` t toMeta (k, v) = do fv <- yamlToMetaValue v return $ do v' <- fv - return (T.unpack k, v') + return (k, v') stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () @@ -343,14 +346,14 @@ mmdTitleBlock = try $ do updateState $ \st -> st{ stateMeta' = stateMeta' st <> return (Meta $ M.fromList kvPairs) } -kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue) +kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue) kvPair allowEmpty = try $ do - key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') - val <- trim <$> manyTill anyChar + key <- many1TillChar (alphaNum <|> oneOf "_- ") (char ':') + val <- trim <$> manyTillChar anyChar (try $ newline >> lookAhead (blankline <|> nonspaceChar)) - guard $ allowEmpty || not (null val) - let key' = concat $ words $ map toLower key - let val' = MetaBlocks $ B.toList $ B.plain $B.text val + guard $ allowEmpty || not (T.null val) + let key' = T.concat $ T.words $ T.toLower key + let val' = MetaBlocks $ B.toList $ B.plain $ B.text val return (key',val') parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc @@ -380,13 +383,13 @@ referenceKey = try $ do (_,raw) <- reference char ':' skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[') - let sourceURL = fmap unwords $ many $ try $ do + let sourceURL = fmap T.unwords $ many $ try $ do skipMany spaceChar notFollowedBy' referenceTitle notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes notFollowedBy' (() <$ reference) - many1 $ notFollowedBy space >> litChar - let betweenAngles = try $ char '<' >> manyTill litChar (char '>') + many1Char $ notFollowedBy space >> litChar + let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" referenceTitle attr <- option nullAttr $ try $ @@ -411,20 +414,20 @@ referenceKey = try $ do updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys } return $ return mempty -referenceTitle :: PandocMonad m => MarkdownParser m String +referenceTitle :: PandocMonad m => MarkdownParser m Text referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar -- A link title in quotes -quotedTitle :: PandocMonad m => Char -> MarkdownParser m String +quotedTitle :: PandocMonad m => Char -> MarkdownParser m Text quotedTitle c = try $ do char c notFollowedBy spaces let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum) - let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar - let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c - unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder + let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar + let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c + T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for @@ -440,21 +443,21 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: PandocMonad m => MarkdownParser m String -noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') +noteMarker :: PandocMonad m => MarkdownParser m Text +noteMarker = string "[^" >> many1TillChar (satisfy $ not . isBlank) (char ']') -rawLine :: PandocMonad m => MarkdownParser m String +rawLine :: PandocMonad m => MarkdownParser m Text rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: PandocMonad m => MarkdownParser m String +rawLines :: PandocMonad m => MarkdownParser m Text rawLines = do first <- anyLine rest <- many rawLine - return $ unlines (first:rest) + return $ T.unlines (first:rest) noteBlock :: PandocMonad m => MarkdownParser m (F Blocks) noteBlock = try $ do @@ -466,7 +469,7 @@ noteBlock = try $ do optional indentSpaces first <- rawLines rest <- many $ try $ blanklines >> indentSpaces >> rawLines - let raw = unlines (first:rest) ++ "\n" + let raw = T.unlines (first:rest) <> "\n" optional blanklines parsed <- parseFromString' parseBlocks raw oldnotes <- stateNotes' <$> getState @@ -510,7 +513,7 @@ block = do , para , plain ] <?> "block" - trace (take 60 $ show $ B.toList $ runF res defaultParserState) + trace (T.take 60 $ tshow $ B.toList $ runF res defaultParserState) return res -- @@ -570,7 +573,7 @@ mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr mmdHeaderIdentifier = do (_, raw) <- reference let raw' = trim $ stripFirstAndLast raw - let ident = concat $ words $ map toLower raw' + let ident = T.concat $ T.words $ T.toLower raw' let attr = (ident, [], []) guardDisabled Ext_implicit_header_references <|> registerImplicitHeader raw' attr @@ -600,20 +603,20 @@ setextHeader = try $ do <|> registerImplicitHeader raw attr' return $ B.headerWith attr' level <$> text -registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m () +registerImplicitHeader :: PandocMonad m => Text -> Attr -> MarkdownParser m () registerImplicitHeader raw attr@(ident, _, _) - | null raw = return () + | T.null raw = return () | otherwise = do - let key = toKey $ "[" ++ raw ++ "]" + let key = toKey $ "[" <> raw <> "]" updateState $ \s -> - s { stateHeaderKeys = M.insert key (('#':ident,""), attr) + s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr) (stateHeaderKeys s) } -- -- hrule block -- -hrule :: PandocMonad m => ParserT [Char] st m (F Blocks) +hrule :: PandocMonad m => ParserT Text st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -627,13 +630,13 @@ hrule = try $ do -- code blocks -- -indentedLine :: PandocMonad m => MarkdownParser m String +indentedLine :: PandocMonad m => MarkdownParser m Text indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT [Char] ParserState m Int + -> ParserT Text ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -652,11 +655,11 @@ attributes = try $ do attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr -identifier :: PandocMonad m => MarkdownParser m String +identifier :: PandocMonad m => MarkdownParser m Text identifier = do first <- letter rest <- many $ alphaNum <|> oneOf "-_:." - return (first:rest) + return $ T.pack (first:rest) identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) identifierAttr = try $ do @@ -674,15 +677,15 @@ keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) keyValAttr = try $ do key <- identifier char '=' - val <- enclosed (char '"') (char '"') litChar - <|> enclosed (char '\'') (char '\'') litChar + val <- T.pack <$> enclosed (char '"') (char '"') litChar + <|> T.pack <$> enclosed (char '\'') (char '\'') litChar <|> ("" <$ try (string "\"\"")) <|> ("" <$ try (string "''")) - <|> many (escapedChar' <|> noneOf " \t\n\r}") + <|> manyChar (escapedChar' <|> noneOf " \t\n\r}") return $ \(id',cs,kvs) -> case key of "id" -> (val,cs,kvs) - "class" -> (id',cs ++ words val,kvs) + "class" -> (id',cs ++ T.words val,kvs) _ -> (id',cs,kvs ++ [(key,val)]) specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr) @@ -690,12 +693,12 @@ specialAttr = do char '-' return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs) -rawAttribute :: PandocMonad m => MarkdownParser m String +rawAttribute :: PandocMonad m => MarkdownParser m Text rawAttribute = do char '{' skipMany spaceChar char '=' - format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_") + format <- many1Char $ satisfy (\c -> isAlphaNum c || c `elem` ['-', '_']) skipMany spaceChar char '}' return format @@ -703,7 +706,7 @@ rawAttribute = do codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do indentchars <- nonindentSpaces - let indentLevel = length indentchars + let indentLevel = T.length indentchars c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing @@ -713,9 +716,9 @@ codeBlockFenced = try $ do <|> (Right <$> option ("",[],[]) (try (guardEnabled Ext_fenced_code_attributes >> attributes) - <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar))) + <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar))) blankline - contents <- intercalate "\n" <$> + contents <- T.intercalate "\n" <$> manyTill (gobbleAtMostSpaces indentLevel >> anyLine) (try $ do blockDelimiter (== c) (Just size) @@ -726,8 +729,8 @@ codeBlockFenced = try $ do Right attr -> B.codeBlockWith attr contents -- correctly handle github language identifiers -toLanguageId :: String -> String -toLanguageId = map toLower . go +toLanguageId :: Text -> Text +toLanguageId = T.toLower . go where go "c++" = "cpp" go "objective-c" = "objectivec" go x = x @@ -737,11 +740,11 @@ codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines l <- indentedLine - return $ b ++ l)) + return $ b <> l)) optional blanklines classes <- getOption readerIndentedCodeClasses return $ return $ B.codeBlockWith ("", classes, []) $ - stripTrailingNewlines $ concat contents + stripTrailingNewlines $ T.concat contents lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks) lhsCodeBlock = do @@ -751,33 +754,33 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String +lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m Text lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline - contents <- many1Till anyChar (try $ string "\\end{code}") + contents <- many1TillChar anyChar (try $ string "\\end{code}") blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String +lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m Text lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String +lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m Text lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ Prelude.fail "Not in first column" lns <- many1 $ birdTrackLine c -- if (as is normal) there is always a space after >, drop it - let lns' = if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + let lns' = if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns blanklines - return $ intercalate "\n" lns' + return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String +birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -791,12 +794,12 @@ birdTrackLine c = try $ do emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ') -emailBlockQuote :: PandocMonad m => MarkdownParser m [String] +emailBlockQuote :: PandocMonad m => MarkdownParser m [Text] emailBlockQuote = try $ do emailBlockQuoteStart - let emailLine = many $ nonEndline <|> try - (endline >> notFollowedBy emailBlockQuoteStart >> - return '\n') + let emailLine = manyChar $ nonEndline <|> try + (endline >> notFollowedBy emailBlockQuoteStart >> + return '\n') let emailSep = try (newline >> emailBlockQuoteStart) first <- emailLine rest <- many $ try $ emailSep >> emailLine @@ -809,7 +812,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ T.intercalate "\n" raw <> "\n\n" return $ B.blockQuote <$> contents -- @@ -833,7 +836,7 @@ orderedListStart mbstydelim = try $ do skipNonindentSpaces notFollowedBy $ string "p." >> spaceChar >> digit -- page number (do guardDisabled Ext_fancy_lists - start <- many1 digit >>= safeRead + start <- many1Char digit >>= safeRead char '.' gobbleSpaces 1 <|> () <$ lookAhead newline optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) @@ -857,7 +860,7 @@ orderedListStart mbstydelim = try $ do listStart :: PandocMonad m => MarkdownParser m () listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing) -listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine :: PandocMonad m => Int -> MarkdownParser m Text listLine continuationIndent = try $ do notFollowedBy' (do gobbleSpaces continuationIndent skipMany spaceChar @@ -867,19 +870,19 @@ listLine continuationIndent = try $ do optional (() <$ gobbleSpaces continuationIndent) listLineCommon -listLineCommon :: PandocMonad m => MarkdownParser m String -listLineCommon = concat <$> manyTill - ( many1 (satisfy $ \c -> c `notElem` ['\n', '<', '`']) +listLineCommon :: PandocMonad m => MarkdownParser m Text +listLineCommon = T.concat <$> manyTill + ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`']) <|> fmap snd (withRaw code) <|> fmap snd (htmlTag isCommentTag) - <|> count 1 anyChar + <|> countChar 1 anyChar ) newline -- parse raw text for one list item, excluding start marker and continuations rawListItem :: PandocMonad m => Bool -- four space rule -> MarkdownParser m a - -> MarkdownParser m (String, Int) + -> MarkdownParser m (Text, Int) rawListItem fourSpaceRule start = try $ do pos1 <- getPosition start @@ -892,14 +895,14 @@ rawListItem fourSpaceRule start = try $ do notFollowedBy (() <$ codeBlockFenced) notFollowedBy blankline listLine continuationIndent) - blanks <- many blankline - let result = unlines (first:rest) ++ blanks + blanks <- manyChar blankline + let result = T.unlines (first:rest) <> blanks return (result, continuationIndent) -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation :: PandocMonad m => Int -> MarkdownParser m Text listContinuation continuationIndent = try $ do x <- try $ do notFollowedBy blankline @@ -913,12 +916,12 @@ listContinuation continuationIndent = try $ do notFollowedByDivCloser gobbleSpaces continuationIndent <|> notFollowedBy' listStart anyLineNewline - blanks <- many blankline - return $ concat (x:xs) ++ blanks + blanks <- manyChar blankline + return $ T.concat (x:xs) <> blanks -- Variant of blanklines that doesn't require blank lines -- before a fence or eof. -blanklines' :: PandocMonad m => MarkdownParser m [Char] +blanklines' :: PandocMonad m => MarkdownParser m Text blanklines' = blanklines <|> try checkDivCloser where checkDivCloser = do guardEnabled Ext_fenced_divs @@ -954,7 +957,7 @@ listItem fourSpaceRule start = try $ do (first, continuationIndent) <- rawListItem fourSpaceRule start continuations <- many (listContinuation continuationIndent) -- parse the extracted block, which may contain various block elements: - let raw = concat (first:continuations) + let raw = T.concat (first:continuations) contents <- parseFromString' parseBlocks raw updateState (\st -> st {stateParserContext = oldContext}) exts <- getOption readerExtensions @@ -990,7 +993,7 @@ defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' tabStop <- getOption readerTabStop - let remaining = tabStop - (length sps + 1) + let remaining = tabStop - (T.length sps + 1) if remaining > 0 then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar else mzero @@ -1001,11 +1004,11 @@ definitionListItem compact = try $ do rawLine' <- anyLine raw <- many1 $ defRawBlock compact term <- parseFromString' (trimInlinesF <$> inlines) rawLine' - contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw + contents <- mapM (parseFromString' parseBlocks . (<> "\n")) raw optional blanklines return $ liftM2 (,) term (sequence contents) -defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String +defRawBlock :: PandocMonad m => Bool -> MarkdownParser m Text defRawBlock compact = try $ do hasBlank <- option False $ blankline >> return True defListMarker @@ -1020,13 +1023,13 @@ defRawBlock compact = try $ do <|> notFollowedBy defListMarker anyLine ) rawlines <- many dline - cont <- fmap concat $ many $ try $ do + cont <- fmap T.concat $ many $ try $ do trailing <- option "" blanklines ln <- indentSpaces >> notFollowedBy blankline >> anyLine lns <- many dline - return $ trailing ++ unlines (ln:lns) - return $ trimr (firstline ++ unlines rawlines ++ cont) ++ - if hasBlank || not (null cont) then "\n\n" else "" + return $ trailing <> T.unlines (ln:lns) + return $ trimr (firstline <> T.unlines rawlines <> cont) <> + if hasBlank || not (T.null cont) then "\n\n" else "" definitionList :: PandocMonad m => MarkdownParser m (F Blocks) definitionList = try $ do @@ -1063,7 +1066,7 @@ para = try $ do | not (null alt) -> -- the fig: at beginning of title indicates a figure return $ B.singleton - $ Image attr alt (src,'f':'i':'g':':':tit) + $ Image attr alt (src, "fig:" <> tit) _ -> return x' | otherwise = x result <- implicitFigures . trimInlinesF <$> inlines1 @@ -1082,7 +1085,7 @@ para = try $ do inHtmlBlock <- stateInHtmlBlock <$> getState case inHtmlBlock of Just "div" -> () <$ - lookAhead (htmlTag (~== TagClose "div")) + lookAhead (htmlTag (~== TagClose ("div" :: Text))) _ -> mzero <|> do guardEnabled Ext_fenced_divs divLevel <- stateFencedDivLevel <$> getState @@ -1098,7 +1101,7 @@ plain = fmap B.plain . trimInlinesF <$> inlines1 -- raw html -- -htmlElement :: PandocMonad m => MarkdownParser m String +htmlElement :: PandocMonad m => MarkdownParser m Text htmlElement = rawVerbatimBlock <|> strictHtmlBlock <|> fmap snd (htmlTag isBlockTag) @@ -1132,14 +1135,14 @@ htmlBlock' = try $ do first <- htmlElement skipMany spaceChar optional blanklines - return $ if null first + return $ if T.null first then mempty else return $ B.rawBlock "html" first -strictHtmlBlock :: PandocMonad m => MarkdownParser m String +strictHtmlBlock :: PandocMonad m => MarkdownParser m Text strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: PandocMonad m => MarkdownParser m String +rawVerbatimBlock :: PandocMonad m => MarkdownParser m Text rawVerbatimBlock = htmlInBalanced isVerbTag where isVerbTag (TagOpen "pre" _) = True isVerbTag (TagOpen "style" _) = True @@ -1150,13 +1153,13 @@ rawVerbatimBlock = htmlInBalanced isVerbTag rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex - result <- (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawConTeXtEnvironment <*> spnl')) - <|> (B.rawBlock "tex" . trim . concat <$> - many1 ((++) <$> rawLaTeXBlock <*> spnl')) + result <- (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawConTeXtEnvironment <*> spnl')) + <|> (B.rawBlock "tex" . trim . T.concat <$> + many1 ((<>) <$> rawLaTeXBlock <*> spnl')) return $ case B.toList result of [RawBlock _ cs] - | all (`elem` [' ','\t','\n']) cs -> return mempty + | T.all (`elem` [' ','\t','\n']) cs -> return mempty -- don't create a raw block for suppressed macro defs _ -> return result @@ -1186,7 +1189,7 @@ rawHtmlBlocks = do return result -- remove markdown="1" attribute -stripMarkdownAttribute :: String -> String +stripMarkdownAttribute :: Text -> Text stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s where filterAttrib (TagOpen t as) = TagOpen t [(k,v) | (k,v) <- as, k /= "markdown"] @@ -1211,7 +1214,7 @@ lineBlock = try $ do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT [Char] st m (Int, Int) + -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1232,9 +1235,9 @@ simpleTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- If no header, calculate alignment on basis of first row of text - rawHeads <- fmap (tail . splitStringByIndices (init indices)) $ + rawHeads <- fmap (tail . splitTextByIndices (init indices)) $ if headless then lookAhead anyLine else return rawContent @@ -1250,15 +1253,15 @@ simpleTableHeader headless = try $ do -- Returns an alignment type for a table, based on a list of strings -- (the rows of the column header) and a number (the length of the -- dashed line under the rows. -alignType :: [String] +alignType :: [Text] -> Int -> Alignment alignType [] _ = AlignDefault alignType strLst len = - let nonempties = filter (not . null) $ map trimr strLst + let nonempties = filter (not . T.null) $ map trimr strLst (leftSpace, rightSpace) = - case sortBy (comparing length) nonempties of - (x:_) -> (head x `elem` " \t", length x < len) + case sortBy (comparing T.length) nonempties of + (x:_) -> (T.head x `elem` [' ', 't'], T.length x < len) [] -> (False, False) in case (leftSpace, rightSpace) of (True, False) -> AlignRight @@ -1267,7 +1270,7 @@ alignType strLst len = (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: PandocMonad m => MarkdownParser m String +tableFooter :: PandocMonad m => MarkdownParser m Text tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines' -- Parse a table separator - dashed line. @@ -1277,12 +1280,12 @@ tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: PandocMonad m => [Int] - -> MarkdownParser m [String] + -> MarkdownParser m [Text] rawTableLine indices = do notFollowedBy' (blanklines' <|> tableFooter) - line <- many1Till anyChar newline + line <- many1TillChar anyChar newline return $ map trim $ tail $ - splitStringByIndices (init indices) line + splitTextByIndices (init indices) line -- Parse a table line and return a list of lists of blocks (columns). tableLine :: PandocMonad m @@ -1297,7 +1300,7 @@ multilineRow :: PandocMonad m -> MarkdownParser m (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) - let cols = map unlines $ transpose colLines + let cols = map T.unlines $ transpose colLines fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols -- Parses a table caption: inlines beginning with 'Table:' @@ -1344,7 +1347,7 @@ multilineTableHeader headless = try $ do dashes <- many1 (dashedLine '-') newline let (lengths, lines') = unzip dashes - let indices = scanl (+) (length initSp) lines' + let indices = scanl (+) (T.length initSp) lines' -- compensate for the fact that intercolumn spaces are -- not included in the last index: let indices' = case reverse indices of @@ -1352,14 +1355,14 @@ multilineTableHeader headless = try $ do (x:xs) -> reverse (x+1:xs) rawHeadsList <- if headless then fmap (map (:[]) . tail . - splitStringByIndices (init indices')) $ lookAhead anyLine + splitTextByIndices (init indices')) $ lookAhead anyLine else return $ transpose $ map - (tail . splitStringByIndices (init indices')) + (tail . splitTextByIndices (init indices')) rawContent let aligns = zipWith alignType rawHeadsList lengths let rawHeads = if headless then replicate (length dashes) "" - else map (unlines . map trim) rawHeadsList + else map (T.unlines . map trim) rawHeadsList heads <- fmap sequence $ mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads return (heads, aligns, indices') @@ -1393,7 +1396,7 @@ pipeTable = try $ do lines' <- many pipeTableRow let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> length . stringify $ runF x def) (heads' : lines'') + map (\x -> T.length . stringify $ runF x def) (heads' : lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> @@ -1430,7 +1433,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1446,12 +1449,12 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT [Char] st m () +scanForPipe :: PandocMonad m => ParserT Text st m () scanForPipe = do inp <- getInput - case break (\c -> c == '\n' || c == '|') inp of - (_,'|':_) -> return () - _ -> mzero + case T.break (\c -> c == '\n' || c == '|') inp of + (_, T.uncons -> Just ('|', _)) -> return () + _ -> mzero -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in @@ -1561,7 +1564,7 @@ escapedChar = do result <- escapedChar' case result of ' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space - _ -> return $ return $ B.str [result] + _ -> return $ return $ B.str $ T.singleton result ltSign :: PandocMonad m => MarkdownParser m (F Inlines) ltSign = do @@ -1574,12 +1577,12 @@ exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' - lab <- many1 (alphaNum <|> oneOf "-_") + lab <- many1Char (alphaNum <|> oneOf "-_") return $ do st <- askF return $ case M.lookup lab (stateExamples st) of - Just n -> B.str (show n) - Nothing -> B.str ('@':lab) + Just n -> B.str $ tshow n + Nothing -> B.str $ "@" <> lab symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do @@ -1587,16 +1590,16 @@ symbol = do <|> try (do lookAhead $ char '\\' notFollowedBy' (() <$ rawTeXBlock) char '\\') - return $ return $ B.str [result] + return $ return $ B.str $ T.singleton result -- parses inline code, between n `s and n `s code :: PandocMonad m => MarkdownParser m (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces - result <- (trim . concat) <$> + result <- (trim . T.concat) <$> manyTill (notFollowedBy (inList >> listStart) >> - (many1 (noneOf "`\n") <|> many1 (char '`') <|> + (many1Char (noneOf "`\n") <|> many1Char (char '`') <|> (char '\n' >> notFollowedBy' blankline >> return " "))) (try (skipSpaces >> count (length starts) (char '`') >> notFollowedBy (char '`'))) @@ -1627,10 +1630,10 @@ enclosure c = do guardDisabled Ext_intraword_underscores <|> guard (c == '*') <|> (guard =<< notAfterString) - cs <- many1 (char c) + cs <- many1Char (char c) (return (B.str cs) <>) <$> whitespace <|> - case length cs of + case T.length cs of 3 -> three c 2 -> two c mempty 1 -> one c mempty @@ -1653,7 +1656,7 @@ three c = do (ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents)) <|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents)) <|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents)) - <|> return (return (B.str [c,c,c]) <> contents) + <|> return (return (B.str $ T.pack [c,c,c]) <> contents) -- Parse inlines til you hit two c's, and emit strong. -- If you never do hit two cs, emit ** plus inlines parsed. @@ -1662,7 +1665,7 @@ two c prefix' = do contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline) (ender c 2 >> updateLastStrPos >> return (B.strong <$> (prefix' <> contents))) - <|> return (return (B.str [c,c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.pack [c,c]) <> (prefix' <> contents)) -- Parse inlines til you hit a c, and emit emph. -- If you never hit a c, emit * plus inlines parsed. @@ -1673,7 +1676,7 @@ one c prefix' = do notFollowedBy (ender c 1) >> two c mempty) ) (ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents))) - <|> return (return (B.str [c]) <> (prefix' <> contents)) + <|> return (return (B.str $ T.singleton c) <> (prefix' <> contents)) strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines) strongOrEmph = enclosure '*' <|> enclosure '_' @@ -1717,16 +1720,16 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT [Char] st m Char +nonEndline :: PandocMonad m => ParserT Text st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do - result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) + result <- many1Char (alphaNum <|> try (char '.' <* notFollowedBy (char '.'))) updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations - if not (null result) && last result == '.' && result `Set.member` abbrevs + if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs then try (do ils <- whitespace -- ?? lookAhead alphaNum -- replace space after with nonbreaking space @@ -1766,36 +1769,36 @@ endline = try $ do -- -- a reference label for a link -reference :: PandocMonad m => MarkdownParser m (F Inlines, String) +reference :: PandocMonad m => MarkdownParser m (F Inlines, Text) reference = do guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^") guardDisabled Ext_citations <|> notFollowedBy' (string "[@") withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -parenthesizedChars :: PandocMonad m => MarkdownParser m [Char] +parenthesizedChars :: PandocMonad m => MarkdownParser m Text parenthesizedChars = do result <- charsInBalanced '(' ')' litChar - return $ '(' : result ++ ")" + return $ "(" <> result <> ")" -- source for a link, with optional title -source :: PandocMonad m => MarkdownParser m (String, String) +source :: PandocMonad m => MarkdownParser m (Text, Text) source = do char '(' skipSpaces let urlChunk = try parenthesizedChars - <|> (notFollowedBy (oneOf " )") >> count 1 litChar) - <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')")) - let sourceURL = (unwords . words . concat) <$> many urlChunk + <|> (notFollowedBy (oneOf " )") >> countChar 1 litChar) + <|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')")) + let sourceURL = (T.unwords . T.words . T.concat) <$> many urlChunk let betweenAngles = try $ - char '<' >> manyTill litChar (char '>') + char '<' >> manyTillChar litChar (char '>') src <- try betweenAngles <|> sourceURL tit <- option "" $ try $ spnl >> linkTitle skipSpaces char ')' return (escapeURI $ trimr src, tit) -linkTitle :: PandocMonad m => MarkdownParser m String +linkTitle :: PandocMonad m => MarkdownParser m Text linkTitle = quotedTitle '"' <|> quotedTitle '\'' link :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1823,13 +1826,13 @@ isSmallCaps :: Attr -> Bool isSmallCaps ("",["smallcaps"],[]) = True isSmallCaps ("",[],kvs) = case lookup "style" kvs of - Just s -> map toLower (filter (`notElem` " \t;") s) == + Just s -> T.toLower (T.filter (`notElem` [' ', '\t', ';']) s) == "font-variant:small-caps" Nothing -> False isSmallCaps _ = False regLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) + => (Attr -> Text -> Text -> Inlines -> Inlines) -> F Inlines -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do @@ -1840,8 +1843,8 @@ regLink constructor lab = try $ do -- a link like [this][ref] or [this][] or [this] referenceLink :: PandocMonad m - => (Attr -> String -> String -> Inlines -> Inlines) - -> (F Inlines, String) + => (Attr -> Text -> Text -> Inlines -> Inlines) + -> (F Inlines, Text) -> MarkdownParser m (F Inlines) referenceLink constructor (lab, raw) = do sp <- (True <$ lookAhead (char ' ')) <|> return False @@ -1863,7 +1866,7 @@ referenceLink constructor (lab, raw) = do parsedRaw' <- parsedRaw fallback' <- fallback return $ B.str "[" <> fallback' <> B.str "]" <> - (if sp && not (null raw) then B.space else mempty) <> + (if sp && not (T.null raw) then B.space else mempty) <> parsedRaw' return $ do keys <- asksF stateKeys @@ -1878,19 +1881,19 @@ referenceLink constructor (lab, raw) = do else makeFallback Just ((src,tit), attr) -> constructor attr src tit <$> lab -dropBrackets :: String -> String -dropBrackets = reverse . dropRB . reverse . dropLB - where dropRB (']':xs) = xs - dropRB xs = xs - dropLB ('[':xs) = xs - dropLB xs = xs +dropBrackets :: Text -> Text +dropBrackets = dropRB . dropLB + where dropRB (T.unsnoc -> Just (xs,']')) = xs + dropRB xs = xs + dropLB (T.uncons -> Just ('[',xs)) = xs + dropLB xs = xs bareURL :: PandocMonad m => MarkdownParser m (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) - notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") + notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text)) return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig) autoLink :: PandocMonad m => MarkdownParser m (F Inlines) @@ -1902,19 +1905,20 @@ autoLink = try $ do -- is finished, because the uri parser tries to avoid parsing -- final punctuation. for example: in `<http://hi---there>`, -- the URI parser will stop before the dashes. - extra <- fromEntities <$> manyTill nonspaceChar (char '>') + extra <- fromEntities <$> manyTillChar nonspaceChar (char '>') attr <- option ("", [cls], []) $ try $ guardEnabled Ext_link_attributes >> attributes - return $ return $ B.linkWith attr (src ++ escapeURI extra) "" - (B.str $ orig ++ extra) + return $ return $ B.linkWith attr (src <> escapeURI extra) "" + (B.str $ orig <> extra) image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor attr' src = case takeExtension src of - "" -> B.imageWith attr' (addExtension src defaultExt) + let constructor attr' src = case takeExtension (T.unpack src) of + "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src) + $ T.unpack defaultExt) _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) @@ -1926,7 +1930,7 @@ note = try $ do return $ do notes <- asksF stateNotes' case M.lookup ref notes of - Nothing -> return $ B.str $ "[^" ++ ref ++ "]" + Nothing -> return $ B.str $ "[^" <> ref <> "]" Just (_pos, contents) -> do st <- askF -- process the note in a context that doesn't resolve @@ -1949,29 +1953,29 @@ rawLaTeXInline' = try $ do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String +rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) - <|> many1 letter - contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar) - (try $ string "\\stop" >> string completion) - return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion + <|> many1Char letter + contents <- manyTill (rawConTeXtEnvironment <|> countChar 1 anyChar) + (try $ string "\\stop" >> textStr completion) + return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String +inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text inBrackets parser = do char '[' - contents <- many parser + contents <- manyChar parser char ']' - return $ "[" ++ contents ++ "]" + return $ "[" <> contents <> "]" spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) spanHtml = try $ do guardEnabled Ext_native_spans - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" []) - contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span")) + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) + contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ if isSmallCaps (ident, classes, keyvals) then B.smallcaps <$> contents @@ -1980,20 +1984,20 @@ spanHtml = try $ do divHtml :: PandocMonad m => MarkdownParser m (F Blocks) divHtml = try $ do guardEnabled Ext_native_divs - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" []) + (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) -- we set stateInHtmlBlock so that closing tags that can be either block or -- inline will not be parsed as inline tags oldInHtmlBlock <- stateInHtmlBlock <$> getState updateState $ \st -> st{ stateInHtmlBlock = Just "div" } bls <- option "" (blankline >> option "" blanklines) contents <- mconcat <$> - many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block) - closed <- option False (True <$ htmlTag (~== TagClose "div")) + many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) + closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) if closed then do updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] words $ lookup "class" attrs + let classes = maybe [] T.words $ lookup "class" attrs let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.divWith (ident, classes, keyvals) <$> contents else -- avoid backtracing @@ -2005,7 +2009,7 @@ divFenced = try $ do string ":::" skipMany (char ':') skipMany spaceChar - attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar) + attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) skipMany spaceChar skipMany (char ':') blankline @@ -2047,7 +2051,7 @@ emoji :: PandocMonad m => MarkdownParser m (F Inlines) emoji = try $ do guardEnabled Ext_emoji char ':' - emojikey <- many1 (oneOf emojiChars) + emojikey <- many1Char (oneOf emojiChars) char ':' case emojiToInline emojikey of Just i -> return (return $ B.singleton i) @@ -2077,14 +2081,14 @@ textualCite = try $ do mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite case mbrest of Just (rest, raw) -> - return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:)) + return $ (flip B.cite (B.text $ "@" <> key <> " " <> raw) . (first:)) <$> rest Nothing -> (do (cs, raw) <- withRaw $ bareloc first - let (spaces',raw') = span isSpace raw - spc | null spaces' = mempty - | otherwise = B.space + let (spaces',raw') = T.span isSpace raw + spc | T.null spaces' = mempty + | otherwise = B.space lab <- parseFromString' inlines $ dropBrackets raw' fallback <- referenceLink B.linkWith (lab,raw') return $ do @@ -2092,12 +2096,12 @@ textualCite = try $ do cs' <- cs return $ case B.toList fallback' of - Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback' - _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw)) + Link{}:_ -> B.cite [first] (B.str $ "@" <> key) <> spc <> fallback' + _ -> B.cite cs' (B.text $ "@" <> key <> " " <> raw)) <|> return (do st <- askF return $ case M.lookup key (stateExamples st) of - Just n -> B.str (show n) - _ -> B.cite [first] $ B.str $ '@':key) + Just n -> B.str $ tshow n + _ -> B.cite [first] $ B.str $ "@" <> key) bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation]) bareloc c = try $ do diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 35bb8e3eb..07240e951 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | Module : Text.Pandoc.Readers.MediaWiki @@ -24,11 +25,12 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isSpace) import qualified Data.Foldable as F -import Data.List (intercalate, intersperse, isPrefixOf) +import Data.List (intersperse) import Data.Maybe (fromMaybe, maybeToList) import Data.Sequence (ViewL (..), viewl, (<|)) import qualified Data.Set as Set -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import qualified Text.Pandoc.Builder as B @@ -39,7 +41,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, - trim) + trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) @@ -57,7 +59,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (unpack (crFilter s) ++ "\n") + (crFilter s <> "\n") case parsed of Right result -> return result Left e -> throwError e @@ -66,12 +68,12 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwMaxNestingLevel :: Int , mwNextLinkNumber :: Int , mwCategoryLinks :: [Inlines] - , mwIdentifierList :: Set.Set String + , mwIdentifierList :: Set.Set Text , mwLogMessages :: [LogMessage] , mwInTT :: Bool } -type MWParser m = ParserT [Char] MWState m +type MWParser m = ParserT Text MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions @@ -105,58 +107,58 @@ specialChars = "'[]<=&*{}|\":\\" spaceChars :: [Char] spaceChars = " \n\t" -sym :: PandocMonad m => String -> MWParser m () -sym s = () <$ try (string s) +sym :: PandocMonad m => Text -> MWParser m () +sym s = () <$ try (string $ T.unpack s) -newBlockTags :: [String] +newBlockTags :: [Text] newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] -isBlockTag' :: Tag String -> Bool +isBlockTag' :: Tag Text -> Bool isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag = isBlockTag tag -isInlineTag' :: Tag String -> Bool +isInlineTag' :: Tag Text -> Bool isInlineTag' (TagComment _) = True isInlineTag' t = not (isBlockTag' t) -eitherBlockOrInline :: [String] +eitherBlockOrInline :: [Text] eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins", "map", "area", "object"] htmlComment :: PandocMonad m => MWParser m () htmlComment = () <$ htmlTag isCommentTag -inlinesInTags :: PandocMonad m => String -> MWParser m Inlines +inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines inlinesInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else trimInlines . mconcat <$> manyTill inline (htmlTag (~== TagClose tag)) -blocksInTags :: PandocMonad m => String -> MWParser m Blocks +blocksInTags :: PandocMonad m => Text -> MWParser m Blocks blocksInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) let closer = if tag == "li" - then htmlTag (~== TagClose "li") + then htmlTag (~== TagClose ("li" :: Text)) <|> lookAhead ( - htmlTag (~== TagOpen "li" []) - <|> htmlTag (~== TagClose "ol") - <|> htmlTag (~== TagClose "ul")) + htmlTag (~== TagOpen ("li" :: Text) []) + <|> htmlTag (~== TagClose ("ol" :: Text)) + <|> htmlTag (~== TagClose ("ul" :: Text))) else htmlTag (~== TagClose tag) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return mempty else mconcat <$> manyTill block closer -charsInTags :: PandocMonad m => String -> MWParser m [Char] -charsInTags tag = try $ do +textInTags :: PandocMonad m => Text -> MWParser m Text +textInTags tag = try $ do (_,raw) <- htmlTag (~== TagOpen tag []) - if '/' `elem` raw -- self-closing tag + if T.any (== '/') raw -- self-closing tag then return "" - else manyTill anyChar (htmlTag (~== TagClose tag)) + else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag)) -- -- main parser @@ -192,7 +194,7 @@ block = do <|> blockTag <|> (B.rawBlock "mediawiki" <$> template) <|> para - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res para :: PandocMonad m => MWParser m Blocks @@ -234,16 +236,16 @@ table = do else (replicate cols mempty, hdr:rows') return $ B.table caption cellspecs headers rows -parseAttrs :: PandocMonad m => MWParser m [(String,String)] +parseAttrs :: PandocMonad m => MWParser m [(Text,Text)] parseAttrs = many1 parseAttr -parseAttr :: PandocMonad m => MWParser m (String, String) +parseAttr :: PandocMonad m => MWParser m (Text, Text) parseAttr = try $ do skipMany spaceChar - k <- many1 letter + k <- many1Char letter char '=' - v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"')) - <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|') + v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"')) + <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|') return (k,v) tableStart :: PandocMonad m => MWParser m () @@ -293,8 +295,8 @@ tableCell = try $ do notFollowedBy (char '|') skipMany spaceChar pos' <- getPosition - ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> - ((snd <$> withRaw table) <|> count 1 anyChar)) + ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *> + ((snd <$> withRaw table) <|> countChar 1 anyChar)) bs <- parseFromString (do setPosition pos' mconcat <$> many block) ls let align = case lookup "align" attrs of @@ -307,48 +309,49 @@ tableCell = try $ do Nothing -> 0.0 return ((align, width), bs) -parseWidth :: String -> Maybe Double +parseWidth :: Text -> Maybe Double parseWidth s = - case reverse s of - ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds) - _ -> Nothing + case T.unsnoc s of + Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds + _ -> Nothing -template :: PandocMonad m => MWParser m String +template :: PandocMonad m => MWParser m Text template = try $ do string "{{" notFollowedBy (char '{') lookAhead $ letter <|> digit <|> char ':' - let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar + let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar contents <- manyTill chunk (try $ string "}}") - return $ "{{" ++ concat contents ++ "}}" + return $ "{{" <> T.concat contents <> "}}" blockTag :: PandocMonad m => MWParser m Blocks blockTag = do (tag, _) <- lookAhead $ htmlTag isBlockTag' case tag of TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote" - TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre" + TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre" TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs TagOpen "source" attrs -> syntaxhighlight "source" attrs TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$> - charsInTags "haskell" + textInTags "haskell" TagOpen "gallery" _ -> blocksInTags "gallery" TagOpen "p" _ -> mempty <$ htmlTag (~== tag) TagClose "p" -> mempty <$ htmlTag (~== tag) _ -> B.rawBlock "html" . snd <$> htmlTag (~== tag) -trimCode :: String -> String -trimCode ('\n':xs) = stripTrailingNewlines xs -trimCode xs = stripTrailingNewlines xs +trimCode :: Text -> Text +trimCode t = case T.uncons t of + Just ('\n', xs) -> stripTrailingNewlines xs + _ -> stripTrailingNewlines t -syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks +syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks syntaxhighlight tag attrs = try $ do let mblang = lookup "lang" attrs let mbstart = lookup "start" attrs let mbline = lookup "line" attrs let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart - contents <- charsInTags tag + contents <- textInTags tag return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents hrule :: PandocMonad m => MWParser m Blocks @@ -362,17 +365,17 @@ preformatted = try $ do guardColumnOne char ' ' let endline' = B.linebreak <$ try (newline <* char ' ') - let whitespace' = B.str <$> many1 ('\160' <$ spaceChar) + let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar) let spToNbsp ' ' = '\160' spToNbsp x = x let nowiki' = mconcat . intersperse B.linebreak . map B.str . - lines . fromEntities . map spToNbsp <$> try - (htmlTag (~== TagOpen "nowiki" []) *> - manyTill anyChar (htmlTag (~== TagClose "nowiki"))) + T.lines . fromEntities . T.map spToNbsp <$> try + (htmlTag (~== TagOpen ("nowiki" :: Text) []) *> + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))) let inline' = whitespace' <|> endline' <|> nowiki' <|> try (notFollowedBy newline *> inline) contents <- mconcat <$> many1 inline' - let spacesStr (Str xs) = all isSpace xs + let spacesStr (Str xs) = T.all isSpace xs spacesStr _ = False if F.all spacesStr contents then return mempty @@ -385,7 +388,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode strToCode x = x normalizeCode [] = [] normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 = - normalizeCode $ Code a1 (x ++ y) : zs + normalizeCode $ Code a1 (x <> y) : zs normalizeCode (x:xs) = x : normalizeCode xs header :: PandocMonad m => MWParser m Blocks @@ -400,22 +403,22 @@ header = try $ do -- See #4731: modifyIdentifier :: Attr -> Attr modifyIdentifier (ident,cl,kv) = (ident',cl,kv) - where ident' = map (\c -> if c == '-' then '_' else c) ident + where ident' = T.map (\c -> if c == '-' then '_' else c) ident bulletList :: PandocMonad m => MWParser m Blocks bulletList = B.bulletList <$> ( many1 (listItem '*') - <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <* - optional (htmlTag (~== TagClose "ul"))) ) + <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <* + optional (htmlTag (~== TagClose ("ul" :: Text)))) ) orderedList :: PandocMonad m => MWParser m Blocks orderedList = (B.orderedList <$> many1 (listItem '#')) <|> try - (do (tag,_) <- htmlTag (~== TagOpen "ol" []) + (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) []) spaces items <- many (listItem '#' <|> li) - optional (htmlTag (~== TagClose "ol")) + optional (htmlTag (~== TagClose ("ol" :: Text))) let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items) @@ -452,7 +455,7 @@ anyListStart :: PandocMonad m => MWParser m Char anyListStart = guardColumnOne >> oneOf "*#:;" li :: PandocMonad m => MWParser m Blocks -li = lookAhead (htmlTag (~== TagOpen "li" [])) *> +li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *> (firstParaToPlain <$> blocksInTags "li") <* spaces listItem :: PandocMonad m => Char -> MWParser m Blocks @@ -464,13 +467,13 @@ listItem c = try $ do else do skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ string extras *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) contents <- parseFromString (do setPosition pos' many1 $ listItem' c) - (unlines (first : rest)) + (T.unlines (first : rest)) case c of '*' -> return $ B.bulletList contents '#' -> return $ B.orderedList contents @@ -484,20 +487,20 @@ listItem c = try $ do -- }} -- * next list item -- which seems to be valid mediawiki. -listChunk :: PandocMonad m => MWParser m String -listChunk = template <|> count 1 anyChar +listChunk :: PandocMonad m => MWParser m Text +listChunk = template <|> countChar 1 anyChar listItem' :: PandocMonad m => Char -> MWParser m Blocks listItem' c = try $ do listStart c skipMany spaceChar pos' <- getPosition - first <- concat <$> manyTill listChunk newline + first <- T.concat <$> manyTill listChunk newline rest <- many (try $ char c *> lookAhead listStartChar *> - (concat <$> manyTill listChunk newline)) + (T.concat <$> manyTill listChunk newline)) parseFromString (do setPosition pos' firstParaToPlain . mconcat <$> many1 block) - $ unlines $ first : rest + $ T.unlines $ first : rest firstParaToPlain :: Blocks -> Blocks firstParaToPlain contents = @@ -528,23 +531,23 @@ inline = whitespace <|> special str :: PandocMonad m => MWParser m Inlines -str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars) +str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars) math :: PandocMonad m => MWParser m Inlines -math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math")) - <|> (B.math . trim <$> charsInTags "math") - <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd)) - <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd)) +math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math")) + <|> (B.math . trim <$> textInTags "math") + <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd)) + <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd)) where dmStart = string "\\[" dmEnd = try (string "\\]") mStart = string "\\(" mEnd = try (string "\\)") -variable :: PandocMonad m => MWParser m String +variable :: PandocMonad m => MWParser m Text variable = try $ do string "{{{" - contents <- manyTill anyChar (try $ string "}}}") - return $ "{{{" ++ contents ++ "}}}" + contents <- manyTillChar anyChar (try $ string "}}}") + return $ "{{{" <> contents <> "}}}" inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do @@ -553,11 +556,11 @@ inlineTag = do TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" TagOpen "nowiki" _ -> try $ do (_,raw) <- htmlTag (~== tag) - if '/' `elem` raw + if T.any (== '/') raw then return mempty else B.text . fromEntities <$> - manyTill anyChar (htmlTag (~== TagClose "nowiki")) - TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too + manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))) + TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too *> optional blankline) TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike" TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del" @@ -570,12 +573,12 @@ inlineTag = do result <- encode <$> inlinesInTags "tt" updateState $ \st -> st{ mwInTT = inTT } return result - TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask" + TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask" _ -> B.rawInline "html" . snd <$> htmlTag (~== tag) special :: PandocMonad m => MWParser m Inlines -special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *> - oneOf specialChars) +special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *> + oneOf specialChars) inlineHtml :: PandocMonad m => MWParser m Inlines inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag' @@ -594,7 +597,7 @@ endline = () <$ try (newline <* notFollowedBy anyListStart) imageIdentifiers :: PandocMonad m => [MWParser m ()] -imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers] +imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers] where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier", "Bild"] @@ -602,9 +605,9 @@ image :: PandocMonad m => MWParser m Inlines image = try $ do sym "[[" choice imageIdentifiers - fname <- addUnderscores <$> many1 (noneOf "|]") + fname <- addUnderscores <$> many1Char (noneOf "|]") _ <- many imageOption - dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px") + dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px") <|> return [] _ <- many imageOption let kvs = case dims of @@ -614,9 +617,9 @@ image = try $ do let attr = ("", [], kvs) caption <- (B.str fname <$ sym "]]") <|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]"))) - return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption + return $ B.imageWith attr fname ("fig:" <> stringify caption) caption -imageOption :: PandocMonad m => MWParser m String +imageOption :: PandocMonad m => MWParser m Text imageOption = try $ char '|' *> opt where opt = try (oneOfStrings [ "border", "thumbnail", "frameless" @@ -624,30 +627,27 @@ imageOption = try $ char '|' *> opt , "center", "none", "baseline", "sub" , "super", "top", "text-top", "middle" , "bottom", "text-bottom" ]) - <|> try (string "frame") + <|> try (textStr "frame") <|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]")) -collapseUnderscores :: String -> String -collapseUnderscores [] = [] -collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs) -collapseUnderscores (x:xs) = x : collapseUnderscores xs - -addUnderscores :: String -> String -addUnderscores = collapseUnderscores . intercalate "_" . words +addUnderscores :: Text -> Text +addUnderscores = T.intercalate "_" . splitTextBy sep + where + sep c = isSpace c || c == '_' internalLink :: PandocMonad m => MWParser m Inlines internalLink = try $ do sym "[[" - pagename <- unwords . words <$> many (noneOf "|]") + pagename <- T.unwords . T.words <$> manyChar (noneOf "|]") label <- option (B.text pagename) $ char '|' *> ( (mconcat <$> many1 (notFollowedBy (char ']') *> inline)) -- the "pipe trick" -- [[Help:Contents|] -> "Contents" - <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) ) + <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) ) sym "]]" - linktrail <- B.text <$> many letter + linktrail <- B.text <$> manyChar letter let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail) - if "Category:" `isPrefixOf` pagename + if "Category:" `T.isPrefixOf` pagename then do updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st } return mempty @@ -662,7 +662,7 @@ externalLink = try $ do <|> do char ']' num <- mwNextLinkNumber <$> getState updateState $ \st -> st{ mwNextLinkNumber = num + 1 } - return $ B.str $ show num + return $ B.str $ tshow num return $ B.link src "" lab url :: PandocMonad m => MWParser m Inlines diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index b8cbe2f26..4ade61294 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Muse Copyright : Copyright (C) 2017-2019 Alexander Krotov @@ -24,12 +25,12 @@ import Control.Monad.Reader import Control.Monad.Except (throwError) import Data.Bifunctor import Data.Default -import Data.List (intercalate, transpose, uncons) -import Data.List.Split (splitOn) +import Data.List (transpose, uncons) import qualified Data.Map as M import qualified Data.Set as Set import Data.Maybe (fromMaybe, isNothing, maybeToList) -import Data.Text (Text, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) @@ -38,7 +39,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (F) -import Text.Pandoc.Shared (crFilter, trimr, underlineSpan) +import Text.Pandoc.Shared (crFilter, trimr, underlineSpan, tshow) -- | Read Muse from an input string and return a Pandoc document. readMuse :: PandocMonad m @@ -49,18 +50,18 @@ readMuse opts s = do let input = crFilter s res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input case res of - Left e -> throwError $ PandocParsecError (unpack input) e + Left e -> throwError $ PandocParsecError input e Right d -> return d type F = Future MuseState data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata , museOptions :: ReaderOptions - , museIdentifierList :: Set.Set String + , museIdentifierList :: Set.Set Text , museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed , museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed , museLogMessages :: [LogMessage] - , museNotes :: M.Map String (SourcePos, F Blocks) + , museNotes :: M.Map Text (SourcePos, F Blocks) } instance Default MuseState where @@ -116,22 +117,27 @@ parseMuse = do -- * Utility functions -- | Trim up to one newline from the beginning of the string. -lchop :: String -> String -lchop ('\n':xs) = xs -lchop s = s +lchop :: Text -> Text +lchop s = case T.uncons s of + Just ('\n', xs) -> xs + _ -> s -- | Trim up to one newline from the end of the string. -rchop :: String -> String -rchop = reverse . lchop . reverse +rchop :: Text -> Text +rchop s = case T.unsnoc s of + Just (xs, '\n') -> xs + _ -> s -unindent :: String -> String -unindent = rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop +unindent :: Text -> Text +unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop -dropSpacePrefix :: [String] -> [String] -dropSpacePrefix lns = drop maxIndent <$> lns +dropSpacePrefix :: [Text] -> [Text] +dropSpacePrefix lns = T.drop maxIndent <$> lns where isSpaceChar c = c == ' ' || c == '\t' - maxIndent = length $ takeWhile (isSpaceChar . head) $ takeWhile same $ transpose lns - same = and . (zipWith (==) <*> drop 1) + maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns + same t = case T.uncons t of + Just (c, cs) -> T.all (== c) cs + Nothing -> True atStart :: PandocMonad m => MuseParser m () atStart = do @@ -160,29 +166,29 @@ getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition -- ** HTML parsers -openTag :: PandocMonad m => String -> MuseParser m [(String, String)] +openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)] openTag tag = try $ - char '<' *> string tag *> manyTill attr (char '>') + char '<' *> textStr tag *> manyTill attr (char '>') where attr = try $ (,) <$ many1 spaceChar - <*> many1 (noneOf "=\n") + <*> many1Char (noneOf "=\n") <* string "=\"" - <*> manyTill (noneOf "\"") (char '"') + <*> manyTillChar (noneOf "\"") (char '"') -closeTag :: PandocMonad m => String -> MuseParser m () -closeTag tag = try $ string "</" *> string tag *> void (char '>') +closeTag :: PandocMonad m => Text -> MuseParser m () +closeTag tag = try $ string "</" *> textStr tag *> void (char '>') -- | Convert HTML attributes to Pandoc 'Attr' -htmlAttrToPandoc :: [(String, String)] -> Attr +htmlAttrToPandoc :: [(Text, Text)] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"] parseHtmlContent :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (Attr, F Blocks) parseHtmlContent tag = try $ getIndent >>= \indent -> (,) <$> fmap htmlAttrToPandoc (openTag tag) @@ -193,16 +199,16 @@ parseHtmlContent tag = try $ getIndent >>= \indent -> (,) -- ** Directive parsers -- While not documented, Emacs Muse allows "-" in directive name -parseDirectiveKey :: PandocMonad m => MuseParser m String -parseDirectiveKey = char '#' *> many (letter <|> char '-') +parseDirectiveKey :: PandocMonad m => MuseParser m Text +parseDirectiveKey = char '#' *> manyChar (letter <|> char '-') -parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseEmacsDirective = (,) <$> parseDirectiveKey <* spaceChar <*> (trimInlinesF . mconcat <$> manyTill inline' eol) -parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines) +parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines) parseAmuseDirective = (,) <$> parseDirectiveKey <* many1 spaceChar @@ -289,7 +295,7 @@ listItemContentsUntil col pre end = p parseBlock :: PandocMonad m => MuseParser m (F Blocks) parseBlock = do res <- blockElements <|> para - trace (take 60 $ show $ B.toList $ runF res def) + trace (T.take 60 $ tshow $ B.toList $ runF res def) return res where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements))) @@ -337,7 +343,7 @@ pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always <* string "* * * * *" <* manyTill spaceChar eol -headingStart :: PandocMonad m => MuseParser m (String, Int) +headingStart :: PandocMonad m => MuseParser m (Text, Int) headingStart = try $ (,) <$> option "" (try (parseAnchor <* manyTill spaceChar eol)) <* firstColumn @@ -371,14 +377,14 @@ example :: PandocMonad m => MuseParser m (F Blocks) example = try $ pure . B.codeBlock <$ string "{{{" <* many spaceChar - <*> (unindent <$> manyTill anyChar (string "}}}")) + <*> (unindent <$> manyTillChar anyChar (string "}}}")) -- | Parse an @\<example>@ tag. exampleTag :: PandocMonad m => MuseParser m (F Blocks) exampleTag = try $ fmap pure $ B.codeBlockWith <$ many spaceChar <*> (htmlAttrToPandoc <$> openTag "example") - <*> (unindent <$> manyTill anyChar (closeTag "example")) + <*> (unindent <$> manyTillChar anyChar (closeTag "example")) <* manyTill spaceChar eol -- | Parse a @\<literal>@ tag as a raw block. @@ -388,7 +394,7 @@ literalTag = try $ fmap pure $ B.rawBlock <$ many spaceChar <*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML <* manyTill spaceChar eol - <*> (unindent <$> manyTill anyChar (closeTag "literal")) + <*> (unindent <$> manyTillChar anyChar (closeTag "literal")) <* manyTill spaceChar eol -- | Parse @\<center>@ tag. @@ -428,7 +434,7 @@ playTag = do verseLine :: PandocMonad m => MuseParser m (F Inlines) verseLine = (<>) - <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' '))) + <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' '))) <*> fmap (trimInlinesF . mconcat) (manyTill inline' eol) -- | Parse @\<verse>@ tag. @@ -466,17 +472,17 @@ paraUntil end = do noteMarker' :: PandocMonad m => Char -> Char - -> MuseParser m String -noteMarker' l r = try $ (\x y -> l:x:y ++ [r]) + -> MuseParser m Text +noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r]) <$ char l <*> oneOf "123456789" <*> manyTill digit (char r) -noteMarker :: PandocMonad m => MuseParser m String +noteMarker :: PandocMonad m => MuseParser m Text noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}' addNote :: PandocMonad m - => String + => Text -> SourcePos -> F Blocks -> MuseParser m () @@ -674,15 +680,15 @@ museGridTableRow :: PandocMonad m -> MuseParser m (F [Blocks]) museGridTableRow indent indices = try $ do lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices) - let cols = map (unlines . map trimr) $ transpose lns + let cols = map (T.unlines . map trimr) $ transpose lns indentWith indent *> museGridTableHeader sequence <$> mapM (parseFromString' parseBlocks) cols museGridTableRawLine :: PandocMonad m => [Int] - -> MuseParser m [String] + -> MuseParser m [Text] museGridTableRawLine indices = - char '|' *> forM indices (\n -> count n anyChar <* char '|') <* manyTill spaceChar eol + char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol museGridTable :: PandocMonad m => MuseParser m (F Blocks) museGridTable = try $ do @@ -767,12 +773,12 @@ inline = endline <|> inline' endline :: PandocMonad m => MuseParser m (F Inlines) endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos -parseAnchor :: PandocMonad m => MuseParser m String -parseAnchor = try $ (:) +parseAnchor :: PandocMonad m => MuseParser m Text +parseAnchor = try $ T.cons <$ firstColumn <* char '#' <*> letter - <*> many (letter <|> digit <|> char '-') + <*> manyChar (letter <|> digit <|> char '-') anchor :: PandocMonad m => MuseParser m (F Inlines) anchor = try $ do @@ -813,7 +819,7 @@ emphasisBetween p = try $ trimInlinesF . mconcat -- | Parse an inline tag, such as @\<em>@ and @\<strong>@. inlineTag :: PandocMonad m - => String -- ^ Tag name + => Text -- ^ Tag name -> MuseParser m (F Inlines) inlineTag tag = try $ mconcat <$ openTag tag @@ -862,12 +868,12 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del" verbatimTag :: PandocMonad m => MuseParser m (F Inlines) verbatimTag = return . B.text <$ openTag "verbatim" - <*> manyTill anyChar (closeTag "verbatim") + <*> manyTillChar anyChar (closeTag "verbatim") -- | Parse @\<class>@ tag. classTag :: PandocMonad m => MuseParser m (F Inlines) classTag = do - classes <- maybe [] words . lookup "name" <$> openTag "class" + classes <- maybe [] T.words . lookup "name" <$> openTag "class" fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class") -- | Parse @\<\<\<RTL>>>@ text. @@ -886,43 +892,43 @@ nbsp = try $ pure (B.str "\160") <$ string "~~" -- | Parse code markup, indicated by @\'=\'@ characters. code :: PandocMonad m => MuseParser m (F Inlines) -code = try $ fmap pure $ B.code . uncurry (++) +code = try $ fmap pure $ B.code . uncurry (<>) <$ atStart <* char '=' <* notFollowedBy (spaceChar <|> newline) - <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=') + <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=') <* notFollowedBy alphaNum -- | Parse @\<code>@ tag. codeTag :: PandocMonad m => MuseParser m (F Inlines) codeTag = fmap pure $ B.codeWith <$> (htmlAttrToPandoc <$> openTag "code") - <*> manyTill anyChar (closeTag "code") + <*> manyTillChar anyChar (closeTag "code") -- | Parse @\<math>@ tag. -- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@ mathTag :: PandocMonad m => MuseParser m (F Inlines) mathTag = return . B.math <$ openTag "math" - <*> manyTill anyChar (closeTag "math") + <*> manyTillChar anyChar (closeTag "math") -- | Parse inline @\<literal>@ tag as a raw inline. inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines) inlineLiteralTag = try $ fmap pure $ B.rawInline <$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML - <*> manyTill anyChar (closeTag "literal") + <*> manyTillChar anyChar (closeTag "literal") str :: PandocMonad m => MuseParser m (F Inlines) -str = return . B.str <$> many1 alphaNum <* updateLastStrPos +str = return . B.str <$> many1Char alphaNum <* updateLastStrPos -- | Consume asterisks that were not used as emphasis opening. -- This prevents series of asterisks from being split into -- literal asterisk and emphasis opening. asterisks :: PandocMonad m => MuseParser m (F Inlines) -asterisks = pure . B.str <$> many1 (char '*') +asterisks = pure . B.str <$> many1Char (char '*') symbol :: PandocMonad m => MuseParser m (F Inlines) -symbol = pure . B.str . pure <$> nonspaceChar +symbol = pure . B.str . T.singleton <$> nonspaceChar -- | Parse a link or image. linkOrImage :: PandocMonad m => MuseParser m (F Inlines) @@ -934,12 +940,12 @@ linkContent = trimInlinesF . mconcat <*> manyTill inline (char ']') -- | Parse a link starting with (possibly null) prefix -link :: PandocMonad m => String -> MuseParser m (F Inlines) +link :: PandocMonad m => Text -> MuseParser m (F Inlines) link prefix = try $ do inLink <- asks museInLink guard $ not inLink - string $ "[[" ++ prefix - url <- manyTill anyChar $ char ']' + textStr $ "[[" <> prefix + url <- manyTillChar anyChar $ char ']' content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent) char ']' return $ B.link url "" <$> content @@ -947,27 +953,27 @@ link prefix = try $ do image :: PandocMonad m => MuseParser m (F Inlines) image = try $ do string "[[" - (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']') + (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']') content <- option mempty linkContent char ']' let widthAttr = case align of - Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")] - _ -> maybeToList (("width",) . (++ "%") <$> width) + Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")] + _ -> maybeToList (("width",) . (<> "%") <$> width) let alignClass = case align of Just 'r' -> ["align-right"] Just 'l' -> ["align-left"] Just 'f' -> [] _ -> [] - return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content + return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"] - imageExtension = choice (try . string <$> imageExtensions) + imageExtension = choice (try . textStr <$> imageExtensions) imageExtensionAndOptions = do ext <- imageExtension (width, align) <- option (Nothing, Nothing) imageAttrs return (ext, width, align) imageAttrs = (,) <$ many1 spaceChar - <*> optionMaybe (many1 digit) + <*> optionMaybe (many1Char digit) <* many spaceChar <*> optionMaybe (oneOf "rlf") diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 9e3c118d8..34d3c5e8f 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Native Copyright : Copyright (C) 2011-2019 John MacFarlane @@ -19,7 +20,7 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (safeRead) import Control.Monad.Except (throwError) -import Data.Text (Text, unpack) +import Data.Text (Text) import Text.Pandoc.Class import Text.Pandoc.Error @@ -38,18 +39,18 @@ readNative :: PandocMonad m -> Text -- ^ String to parse (assuming @'\n'@ line endings) -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of + case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of Right doc -> return doc Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: Text -> Either PandocError [Block] -readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s)) +readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) readBlock :: Text -> Either PandocError Block -readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s)) +readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s) readInlines :: Text -> Either PandocError [Inline] -readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s)) +readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s) readInline :: Text -> Either PandocError Inline -readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s)) +readInline s = maybe (Left . PandocParseError $ "Could not read: " <> s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 2c3b0367f..5330b0238 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.OPML Copyright : Copyright (C) 2013-2019 John MacFarlane @@ -18,7 +19,8 @@ import Data.Char (toUpper) import Data.Default import Data.Generics import Data.Maybe (fromMaybe) -import Data.Text (Text, pack, unpack) +import Data.Text (Text) +import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class (PandocMonad) @@ -50,7 +52,7 @@ readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT (mapM parseBlock $ normalizeTree $ - parseXML (unpack (crFilter inp))) def{ opmlOptions = opts } + parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ @@ -76,23 +78,26 @@ convertEntity :: String -> String convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String +attrValue :: String -> Element -> Text attrValue attr elt = - fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + +textContent :: Element -> Text +textContent = T.pack . strContent -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a -- exceptT = either throwError return -asHtml :: PandocMonad m => String -> OPML m Inlines +asHtml :: PandocMonad m => Text -> OPML m Inlines asHtml s = do opts <- gets opmlOptions - Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } s return $ blocksToInlines' bs -asMarkdown :: PandocMonad m => String -> OPML m Blocks +asMarkdown :: PandocMonad m => Text -> OPML m Blocks asMarkdown s = do opts <- gets opmlOptions - Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s) + Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } s return $ fromList bs getBlocks :: PandocMonad m => Element -> OPML m Blocks @@ -102,11 +107,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ strContent e]}) + st{opmlDocAuthors = [text $ textContent e]}) "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ strContent e}) + st{opmlDocDate = text $ textContent e}) "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ strContent e}) + st{opmlDocTitle = text $ textContent e}) "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e @@ -115,7 +120,7 @@ parseBlock (Elem e) = modify $ \st -> st{ opmlSectionLevel = n } bs <- getBlocks e modify $ \st -> st{ opmlSectionLevel = n - 1 } - let headerText' = case map toUpper (attrValue "type" e) of + let headerText' = case T.toUpper (attrValue "type" e) of "LINK" -> link (attrValue "url" e) "" headerText _ -> headerText diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index dfa019932..f9b78d5bf 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt Copyright : Copyright (C) 2015 Martin Linnemann diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index d8e5ba272..ff8cdc5fa 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -1,11 +1,12 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE Arrows #-} -{-# LANGUAGE DeriveFoldable #-} +{-# LANGUAGE Arrows #-} +{-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} -{-# LANGUAGE PatternGuards #-} -{-# LANGUAGE RecordWildCards #-} -{-# LANGUAGE TupleSections #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.ContentReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -29,8 +30,9 @@ import Control.Arrow import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, intercalate, stripPrefix) +import Data.List (find, stripPrefix) import qualified Data.Map as M +import qualified Data.Text as T import Data.Maybe import Data.Semigroup (First(..), Option(..)) @@ -59,7 +61,7 @@ import qualified Data.Set as Set -- State -------------------------------------------------------------------------------- -type Anchor = String +type Anchor = T.Text type Media = [(FilePath, B.ByteString)] data ReaderState @@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue ( ) >>^ fst -lookupResource :: OdtReaderSafe String (FilePath, B.ByteString) +lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString) lookupResource = proc target -> do state <- getExtraState -< () case lookup target (getMediaEnv state) of Just bs -> returnV (target, bs) -<< () Nothing -> returnV ("", B.empty) -< () -type AnchorPrefix = String +type AnchorPrefix = T.Text -- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a -- unique identifier but without assuming that the id should be for a header. -- Second argument is a list of already used identifiers. uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor uniqueIdentFrom baseIdent usedIdents = - let numIdent n = baseIdent ++ "-" ++ show n + let numIdent n = baseIdent <> "-" <> T.pack (show n) in if baseIdent `elem` usedIdents then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of Just x -> numIdent x @@ -305,7 +307,7 @@ withNewStyle a = proc x -> do isCodeStyle _ = False inlineCode :: Inlines -> Inlines - inlineCode = code . intercalate "" . map stringify . toList + inlineCode = code . T.concat . map stringify . toList type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) type InlineModifier = Inlines -> Inlines @@ -535,7 +537,6 @@ matchChildContent :: (Monoid result) -> OdtReaderSafe _x result matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback - -------------------------------------------- -- Matchers -------------------------------------------- @@ -556,8 +557,8 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover ) >>?% mappend -- - extractText :: XML.Content -> Fallible String - extractText (XML.Text cData) = succeedWith (XML.cdData cData) + extractText :: XML.Content -> Fallible T.Text + extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -675,8 +676,8 @@ read_list_item = matchingElement NsText "list-item" read_link :: InlineMatcher read_link = matchingElement NsText "a" $ liftA3 link - ( findAttrWithDefault NsXLink "href" "" ) - ( findAttrWithDefault NsOffice "title" "" ) + ( findAttrTextWithDefault NsXLink "href" "" ) + ( findAttrTextWithDefault NsOffice "title" "" ) ( matchChildContent [ read_span , read_note , read_citation @@ -709,12 +710,12 @@ read_citation :: InlineMatcher read_citation = matchingElement NsText "bibliography-mark" $ liftA2 cite ( liftA2 makeCitation - ( findAttrWithDefault NsText "identifier" "" ) + ( findAttrTextWithDefault NsText "identifier" "" ) ( readAttrWithDefault NsText "number" 0 ) ) ( matchChildContent [] read_plain_text ) where - makeCitation :: String -> Int -> [Citation] + makeCitation :: T.Text -> Int -> [Citation] makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] @@ -779,17 +780,17 @@ read_frame_img = let exts = extensionsFromList [Ext_auto_identifiers] resource <- lookupResource -< src' _ <- updateMediaWithResource -< resource - w <- findAttr' NsSVG "width" -< () - h <- findAttr' NsSVG "height" -< () + w <- findAttrText' NsSVG "width" -< () + h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) -image_attributes :: Maybe String -> Maybe String -> Attr +image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr image_attributes x y = ( "", [], (dim "width" x) ++ (dim "height" y)) where @@ -806,7 +807,7 @@ read_frame_mathml = src' -> do let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" (_, mathml) <- lookupResource -< path - case readMathML (UTF8.toString $ B.toStrict mathml) of + case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps @@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do read_img_with_caption :: [Block] -> FirstMatch Inlines read_img_with_caption (Para [Image attr alt (src,title)] : _) = - firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption + firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) = - firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows + firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows read_img_with_caption ( Para (_ : xs) : ys) = read_img_with_caption (Para xs : ys) read_img_with_caption _ = @@ -829,12 +830,12 @@ read_img_with_caption _ = -- Internal links ---------------------- -_ANCHOR_PREFIX_ :: String +_ANCHOR_PREFIX_ :: T.Text _ANCHOR_PREFIX_ = "anchor" -- readAnchorAttr :: OdtReader _x Anchor -readAnchorAttr = findAttr NsText "name" +readAnchorAttr = findAttrText NsText "name" -- | Beware: may fail findAnchorName :: OdtReader AnchorPrefix Anchor @@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start" -- | Beware: may fail findAnchorRef :: OdtReader _x Anchor -findAnchorRef = ( findAttr NsText "ref-name" +findAnchorRef = ( findAttrText NsText "ref-name" >>?^ (_ANCHOR_PREFIX_,) ) >>?! getPrettyAnchor @@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do Left _ -> returnA -< inlines where toAnchorRef :: Anchor -> Inlines -> Inlines - toAnchorRef anchor = link ('#':anchor) "" -- no title + toAnchorRef anchor = link ("#" <> anchor) "" -- no title -- read_bookmark_ref :: InlineMatcher diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index ccbaf6fc4..59d1b8abd 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter , lookupAttr' , lookupDefaultingAttr , findAttr' +, findAttrText' , findAttr +, findAttrText , findAttrWithDefault +, findAttrTextWithDefault , readAttr , readAttr' , readAttrWithDefault @@ -59,6 +62,7 @@ import Control.Arrow import Data.Either ( rights ) import qualified Data.Map as M +import qualified Data.Text as T import Data.Default import Data.Maybe @@ -79,6 +83,7 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible type ElementName = String type AttributeName = String type AttributeValue = String +type TextAttributeValue = T.Text -- type NameSpacePrefix = String @@ -466,6 +471,16 @@ findAttr' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr +-- | Return value as a (Maybe Text) +findAttrText' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe TextAttributeValue) +findAttrText' nsID attrName + = qualifyName nsID attrName + &&& getCurrentElement + >>% XML.findAttr + >>^ fmap T.pack + -- | Return value as string or fail findAttr :: (NameSpaceID nsID) => nsID -> AttributeName @@ -473,6 +488,15 @@ findAttr :: (NameSpaceID nsID) findAttr nsID attrName = findAttr' nsID attrName >>> maybeToChoice +-- | Return value as text or fail +findAttrText :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x TextAttributeValue +findAttrText nsID attrName + = findAttr' nsID attrName + >>^ fmap T.pack + >>> maybeToChoice + -- | Return value as string or return provided default value findAttrWithDefault :: (NameSpaceID nsID) => nsID -> AttributeName @@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt = findAttr' nsID attrName >>^ fromMaybe deflt +-- | Return value as string or return provided default value +findAttrTextWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> TextAttributeValue + -> XMLConverter nsID extraState x TextAttributeValue +findAttrTextWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ maybe deflt T.pack + -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) => nsID -> AttributeName diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 79e8d7aea..99fa05880 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) readListLevelStyle levelType = readAttr NsText "level" >>?! keepingTheValue ( liftA5 toListLevelStyle - ( returnV levelType ) - ( findAttr' NsStyle "num-prefix" ) - ( findAttr' NsStyle "num-suffix" ) - ( getAttr NsStyle "num-format" ) - ( findAttr' NsText "start-value" ) + ( returnV levelType ) + ( findAttr' NsStyle "num-prefix" ) + ( findAttr' NsStyle "num-suffix" ) + ( getAttr NsStyle "num-format" ) + ( findAttrText' NsText "start-value" ) ) where toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 2c88c7776..99ece152c 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -26,7 +27,6 @@ import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) import Data.Text (Text) -import qualified Data.Text as T -- | Parse org-mode string and return a Pandoc document. readOrg :: PandocMonad m @@ -36,7 +36,7 @@ readOrg :: PandocMonad m readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left _ -> throwError $ PandocParseError "problem parsing org" diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index 58db4f46c..b4f3cc0d8 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.BlockStarts Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts import Prelude import Control.Monad (void) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Org.Parsing -- | Horizontal Line (five -- dashes or more) @@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m () gridTableStart = try $ skipSpaces <* char '+' <* char '-' -latexEnvStart :: Monad m => OrgParser m String +latexEnvStart :: Monad m => OrgParser m Text latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" <* blankline where - latexEnvName :: Monad m => OrgParser m String - latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") + latexEnvName :: Monad m => OrgParser m Text + latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*") bulletListStart :: Monad m => OrgParser m Int bulletListStart = try $ do @@ -68,7 +71,7 @@ bulletListStart = try $ do return (ind + 1) genericListStart :: Monad m - => OrgParser m String + => OrgParser m Text -> OrgParser m Int genericListStart listMarker = try $ do ind <- length <$> many spaceChar @@ -82,11 +85,11 @@ eol = void (char '\n') orderedListStart :: Monad m => OrgParser m Int orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode - where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)" -drawerStart :: Monad m => OrgParser m String +drawerStart :: Monad m => OrgParser m Text drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline - where drawerName = char ':' *> manyTill nonspaceChar (char ':') + where drawerName = char ':' *> manyTillChar nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () metaLineStart = try $ skipSpaces <* string "#+" @@ -99,12 +102,12 @@ commentLineStart = try $ exampleLineStart :: Monad m => OrgParser m () exampleLineStart = () <$ try (skipSpaces *> string ": ") -noteMarker :: Monad m => OrgParser m String +noteMarker :: Monad m => OrgParser m Text noteMarker = try $ do char '[' - choice [ many1Till digit (char ']') - , (++) <$> string "fn:" - <*> many1Till (noneOf "\n\r\t ") (char ']') + choice [ many1TillChar digit (char ']') + , (<>) <$> textStr "fn:" + <*> many1TillChar (noneOf "\n\r\t ") (char ']') ] -- | Succeeds if the parser is at the end of a block. diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index cba876f06..de51dec3d 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Blocks, Inlines) @@ -33,11 +34,13 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) import Control.Monad (foldM, guard, mzero, void) -import Data.Char (isSpace, toLower, toUpper) +import Data.Char (isSpace) import Data.Default (Default) -import Data.List (foldl', isPrefixOf) +import Data.List (foldl') import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Walk as Walk @@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline -- | Attributes that may be added to figures (like a name or caption). data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrLabel :: Maybe String + { blockAttrName :: Maybe Text + , blockAttrLabel :: Maybe Text , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] + , blockAttrKeyValues :: [(Text, Text)] } -- | Convert BlockAttributes into pandoc Attr @@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} = ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of Nothing -> [] - Just clsStr -> words clsStr + Just clsStr -> T.words clsStr kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute :: Monad m => OrgParser m (Text, Text) stringyMetaAttribute = try $ do metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') + attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':') skipSpaces attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) @@ -129,8 +132,8 @@ blockAttributes = try $ do let label = lookup "LABEL" kv caption' <- case caption of Nothing -> return Nothing - Just s -> Just <$> parseFromString inlines (s ++ "\n") - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs + Just s -> Just <$> parseFromString inlines (s <> "\n") + kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs return BlockAttributes { blockAttrName = name , blockAttrLabel = label @@ -138,31 +141,31 @@ blockAttributes = try $ do , blockAttrKeyValues = kvAttrs' } where - isBlockAttr :: String -> Bool + isBlockAttr :: Text -> Bool isBlockAttr = flip elem [ "NAME", "LABEL", "CAPTION" , "ATTR_HTML", "ATTR_LATEX" , "RESULTS" ] - appendValues :: String -> Maybe String -> (String, String) -> Maybe String + appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text appendValues attrName accValue (key, value) = if key /= attrName then accValue else case accValue of - Just acc -> Just $ acc ++ ' ':value + Just acc -> Just $ acc <> " " <> value Nothing -> Just value -- | Parse key-value pairs for HTML attributes -keyValues :: Monad m => OrgParser m [(String, String)] +keyValues :: Monad m => OrgParser m [(Text, Text)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: Monad m => OrgParser m String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar + key :: Monad m => OrgParser m Text + key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar - value :: Monad m => OrgParser m String - value = skipSpaces *> manyTill anyChar endOfValue + value :: Monad m => OrgParser m Text + value = skipSpaces *> manyTillChar anyChar endOfValue endOfValue :: Monad m => OrgParser m () endOfValue = @@ -180,7 +183,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case map toLower blkType of + case T.toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -194,13 +197,13 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: Monad m => OrgParser m String + blockHeaderStart :: Monad m => OrgParser m Text blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord - lowercase :: String -> String - lowercase = map toLower + lowercase :: Text -> Text + lowercase = T.toLower -exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) exampleBlock blockAttrs _label = do skipSpaces (classes, kv) <- switchesAsAttributes @@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do let codeBlck = B.codeBlockWith (id', "example":classes, kv) content return . return $ codeBlck -rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) +rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks) rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) -parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks) parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType - parseFromString blocks (raw ++ "\n") + parseFromString blocks (raw <> "\n") -- | Read the raw string content of a block -rawBlockContent :: Monad m => String -> OrgParser m String +rawBlockContent :: Monad m => Text -> OrgParser m Text rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop trimP <- orgStateTrimLeadBlkIndent <$> getState - let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs - (unlines + let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs + (T.unlines . stripIndent . map (tabsToSpaces tabLen . commaEscaped) $ blkLines) <$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True }) where - rawLine :: Monad m => OrgParser m String + rawLine :: Monad m => OrgParser m Text rawLine = try $ ("" <$ blankline) <|> anyLine blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - shortestIndent :: [String] -> Int - shortestIndent = foldr (min . length . takeWhile isSpace) maxBound - . filter (not . null) - - tabsToSpaces :: Int -> String -> String - tabsToSpaces _ [] = [] - tabsToSpaces tabLen cs'@(c:cs) = - case c of - ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs - _ -> cs' - - commaEscaped :: String -> String - commaEscaped (',':cs@('*':_)) = cs - commaEscaped (',':cs@('#':'+':_)) = cs - commaEscaped (' ':cs) = ' ':commaEscaped cs - commaEscaped ('\t':cs) = '\t':commaEscaped cs - commaEscaped cs = cs + shortestIndent :: [Text] -> Int + shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound + . filter (not . T.null) + + tabsToSpaces :: Int -> Text -> Text + tabsToSpaces tabStop t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + tabNum = T.length $ T.filter (== '\n') ind + spaceNum = T.length ind - tabNum + in T.replicate (spaceNum + tabStop * tabNum) " " <> suff + + commaEscaped t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + in case T.uncons suff of + Just (',', cs) + | "*" <- T.take 1 cs -> ind <> cs + | "#+" <- T.take 2 cs -> ind <> cs + _ -> t -- | Read but ignore all remaining block headers. ignHeaders :: Monad m => OrgParser m () @@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: Monad m => String -> OrgParser m (F Blocks) +exportBlock :: Monad m => Text -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType - returnF (B.rawBlock (map toLower exportType) contents) + returnF (B.rawBlock (T.toLower exportType) contents) -verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) +verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType fmap B.lineBlock . sequence - <$> mapM parseVerseLine (lines content) + <$> mapM parseVerseLine (T.lines content) where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) + parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines) parseVerseLine cs = do - let (initialSpaces, indentedLine) = span isSpace cs - let nbspIndent = if null initialSpaces + let (initialSpaces, indentedLine) = T.span isSpace cs + let nbspIndent = if T.null initialSpaces then mempty - else B.str $ map (const '\160') initialSpaces - line <- parseFromString inlines (indentedLine ++ "\n") + else B.str $ T.map (const '\160') initialSpaces + line <- parseFromString inlines (indentedLine <> "\n") return (trimInlinesF $ pure nbspIndent <> line) -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - exportsResults :: [(String, String)] -> Bool + exportsResults :: [(Text, Text)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -- | Parse the result of an evaluated babel code block. @@ -329,7 +332,7 @@ babelResultsBlock = try $ do resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments -codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes @@ -338,14 +341,14 @@ codeHeaderArgs = try $ do , originalLang language <> switchKv <> parameters ) -switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) +switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)]) switchesAsAttributes = try $ do switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where - addToAttr :: (Char, Maybe String, SwitchPolarity) - -> ([String], [(String, String)]) - -> ([String], [(String, String)]) + addToAttr :: (Char, Maybe Text, SwitchPolarity) + -> ([Text], [(Text, Text)]) + -> ([Text], [(Text, Text)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of Just num -> ("startFrom", num):kv @@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -- | Parses a source block switch option. -switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) switch = try $ lineNumberSwitch <|> labelSwitch <|> whitespaceSwitch <|> simpleSwitch where simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter labelSwitch = genericSwitch 'l' $ - char '"' *> many1Till nonspaceChar (char '"') + char '"' *> many1TillChar nonspaceChar (char '"') -whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) whitespaceSwitch = do string "-i" updateState $ \s -> s { orgStateTrimLeadBlkIndent = False } @@ -382,8 +385,8 @@ whitespaceSwitch = do -- | Generic source block switch-option parser. genericSwitch :: Monad m => Char - -> OrgParser m String - -> OrgParser m (Char, Maybe String, SwitchPolarity) + -> OrgParser m Text + -> OrgParser m (Char, Maybe Text, SwitchPolarity) genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p @@ -391,17 +394,17 @@ genericSwitch c p = try $ do -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. -lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) -lineNumberSwitch = genericSwitch 'n' (many digit) +lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) +lineNumberSwitch = genericSwitch 'n' (manyChar digit) -blockOption :: Monad m => OrgParser m (String, String) +blockOption :: Monad m => OrgParser m (Text, Text) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: Monad m => OrgParser m String -orgParamValue = try $ +orgParamValue :: Monad m => OrgParser m Text +orgParamValue = try $ fmap T.pack $ skipSpaces *> notFollowedBy orgArgKey *> noneOf "\n\r" `many1Till` endOfValue @@ -420,7 +423,7 @@ orgParamValue = try $ -- export setting. genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do - name <- map toUpper <$> drawerStart + name <- T.toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) state <- getState -- Include drawer if it is explicitly included in or not explicitly excluded @@ -432,16 +435,16 @@ genericDrawer = try $ do Right names | name `notElem` names -> return mempty _ -> drawerDiv name <$> parseLines content where - parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) - parseLines = parseFromString blocks . (++ "\n") . unlines + parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks) + parseLines = parseFromString blocks . (<> "\n") . T.unlines - drawerDiv :: String -> F Blocks -> F Blocks + drawerDiv :: Text -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: Monad m => OrgParser m String +drawerLine :: Monad m => OrgParser m Text drawerLine = anyLine -drawerEnd :: Monad m => OrgParser m String +drawerEnd :: Monad m => OrgParser m Text drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline @@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph - case cleanLinkString src of + case cleanLinkText src of Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: PandocMonad m => OrgParser m String + selfTarget :: PandocMonad m => OrgParser m Text selfTarget = try $ char '[' *> linkTarget <* char ']' - imageBlock :: Bool -> BlockAttributes -> String -> F Blocks + imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks imageBlock isFigure figAttrs imgSrc = let figName = fromMaybe mempty $ blockAttrName figAttrs @@ -478,11 +481,11 @@ figure = try $ do in B.para . B.imageWith attr imgSrc figTitle <$> figCaption - withFigPrefix :: String -> String + withFigPrefix :: Text -> Text withFigPrefix cs = - if "fig:" `isPrefixOf` cs + if "fig:" `T.isPrefixOf` cs then cs - else "fig:" ++ cs + else "fig:" <> cs -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () @@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine where - exampleLine :: Monad m => OrgParser m String + exampleLine :: Monad m => OrgParser m Text exampleLine = try $ exampleLineStart *> anyLine -exampleCode :: String -> Blocks +exampleCode :: Text -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget - includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + includeArgs <- many (try $ skipSpaces *> many1Char alphaNum) params <- keyValues blocksParser <- case includeArgs of ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw @@ -535,10 +538,10 @@ include = try $ do char '"' manyTill (noneOf "\n\r\t") (char '"') - parseRaw :: PandocMonad m => OrgParser m String - parseRaw = many anyChar + parseRaw :: PandocMonad m => OrgParser m Text + parseRaw = manyChar anyChar - blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter :: [(Text, Text)] -> [Block] -> [Block] blockFilter params blks = let minlvl = lookup "minlevel" params in case (minlvl >>= safeRead :: Maybe Int) of @@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <$> (skipSpaces *> char '<' *> optionMaybe tableAlignFromChar) - <*> (optionMaybe (many1 digit >>= safeRead) + <*> (optionMaybe (many1Char digit >>= safeRead) <* char '>' <* emptyCell) @@ -739,10 +742,10 @@ latexFragment = try $ do , "\\end{", e, "}\n" ] -latexEnd :: Monad m => String -> OrgParser m () +latexEnd :: Monad m => Text -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") + <* textStr ("\\end{" <> envName <> "}") <* blankline @@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m -> OrgParser m (F (Inlines, [Blocks])) definitionListItem parseIndentedMarker = try $ do markerLength <- parseIndentedMarker - term <- manyTill (noneOf "\n\r") (try definitionMarker) + term <- manyTillChar (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) + cont <- T.concat <$> many (listContinuation markerLength) term' <- parseFromString inlines term - contents' <- parseFromString blocks $ line1 ++ blank ++ cont + contents' <- parseFromString blocks $ line1 <> blank <> cont return $ (,) <$> term' <*> fmap (:[]) contents' where definitionMarker = @@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do markerLength <- try parseIndentedMarker firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString blocks $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: PandocMonad m => Int -> OrgParser m String +listContinuation :: PandocMonad m => Int -> OrgParser m Text listContinuation markerLength = try $ do notFollowedBy' blankline - mappend <$> (concat <$> many1 (listContinuation' markerLength)) - <*> many blankline + mappend <$> (T.concat <$> many1 (listContinuation' markerLength)) + <*> manyChar blankline where listContinuation' indentation = blockLines indentation <|> listLine indentation @@ -853,6 +856,6 @@ listContinuation markerLength = try $ do >> blockAttributes >>= (\blockAttrs -> case attrFromBlockAttributes blockAttrs of - ("", [], []) -> count 1 anyChar + ("", [], []) -> countChar 1 anyChar _ -> indentWith indentation)) >> (snd <$> withRaw orgBlock) diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index c96087be7..09a501b68 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.DocumentTree Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree import Prelude import Control.Arrow ((***)) import Control.Monad (guard, void) -import Data.Char (toLower, toUpper) import Data.List (intersperse) import Data.Maybe (mapMaybe) +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition @@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import qualified Data.Set as Set +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- @@ -59,28 +61,28 @@ documentTree blocks inline = do } -- | Create a tag containing the given string. -toTag :: String -> Tag +toTag :: Text -> Tag toTag = Tag -- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } +newtype PropertyKey = PropertyKey { fromKey :: Text } deriving (Show, Eq, Ord) -- | Create a property key containing the given string. Org mode keys are -- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower +toPropertyKey :: Text -> PropertyKey +toPropertyKey = PropertyKey . T.toLower -- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } +newtype PropertyValue = PropertyValue { fromValue :: Text } -- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue +toPropertyValue :: Text -> PropertyValue toPropertyValue = PropertyValue -- | Check whether the property value is non-nil (i.e. truish). isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] +isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"] -- | Key/value pairs from a PROPERTIES drawer type Properties = [(PropertyKey, PropertyValue)] @@ -273,7 +275,7 @@ headlineToHeader hdln = do todoKeyword :: Monad m => OrgParser m TodoMarker todoKeyword = try $ do taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) + let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm) <* spaceChar <* updateLastPreCharPos) choice (map kwParser taskStates) @@ -281,26 +283,26 @@ todoKeyword = try $ do todoKeywordToInlines :: TodoMarker -> Inlines todoKeywordToInlines tdm = let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm + todoState = T.toLower . T.pack . show $ todoMarkerState tdm classes = [todoState, todoText] in B.spanWith (mempty, classes, mempty) (B.str todoText) propertiesToAttr :: Properties -> Attr propertiesToAttr properties = let - toStringPair = fromKey *** fromValue + toTextPair = fromKey *** fromValue customIdKey = toPropertyKey "custom_id" classKey = toPropertyKey "class" unnumberedKey = toPropertyKey "unnumbered" specialProperties = [customIdKey, classKey, unnumberedKey] id' = maybe mempty fromValue . lookup customIdKey $ properties cls = maybe mempty fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst) $ properties isUnnumbered = maybe False isNonNil . lookup unnumberedKey $ properties in - (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs') tagsToInlines :: [Tag] -> Inlines tagsToInlines [] = mempty @@ -336,15 +338,15 @@ planningToBlock planning = do <> B.emph (B.str time) -- | An Org timestamp, including repetition marks. TODO: improve -type Timestamp = String +type Timestamp = Text timestamp :: Monad m => OrgParser m Timestamp timestamp = try $ do openChar <- oneOf "<[" let isActive = openChar == '<' let closeChar = if isActive then '>' else ']' - content <- many1Till anyChar (char closeChar) - return (openChar : content ++ [closeChar]) + content <- many1TillChar anyChar (char closeChar) + return $ T.cons openChar $ content `T.snoc` closeChar -- | Planning information for a subtree/headline. data PlanningInfo = PlanningInfo @@ -374,7 +376,7 @@ planningInfo = try $ do propertiesDrawer :: Monad m => OrgParser m Properties propertiesDrawer = try $ do drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" + guard $ T.toUpper drawerType == "PROPERTIES" manyTill property (try endOfDrawer) where property :: Monad m => OrgParser m (PropertyKey, PropertyValue) @@ -382,12 +384,12 @@ propertiesDrawer = try $ do key :: Monad m => OrgParser m PropertyKey key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':') value :: Monad m => OrgParser m PropertyValue value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline) - endOfDrawer :: Monad m => OrgParser m String + endOfDrawer :: Monad m => OrgParser m Text endOfDrawer = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index f783eaa0f..f1f089273 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ExportSettings Copyright : © 2016–2019 Albert Krewinkel @@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing import Control.Monad (mzero, void) import Data.Char (toLower) import Data.Maybe (listToMaybe) +import Data.Text (Text) -- | Read and handle space separated org-mode export settings. exportSettings :: PandocMonad m => OrgParser m () @@ -70,11 +72,11 @@ exportSetting = choice genericExportSetting :: Monad m => OrgParser m a - -> String + -> Text -> ExportSettingSetter a -> OrgParser m () genericExportSetting optionParser settingIdentifier setter = try $ do - _ <- string settingIdentifier *> char ':' + _ <- textStr settingIdentifier *> char ':' value <- optionParser updateState $ modifyExportSettings value where @@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do st { orgStateExportSettings = setter val . orgStateExportSettings $ st } -- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () +booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m () booleanSetting = genericExportSetting elispBoolean -- | An integer-valued option. -integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () +integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m () integerSetting = genericExportSetting parseInt where parseInt = try $ @@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt -- | Either the string "headline" or an elisp boolean and treated as an -- @ArchivedTreesOption@. archivedTreeSetting :: Monad m - => String + => Text -> ExportSettingSetter ArchivedTreesOption -> OrgParser m () archivedTreeSetting = @@ -115,42 +117,42 @@ archivedTreeSetting = -- | A list or a complement list (i.e. a list starting with `not`). complementableListSetting :: Monad m - => String - -> ExportSettingSetter (Either [String] [String]) + => Text + -> ExportSettingSetter (Either [Text] [Text]) -> OrgParser m () complementableListSetting = genericExportSetting $ choice - [ Left <$> complementStringList + [ Left <$> complementTextList , Right <$> stringList , (\b -> if b then Left [] else Right []) <$> elispBoolean ] where -- Read a plain list of strings. - stringList :: Monad m => OrgParser m [String] + stringList :: Monad m => OrgParser m [Text] stringList = try $ char '(' - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' -- Read an emacs lisp list specifying a complement set. - complementStringList :: Monad m => OrgParser m [String] - complementStringList = try $ + complementTextList :: Monad m => OrgParser m [Text] + complementTextList = try $ string "(not " - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' - elispString :: Monad m => OrgParser m String - elispString = try $ + elispText :: Monad m => OrgParser m Text + elispText = try $ char '"' - *> manyTill alphaNum (char '"') + *> manyTillChar alphaNum (char '"') -- | Read but ignore the export setting. -ignoredSetting :: Monad m => String -> OrgParser m () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) +ignoredSetting :: Monad m => Text -> OrgParser m () +ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar) -- | Read any setting string, but ignore it and emit a warning. ignoreAndWarn :: PandocMonad m => OrgParser m () ignoreAndWarn = try $ do - opt <- many1 nonspaceChar + opt <- many1Char nonspaceChar report (UnknownOrgExportOption opt) return () diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index cae590c5f..da638f717 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -20,7 +20,7 @@ import Prelude import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Inlines) @@ -38,12 +38,14 @@ import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T import Data.Maybe (fromMaybe) -- -- Functions acting on the parser state -- -recordAnchorId :: PandocMonad m => String -> OrgParser m () +recordAnchorId :: PandocMonad m => Text -> OrgParser m () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : orgStateAnchorIds s } @@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural @@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String +orgRefCiteKey :: PandocMonad m => OrgParser m Text orgRefCiteKey = let citeKeySpecialChars = "-_:\\./," :: String isCiteKeySpecialChar c = c `elem` citeKeySpecialChars @@ -329,7 +331,7 @@ orgRefCiteKey = endOfCitation = try $ do many $ satisfy isCiteKeySpecialChar satisfy $ not . isCiteKeyChar - in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation -- | Supported citation types. Only a small subset of org-ref types is @@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" - ref <- many alphaNum + ref <- manyChar alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - unless (null ref) $ - addToNotesTable ("fn:" ++ ref, note) + unless (T.null ref) $ + addToNotesTable ("fn:" <> ref, note) return $ B.note <$> note referencedNote :: PandocMonad m => OrgParser m (F Inlines) @@ -397,7 +399,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return . B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" <> ref <> "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -420,7 +422,7 @@ explicitOrImageLink = try $ do return $ do src <- srcF title <- titleF - case cleanLinkString descr of + case cleanLinkText descr of Just imgSrc | isImageFilename imgSrc -> return . B.link src "" $ B.image imgSrc mempty mempty _ -> @@ -429,10 +431,10 @@ explicitOrImageLink = try $ do selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do target <- char '[' *> linkTarget <* char ']' - case cleanLinkString target of - Nothing -> case target of - '#':_ -> returnF $ B.link target "" (B.str target) - _ -> return $ internalLink target (B.str target) + case cleanLinkText target of + Nothing -> case T.uncons target of + Just ('#', _) -> returnF $ B.link target "" (B.str target) + _ -> return $ internalLink target (B.str target) Just nonDocTgt -> if isImageFilename nonDocTgt then returnF $ B.image nonDocTgt "" "" else returnF $ B.link nonDocTgt "" (B.str target) @@ -449,35 +451,35 @@ angleLink = try $ do char '>' return link -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]") +linkTarget :: PandocMonad m => OrgParser m Text +linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser m (F String) +applyCustomLinkFormat :: Text -> OrgParser m (F Text) applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link + let (linkType, rest) = T.break (== ':') link return $ do formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + return $ maybe link ($ T.drop 1 rest) formatter -- | Take a link and return a function which produces new inlines when given -- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF :: Text -> Inlines -> F Inlines linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - Just extTgt -> return . B.link extTgt "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines + case T.uncons linkStr of + Nothing -> pure . B.link mempty "" -- wiki link (empty by convention) + Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction + _ -> case cleanLinkText linkStr of + Just extTgt -> return . B.link extTgt "" + Nothing -> internalLink linkStr -- other internal link + +internalLink :: Text -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds if anchorB - then return $ B.link ('#':link) "" title + then return $ B.link ("#" <> link) "" title else return $ B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with @@ -493,15 +495,15 @@ anchor = try $ do returnF $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") + *> many1Char (noneOf "\t\n\r<>\"' ") <* string ">>" <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. -solidify :: String -> String -solidify = map replaceSpecialChar +solidify :: Text -> Text +solidify = T.map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c | c `elem` ("_.-:" :: String) = c @@ -511,25 +513,25 @@ solidify = map replaceSpecialChar inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" - lang <- many1 orgArgWordChar + lang <- many1Char orgArgWordChar opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r") + inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang] let attrKeyVal = originalLang lang <> opts let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode returnF $ if exportsCode opts then codeInlineBlck else mempty where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: PandocMonad m => OrgParser m String + orgInlineParamValue :: PandocMonad m => OrgParser m Text orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") + *> many1Char (noneOf "\t\n\r ]") <* skipSpaces @@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' + , mathTextBetween '$' , rawMathBetween "\\(" "\\)" ] @@ -604,7 +606,7 @@ updatePositions c = do return c symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions) emphasisBetween :: PandocMonad m => Char @@ -619,7 +621,7 @@ emphasisBetween c = try $ do verbatimBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -627,33 +629,33 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m +mathTextBetween :: PandocMonad m => Char - -> OrgParser m String -mathStringBetween c = try $ do + -> OrgParser m Text +mathTextBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines (noneOf (c:"\n\r")) (lookAhead $ mathEnd c) final <- mathEnd c - return $ body ++ [final] + return $ T.snoc body final -- | Parse a single character between @c@ using math rules math1CharBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] + return $ T.singleton res rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + => Text + -> Text + -> OrgParser m Text +rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) -- | Parses the start (opening character) of emphasis emphasisStart :: PandocMonad m => Char -> OrgParser m Char @@ -702,10 +704,10 @@ enclosedInlines start end = try $ enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b - -> OrgParser m String + -> OrgParser m Text enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end + where onSingleLine = try $ many1TillChar (noneOf "\n\r") end spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine @@ -714,7 +716,7 @@ enclosedRaw start end = try $ many1TillNOrLessNewlines :: PandocMonad m => Int -> OrgParser m Char -> OrgParser m a - -> OrgParser m String + -> OrgParser m Text many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) finalLine = try $ manyTill p end minus1 k = k - 1 - oneOrMore cs = cs <$ guard (not $ null cs) + oneOrMore cs = T.pack cs <$ guard (not $ null cs) -- Org allows customization of the way it reads emphasis. We use the defaults -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` @@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString + , simpleSubOrSuperText ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] + where enclosing (left, right) s = T.cons left $ T.snoc s right -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do +simpleSubOrSuperText :: PandocMonad m => OrgParser m Text +simpleSubOrSuperText = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum + choice [ textStr "*" + , mappend <$> option "" (T.singleton <$> oneOf "+-") + <*> many1Char alphaNum ] inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) @@ -793,28 +795,28 @@ inlineLaTeX = try $ do maybe mzero returnF $ parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils where - parseAsMath :: String -> Maybe Inlines + parseAsMath :: Text -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines) parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym :: Text -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 + where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1 state :: ParserState state = def{ stateOptions = def{ readerExtensions = enableExtension Ext_raw_tex (readerExtensions def) } } - texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc :: Text -> Maybe [Inline] texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: PandocMonad m => OrgParser m String +inlineLaTeXCommand :: PandocMonad m => OrgParser m Text inlineLaTeXCommand = try $ do rest <- getInput st <- getState @@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc + let cmdNoSpc = T.dropWhileEnd isSpace cs + let len = T.length cmdNoSpc count len anyChar return cmdNoSpc _ -> mzero --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") + format <- many1TillChar (alphaNum <|> char '-') (char ':') + snippet <- manyTillChar anyChar (try $ string "@@") returnF $ B.rawInline format snippet macro :: PandocMonad m => OrgParser m (F Inlines) @@ -845,7 +843,7 @@ macro = try $ do recursionDepth <- orgStateMacroDepth <$> getState guard $ recursionDepth < 15 string "{{{" - name <- many alphaNum + name <- manyChar alphaNum args <- ([] <$ string "}}}") <|> char '(' *> argument `sepBy` char ',' <* eoa expander <- lookupMacro name <$> getState @@ -857,7 +855,7 @@ macro = try $ do updateState $ \s -> s { orgStateMacroDepth = recursionDepth } return res where - argument = many $ notFollowedBy eoa *> noneOf "," + argument = manyChar $ notFollowedBy eoa *> noneOf "," eoa = string ")}}}" smart :: PandocMonad m => OrgParser m (F Inlines) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 0a388403e..811a5b974 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Meta Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -30,11 +31,12 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) import Control.Monad (mzero, void, when) -import Data.Char (toLower) import Data.List (intersperse) import Data.Maybe (fromMaybe) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. @@ -47,7 +49,7 @@ metaExport = do . (if exportWithEmail settings then id else removeMeta "email") <$> orgStateMeta st -removeMeta :: String -> Meta -> Meta +removeMeta :: Text -> Meta -> Meta removeMeta key meta' = let metaMap = unMeta meta' in Meta $ M.delete key metaMap @@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do - key <- map toLower <$> metaKey + key <- T.toLower <$> metaKey (key', value) <- metaValue key let addMetaValue st = st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } when (key' /= "results") $ updateState addMetaValue -metaKey :: Monad m => OrgParser m String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces +metaKey :: Monad m => OrgParser m Text +metaKey = T.toLower <$> many1Char (noneOf ": \n\r") + <* char ':' + <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) +metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -88,7 +90,7 @@ metaValue key = -- Org-mode expects class options to contain the surrounding brackets, -- pandoc does not. "latex_class_options" -> ("classoption",) <$> - metaModifiedString (filter (`notElem` "[]")) + metaModifiedString (T.filter (`notElem` ("[]" :: String))) "html_head" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString @@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' + itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ',' newline - items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs + items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList return $ MetaList . map toMetaInlines <$> sequence items metaString :: Monad m => OrgParser m (F MetaValue) metaString = metaModifiedString id -metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) +metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue) metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition -metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) +metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue) metaExportSnippet format = return . MetaInlines . B.toList . B.rawInline format <$> anyLine -- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: Monad m => String +accumulatingList :: Monad m => Text -> OrgParser m (F MetaValue) -> OrgParser m (F MetaValue) accumulatingList key p = do @@ -147,33 +149,33 @@ optionLine = try $ do "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar _ -> mzero -addLinkFormat :: Monad m => String - -> (String -> String) +addLinkFormat :: Monad m => Text + -> (Text -> Text) -> OrgParser m () addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m (String, String -> String) +parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text) parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat return (linkType, linkSubst) -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. -parseFormat :: Monad m => OrgParser m (String -> String) +parseFormat :: Monad m => OrgParser m (Text -> Text) parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) + replacePlain = try $ (\x -> T.concat . flip intersperse x) <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack) <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest + justAppend = try $ (<>) <$> rest - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:"")) tagList :: Monad m => OrgParser m [Tag] tagList = do @@ -231,41 +233,41 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where - todoKeywords :: Monad m => OrgParser m [String] + todoKeywords :: Monad m => OrgParser m [Text] todoKeywords = try $ - let keyword = many1 nonspaceChar <* skipSpaces + let keyword = many1Char nonspaceChar <* skipSpaces endOfKeywords = todoDoneSep <|> void newline in manyTill keyword (lookAhead endOfKeywords) todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 - keywordsToSequence :: [String] -> [String] -> TodoSequence + keywordsToSequence :: [Text] -> [Text] -> TodoSequence keywordsToSequence todo done = let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers -macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text) macroDefinition = try $ do - macroName <- many1 nonspaceChar <* skipSpaces + macroName <- many1Char nonspaceChar <* skipSpaces firstPart <- expansionPart (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder return (macroName, expander) where placeholder :: Monad m => OrgParser m Int - placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit + placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit - expansionPart :: Monad m => OrgParser m String - expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + expansionPart :: Monad m => OrgParser m Text + expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r") alternate :: [a] -> [a] -> [a] alternate [] ys = ys alternate xs [] = xs alternate (x:xs) (y:ys) = x : y : alternate xs ys - reorder :: [Int] -> [String] -> [String] + reorder :: [Int] -> [Text] -> [Text] reorder perm xs = let element n = take 1 $ drop (n - 1) xs in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index d6dde8b22..cf5583b76 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ParserState Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro) type F = Future OrgParserState -- | An inline note / footnote containing the note key and its (inline) value. -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (Text, F Blocks) -- | Table of footnotes type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. -type OrgLinkFormatters = M.Map String (String -> String) +type OrgLinkFormatters = M.Map Text (Text -> Text) -- | Macro expander function -type MacroExpander = [String] -> String +type MacroExpander = [Text] -> Text -- | Tag -newtype Tag = Tag { fromTag :: String } +newtype Tag = Tag { fromTag :: Text } deriving (Show, Eq, Ord) -- | The states in which a todo item can be @@ -82,7 +83,7 @@ data TodoState = Todo | Done -- | A ToDo keyword like @TODO@ or @DONE@. data TodoMarker = TodoMarker { todoMarkerState :: TodoState - , todoMarkerName :: String + , todoMarkerName :: Text } deriving (Show, Eq) @@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker] -- | Org-mode parser state data OrgParserState = OrgParserState - { orgStateAnchorIds :: [String] + { orgStateAnchorIds :: [Text] , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before -- emphasis; spaces and newlines are @@ -102,13 +103,13 @@ data OrgParserState = OrgParserState , orgStateExcludeTags :: Set.Set Tag , orgStateExcludeTagsChanged :: Bool , orgStateExportSettings :: ExportSettings - , orgStateIdentifiers :: Set.Set String - , orgStateIncludeFiles :: [String] + , orgStateIdentifiers :: Set.Set Text + , orgStateIncludeFiles :: [Text] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMacros :: M.Map String MacroExpander + , orgStateMacros :: M.Map Text MacroExpander , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable @@ -212,10 +213,10 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences -lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander lookupMacro macroName = M.lookup macroName . orgStateMacros -registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState registerMacro (name, expander) st = let curMacros = orgStateMacros st in st{ orgStateMacros = M.insert name expander curMacros } @@ -236,7 +237,7 @@ data ArchivedTreesOption = -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] + , exportDrawers :: Either [Text] [Text] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 24aa0779d..718925120 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing , orgTagWordChar -- * Re-exports from Text.Pandoc.Parser , ParserContext (..) + , textStr + , countChar + , manyChar + , many1Char + , manyTillChar , many1Till + , many1TillChar , notFollowedBy' , spaceChar , nonspaceChar @@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing ) where import Prelude +import Data.Text (Text) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline, @@ -108,14 +115,14 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities -- -- | Parse any line of text -anyLine :: Monad m => OrgParser m String +anyLine :: Monad m => OrgParser m Text anyLine = P.anyLine <* updateLastPreCharPos @@ -123,7 +130,7 @@ anyLine = -- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character -- allowed before emphasised text. -parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a +parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a parseFromString parser str' = do updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } result <- P.parseFromString parser str' @@ -142,7 +149,7 @@ newline = <* updateLastForbiddenCharPos -- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: Monad m => OrgParser m [Char] +blanklines :: Monad m => OrgParser m Text blanklines = P.blanklines <* updateLastPreCharPos @@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p -> -- -- | Read the key of a plist style key-value list. -orgArgKey :: Monad m => OrgParser m String +orgArgKey :: Monad m => OrgParser m Text orgArgKey = try $ skipSpaces *> char ':' - *> many1 orgArgWordChar + *> many1Char orgArgWordChar -- | Read the value of a plist style key-value list. -orgArgWord :: Monad m => OrgParser m String -orgArgWord = many1 orgArgWordChar +orgArgWord :: Monad m => OrgParser m Text +orgArgWord = many1Char orgArgWordChar -- | Chars treated as part of a word in plists. orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" -orgTagWord :: Monad m => OrgParser m String -orgTagWord = many1 orgTagWordChar +orgTagWord :: Monad m => OrgParser m Text +orgTagWord = many1Char orgTagWordChar orgTagWordChar :: Monad m => OrgParser m Char orgTagWordChar = alphaNum <|> oneOf "@%#_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 34f958373..be0a2068e 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -10,7 +10,7 @@ Utility functions used in other Pandoc Org modules. -} module Text.Pandoc.Readers.Org.Shared - ( cleanLinkString + ( cleanLinkText , isImageFilename , originalLang , translateLang @@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared import Prelude import Data.Char (isAlphaNum) -import Data.List (isPrefixOf) +import Data.Text (Text) +import qualified Data.Text as T import System.FilePath (isValid, takeExtension) - +import Text.Pandoc.Shared (elemText) -- | Check whether the given string looks like the path to of URL of an image. -isImageFilename :: String -> Bool -isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri) +isImageFilename :: Text -> Bool +isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri) where - hasImageExtension = takeExtension fp `elem` imageExtensions - isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols + hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions + isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ] protocols = [ "file", "http", "https" ] -- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if -- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' - _ -> if isUrl s then Just s else Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) +cleanLinkText :: Text -> Maybe Text +cleanLinkText s + | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path + | Just _ <- T.stripPrefix "./" s = Just s -- relative path + | Just _ <- T.stripPrefix "../" s = Just s -- relative path + -- Relative path or URL (file schema) + | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s' + | otherwise = if isUrl s then Just s else Nothing + where + isUrl :: Text -> Bool + isUrl cs = + let (scheme, path) = T.break (== ':') cs + in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + && not (T.null path) -- | Creates an key-value pair marking the original language name specified for -- a piece of source code. -- | Creates an key-value attributes marking the original language name -- specified for a piece of source code. -originalLang :: String -> [(String, String)] +originalLang :: Text -> [(Text, Text)] originalLang lang = let transLang = translateLang lang in if transLang == lang @@ -66,7 +66,7 @@ originalLang lang = -- | Translate from Org-mode's programming language identifiers to those used -- by Pandoc. This is useful to allow for proper syntax highlighting in -- Pandoc output. -translateLang :: String -> String +translateLang :: Text -> Text translateLang cs = case cs of "C" -> "c" @@ -79,5 +79,5 @@ translateLang cs = "sqlite" -> "sql" _ -> cs -exportsCode :: [(String, String)] -> Bool +exportsCode :: [(Text, Text)] -> Bool exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 7e29caf28..d2fba4449 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1,7 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.RST Copyright : Copyright (C) 2006-2019 John MacFarlane @@ -19,9 +20,8 @@ import Control.Arrow (second) import Control.Monad (forM_, guard, liftM, mplus, mzero, when) import Control.Monad.Except (throwError) import Control.Monad.Identity (Identity (..)) -import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum) -import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf, - nub, sort, transpose) +import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum) +import Data.List (deleteFirstsBy, elemIndex, nub, sort, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Sequence (ViewR (..), viewr) @@ -47,16 +47,16 @@ import Text.Printf (printf) -- | Parse reStructuredText string and return Pandoc document. readRST :: PandocMonad m => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> Text -- ^ Text to parse (assuming @'\n'@ line endings) -> m Pandoc readRST opts s = do parsed <- readWithM parseRST def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -type RSTParser m = ParserT [Char] ParserState m +type RSTParser m = ParserT Text ParserState m -- -- Constants and data structure definitions @@ -113,7 +113,7 @@ titleTransform (bs, meta) = metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta metaFromDefList ds meta = adjustAuthors $ foldr f meta ds - where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v) + where f (k,v) = setMeta (T.toLower $ stringify k) (mconcat $ map fromList v) adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author" $ M.adjust toPlain "date" $ M.adjust toPlain "title" @@ -136,13 +136,13 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds isSp LineBreak = True isSp _ = False splitOnSemi = splitBy (==Str ";") - factorSemi (Str []) = [] - factorSemi (Str s) = case break (==';') s of - (xs,[]) -> [Str xs] - (xs,';':ys) -> Str xs : Str ";" : - factorSemi (Str ys) - (xs,ys) -> Str xs : - factorSemi (Str ys) + factorSemi (Str "") = [] + factorSemi (Str s) = case T.break (==';') s of + (xs,"") -> [Str xs] + (xs,T.uncons -> Just (';',ys)) -> Str xs : Str ";" : + factorSemi (Str ys) + (xs,ys) -> Str xs : + factorSemi (Str ys) factorSemi x = [x] parseRST :: PandocMonad m => RSTParser m Pandoc @@ -151,7 +151,7 @@ parseRST = do startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- concat <$> + docMinusKeys <- T.concat <$> manyTill (referenceKey <|> anchorDef <|> noteBlock <|> citationBlock <|> (snd <$> withRaw comment) <|> @@ -180,7 +180,7 @@ parseRST = do return $ Pandoc meta' (blocks' ++ refBlock) parseCitation :: PandocMonad m - => (String, String) -> RSTParser m (Inlines, [Blocks]) + => (Text, Text) -> RSTParser m (Inlines, [Blocks]) parseCitation (ref, raw) = do contents <- parseFromString' parseBlocks raw return (B.spanWith (ref, ["citation-label"], []) (B.str ref), @@ -215,23 +215,23 @@ block = choice [ codeBlock -- field list -- -rawFieldListItem :: Monad m => Int -> RSTParser m (String, String) +rawFieldListItem :: Monad m => Int -> RSTParser m (Text, Text) rawFieldListItem minIndent = try $ do indent <- length <$> many (char ' ') guard $ indent >= minIndent char ':' - name <- many1Till (noneOf "\n") (char ':') + name <- many1TillChar (noneOf "\n") (char ':') (() <$ lookAhead newline) <|> skipMany1 spaceChar first <- anyLine rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar) indentedBlock - let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n" + let raw = (if T.null first then "" else (first <> "\n")) <> rest <> "\n" return (name, raw) fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent - term <- parseInlineFromString name + term <- parseInlineFromText name contents <- parseFromString' parseBlocks raw optional blanklines return (term, [contents]) @@ -251,12 +251,12 @@ fieldList = try $ do lineBlock :: PandocMonad m => RSTParser m Blocks lineBlock = try $ do lines' <- lineBlockLines - lines'' <- mapM parseInlineFromString lines' + lines'' <- mapM parseInlineFromText lines' return $ B.lineBlock lines'' -lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks +lineBlockDirective :: PandocMonad m => Text -> RSTParser m Blocks lineBlockDirective body = do - lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body + lines' <- mapM parseInlineFromText $ T.lines $ stripTrailingNewlines body return $ B.lineBlock lines' -- @@ -271,9 +271,9 @@ para = try $ do newline blanklines case viewr (B.unMany result) of - ys :> Str xs | "::" `isSuffixOf` xs -> do + ys :> Str xs | "::" `T.isSuffixOf` xs -> do raw <- option mempty codeBlockBody - return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs)) + return $ B.para (B.Many ys <> B.str (T.take (T.length xs - 1) xs)) <> raw _ -> return (B.para result) @@ -349,7 +349,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT [Char] st m Blocks +hrule :: Monad m => ParserT Text st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -364,7 +364,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT [Char] st m [Char] + => Int -> ParserT Text st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -373,29 +373,29 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT [Char] st m [Char] + => ParserT Text st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines l <- indentedLine indents - return (b ++ l) + return (b <> l) optional blanklines - return $ unlines lns + return $ T.unlines lns -quotedBlock :: Monad m => ParserT [Char] st m [Char] +quotedBlock :: Monad m => ParserT Text st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines - return $ unlines lns + return $ T.unlines lns -codeBlockStart :: Monad m => ParserT [Char] st m Char +codeBlockStart :: Monad m => ParserT Text st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlock :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks +codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$> (indentedBlock <|> quotedBlock) @@ -407,24 +407,24 @@ lhsCodeBlock = try $ do lns <- latexCodeBlock <|> birdCodeBlock blanklines return $ B.codeBlockWith ("", ["haskell","literate"], []) - $ intercalate "\n" lns + $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +latexCodeBlock :: Monad m => ParserT Text st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]] +birdCodeBlock :: Monad m => ParserT Text st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it - if all (\ln -> null ln || take 1 ln == " ") lns - then map (drop 1) lns + if all (\ln -> T.null ln || T.take 1 ln == " ") lns + then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT [Char] st m [Char] +birdTrackLine :: Monad m => ParserT Text st m Text birdTrackLine = char '>' >> anyLine -- @@ -435,7 +435,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks blockQuote = do raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n\n" + contents <- parseFromString' parseBlocks $ raw <> "\n\n" return $ B.blockQuote contents {- @@ -445,12 +445,12 @@ encoding -} includeDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks includeDirective top fields body = do let f = trim top - guard $ not (null f) - guard $ null (trim body) + guard $ not (T.null f) + guard $ T.null (trim body) -- options let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead @@ -458,11 +458,11 @@ includeDirective top fields body = do oldInput <- getInput containers <- stateContainers <$> getState when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " ++ show oldPos + throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos updateState $ \s -> s{ stateContainers = f : stateContainers s } - mbContents <- readFileFromDirs ["."] f + mbContents <- readFileFromDirs ["."] $ T.unpack f contentLines <- case mbContents of - Just s -> return $ lines s + Just s -> return $ T.lines s Nothing -> do logMessage $ CouldNotLoadIncludeFile f oldPos return [] @@ -478,23 +478,23 @@ includeDirective top fields body = do let contentLines' = drop (startLine' - 1) $ take (endLine' - 1) contentLines let contentLines'' = (case trim <$> lookup "end-before" fields of - Just patt -> takeWhile (not . (patt `isInfixOf`)) + Just patt -> takeWhile (not . (patt `T.isInfixOf`)) Nothing -> id) . (case trim <$> lookup "start-after" fields of Just patt -> drop 1 . - dropWhile (not . (patt `isInfixOf`)) + dropWhile (not . (patt `T.isInfixOf`)) Nothing -> id) $ contentLines' - let contents' = unlines contentLines'' ++ "\n" + let contents' = T.unlines contentLines'' <> "\n" case lookup "code" fields of Just lang -> do let numberLines = lookup "number-lines" fields - let classes = maybe [] words (lookup "class" fields) + let classes = maybe [] T.words (lookup "class" fields) let ident = maybe "" trimr $ lookup "name" fields codeblock ident classes numberLines (trimr lang) contents' False Nothing -> case lookup "literal" fields of Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do - setPosition $ newPos f 1 1 + setPosition $ newPos (T.unpack f) 1 1 setInput contents' bs <- optional blanklines >> (mconcat <$> many block) @@ -519,14 +519,14 @@ definitionListItem = try $ do term <- trimInlines . mconcat <$> many1Till inline endline raw <- indentedBlock -- parse the extracted block, which may contain various block elements: - contents <- parseFromString' parseBlocks $ raw ++ "\n" + contents <- parseFromString' parseBlocks $ raw <> "\n" return (term, [contents]) definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT [Char] st m Int +bulletListStart :: Monad m => ParserT Text st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -543,7 +543,7 @@ orderedListStart style delim = try $ do return $ markerLen + length white -- parse a line of a list item -listLine :: Monad m => Int -> RSTParser m [Char] +listLine :: Monad m => Int -> RSTParser m Text listLine markerLength = try $ do notFollowedBy blankline indentWith markerLength @@ -551,21 +551,21 @@ listLine markerLength = try $ do -- parse raw text for one list item, excluding start marker and continuations rawListItem :: Monad m => RSTParser m Int - -> RSTParser m (Int, [Char]) + -> RSTParser m (Int, Text) rawListItem start = try $ do markerLength <- start firstLine <- anyLineNewline restLines <- many (listLine markerLength) - return (markerLength, firstLine ++ concat restLines) + return (markerLength, firstLine <> T.concat restLines) -- continuation of a list item - indented and separated by blankline or -- (in compact lists) endline. -- Note: nested lists are parsed as continuations. -listContinuation :: Monad m => Int -> RSTParser m [Char] +listContinuation :: Monad m => Int -> RSTParser m Text listContinuation markerLength = try $ do - blanks <- many1 blankline + blanks <- many1Char blankline result <- many1 (listLine markerLength) - return $ blanks ++ concat result + return $ blanks <> T.concat result listItem :: PandocMonad m => RSTParser m Int @@ -581,7 +581,7 @@ listItem start = try $ do let oldContext = stateParserContext state setState $ state {stateParserContext = ListItemState} -- parse the extracted block, which may itself contain block elements - parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n" + parsed <- parseFromString' parseBlocks $ T.concat (first:rest) <> "\n" updateState (\st -> st {stateParserContext = oldContext}) return $ case B.toList parsed of [Para xs] -> @@ -617,9 +617,9 @@ comment = try $ do optional indentedBlock return mempty -directiveLabel :: Monad m => RSTParser m String -directiveLabel = map toLower - <$> many1Till (letter <|> char '-') (try $ string "::") +directiveLabel :: Monad m => RSTParser m Text +directiveLabel = T.toLower + <$> many1TillChar (letter <|> char '-') (try $ string "::") directive :: PandocMonad m => RSTParser m Blocks directive = try $ do @@ -631,7 +631,7 @@ directive' = do skipMany1 spaceChar label <- directiveLabel skipMany spaceChar - top <- many $ satisfy (/='\n') + top <- manyChar $ satisfy (/='\n') <|> try (char '\n' <* notFollowedBy' (rawFieldListItem 1) <* many1 (char ' ') <* @@ -644,35 +644,33 @@ directive' = do else many $ rawFieldListItem fieldIndent body <- option "" $ try $ blanklines >> indentedBlock optional blanklines - let body' = body ++ "\n\n" + let body' = body <> "\n\n" name = trim $ fromMaybe "" (lookup "name" fields) - classes = words $ maybe "" trim (lookup "class" fields) + classes = T.words $ maybe "" trim (lookup "class" fields) keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"] imgAttr cl = (name, classes ++ alignClasses, widthAttr ++ heightAttr) where - alignClasses = words $ maybe "" trim (lookup cl fields) ++ - maybe "" (\x -> "align-" ++ trim x) + alignClasses = T.words $ maybe "" trim (lookup cl fields) <> + maybe "" (\x -> "align-" <> trim x) (lookup "align" fields) scale = case trim <$> lookup "scale" fields of - Just v -> case reverse v of - '%':vv -> - case safeRead (reverse vv) of - Just (percent :: Double) - -> percent / 100.0 - Nothing -> 1.0 - _ -> - case safeRead v of - Just (s :: Double) -> s - Nothing -> 1.0 - Nothing -> 1.0 + Just v -> case T.unsnoc v of + Just (vv, '%') -> case safeRead vv of + Just (percent :: Double) + -> percent / 100.0 + Nothing -> 1.0 + _ -> case safeRead v of + Just (s :: Double) -> s + Nothing -> 1.0 + Nothing -> 1.0 widthAttr = maybe [] (\x -> [("width", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "width" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) heightAttr = maybe [] (\x -> [("height", - show $ scaleDimension scale x)]) + tshow $ scaleDimension scale x)]) $ lookup "height" fields >>= - (lengthToDim . filter (not . isSpace)) + (lengthToDim . T.filter (not . isSpace)) case label of "include" -> includeDirective top fields body' "table" -> tableDirective top fields body' @@ -682,36 +680,37 @@ directive' = do "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "role" -> addNewRole top $ map (second trim) fields "container" -> B.divWith - (name, "container" : words top ++ classes, []) <$> + (name, "container" : T.words top ++ classes, []) <$> parseFromString' parseBlocks body' "replace" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim top) + parseInlineFromText (trim top) "unicode" -> B.para <$> -- consumed by substKey - parseInlineFromString (trim $ unicodeTransform top) + parseInlineFromText (trim $ unicodeTransform top) "compound" -> parseFromString' parseBlocks body' "pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body' "epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body' "highlights" -> B.blockQuote <$> parseFromString' parseBlocks body' - "rubric" -> B.para . B.strong <$> parseInlineFromString top + "rubric" -> B.para . B.strong <$> parseInlineFromText top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning","admonition"] -> - do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + do bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' let lab = case label of - "admonition" -> mempty - (l:ls) -> B.divWith ("",["title"],[]) - (B.para (B.str (toUpper l : ls))) - [] -> mempty + "admonition" -> mempty + (T.uncons -> Just (l, ls)) + -> B.divWith ("",["title"],[]) + (B.para (B.str $ T.cons (toUpper l) ls)) + _ -> mempty return $ B.divWith (name,label:classes,keyvals) (lab <> bod) "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseInlineFromString - (trim top ++ if null subtit + tit <- B.para . B.strong <$> parseInlineFromText + (trim top <> if T.null subtit then "" - else (": " ++ subtit)) + else (": " <> subtit)) bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod "topic" -> - do tit <- B.para . B.strong <$> parseInlineFromString top + do tit <- B.para . B.strong <$> parseInlineFromText top bod <- parseFromString' parseBlocks body' return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod "default-role" -> mempty <$ updateState (\s -> @@ -726,7 +725,7 @@ directive' = do let attribs = (name, ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body "math" -> return $ B.para $ mconcat $ map B.displayMath - $ toChunks $ top ++ "\n\n" ++ body + $ toChunks $ top <> "\n\n" <> body "figure" -> do (caption, legend) <- parseFromString' extractCaption body' let src = escapeURI $ trim top @@ -742,7 +741,7 @@ directive' = do $ B.imageWith attr src "" alt Nothing -> B.imageWith attr src "" alt "class" -> do - let attrs = (name, words (trim top), map (second trimr) fields) + let attrs = (name, T.words (trim top), map (second trimr) fields) -- directive content or the first immediately following element children <- case body of "" -> block @@ -750,12 +749,12 @@ directive' = do return $ B.divWith attrs children other -> do pos <- getPosition - logMessage $ SkippedContent (".. " ++ other) pos - bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body' + logMessage $ SkippedContent (".. " <> other) pos + bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body' return $ B.divWith (name, other:classes, keyvals) bod tableDirective :: PandocMonad m - => String -> [(String, String)] -> String -> RSTParser m Blocks + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks tableDirective top fields body = do bs <- parseFromString' parseBlocks body case B.toList bs of @@ -770,7 +769,7 @@ tableDirective top fields body = do Just "grid" -> widths' Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs Nothing -> widths' -- align is not applicable since we can't represent whole table align return $ B.singleton $ Table (B.toList title) @@ -783,7 +782,7 @@ tableDirective top fields body = do -- since Pandoc doesn't support a table with multiple header rows. -- We don't need to parse :align: as it represents the whole table align. listTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks listTableDirective top fields body = do bs <- parseFromString' parseBlocks body @@ -799,7 +798,7 @@ listTableDirective top fields body = do widths = case trim <$> lookup "widths" fields of Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $ - splitBy (`elem` (" ," :: String)) specs + splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -812,7 +811,7 @@ listTableDirective top fields body = do normWidths ws = map (/ max 1 (sum ws)) ws csvTableDirective :: PandocMonad m - => String -> [(String, String)] -> String + => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks csvTableDirective top fields rawcsv = do let explicitHeader = trim <$> lookup "header" fields @@ -820,14 +819,17 @@ csvTableDirective top fields rawcsv = do csvDelim = case trim <$> lookup "delim" fields of Just "tab" -> '\t' Just "space" -> ' ' - Just [c] -> c + Just (T.unpack -> [c]) + -> c _ -> ',' , csvQuote = case trim <$> lookup "quote" fields of - Just [c] -> c - _ -> '"' + Just (T.unpack -> [c]) + -> c + _ -> '"' , csvEscape = case trim <$> lookup "escape" fields of - Just [c] -> Just c - _ -> Nothing + Just (T.unpack -> [c]) + -> Just c + _ -> Nothing , csvKeepSpace = case trim <$> lookup "keepspace" fields of Just "true" -> True _ -> False @@ -840,16 +842,16 @@ csvTableDirective top fields rawcsv = do lookup "file" fields `mplus` lookup "url" fields of Just u -> do (bs, _) <- fetchItem u - return $ UTF8.toString bs + return $ UTF8.toText bs Nothing -> return rawcsv - let res = parseCSV opts (T.pack $ case explicitHeader of - Just h -> h ++ "\n" ++ rawcsv' - Nothing -> rawcsv') + let res = parseCSV opts (case explicitHeader of + Just h -> h <> "\n" <> rawcsv' + Nothing -> rawcsv') case res of Left e -> throwError $ PandocParsecError "csv table" e Right rawrows -> do - let parseCell = parseFromString' (plain <|> return mempty) . T.unpack + let parseCell = parseFromString' (plain <|> return mempty) let parseRow = mapM parseCell rows <- mapM parseRow rawrows let (headerRow,bodyRows,numOfCols) = @@ -865,7 +867,7 @@ csvTableDirective top fields rawcsv = do Just "auto" -> replicate numOfCols 0 Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) - $ splitBy (`elem` (" ," :: String)) specs + $ splitTextBy (`elem` (" ," :: String)) specs _ -> replicate numOfCols 0 return $ B.table title (zip (replicate numOfCols AlignDefault) widths) @@ -876,10 +878,10 @@ csvTableDirective top fields rawcsv = do -- - Only supports :format: fields with a single format for :raw: roles, -- change Text.Pandoc.Definition.Format to fix addNewRole :: PandocMonad m - => String -> [(String, String)] -> RSTParser m Blocks -addNewRole roleString fields = do + => Text -> [(Text, Text)] -> RSTParser m Blocks +addNewRole roleText fields = do pos <- getPosition - (role, parentRole) <- parseFromString' inheritedRole roleString + (role, parentRole) <- parseFromString' inheritedRole roleText customRoles <- stateRstCustomRoles <$> getState let getBaseRole (r, f, a) roles = case M.lookup r roles of @@ -888,7 +890,7 @@ addNewRole roleString fields = do (baseRole, baseFmt, baseAttr) = getBaseRole (parentRole, Nothing, nullAttr) customRoles fmt = if parentRole == "raw" then lookup "format" fields else baseFmt - annotate :: [String] -> [String] + annotate :: [Text] -> [Text] annotate = maybe id (:) $ if baseRole == "code" then lookup "language" fields @@ -904,7 +906,7 @@ addNewRole roleString fields = do pos "format" -> when (baseRole /= "raw") $ logMessage $ SkippedContent ":format: [because parent of role is not :raw:]" pos - _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos + _ -> logMessage $ SkippedContent (":" <> key <> ":") pos when (parentRole == "raw" && countKeys "format" > 1) $ logMessage $ SkippedContent ":format: [after first in definition of role]" @@ -930,30 +932,29 @@ addNewRole roleString fields = do -- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u -- or as XML-style hexadecimal character entities, e.g. ᨫ -- or text, which is used as-is. Comments start with .. -unicodeTransform :: String -> String -unicodeTransform t = - case t of - ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment - ('0':'x':xs) -> go "0x" xs - ('x':xs) -> go "x" xs - ('\\':'x':xs) -> go "\\x" xs - ('U':'+':xs) -> go "U+" xs - ('u':xs) -> go "u" xs - ('\\':'u':xs) -> go "\\u" xs - ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs) - -- drop semicolon - (\(c,s) -> c : unicodeTransform (drop 1 s)) - $ extractUnicodeChar xs - (x:xs) -> x : unicodeTransform xs - [] -> [] - where go pref zs = maybe (pref ++ unicodeTransform zs) - (\(c,s) -> c : unicodeTransform s) - $ extractUnicodeChar zs - -extractUnicodeChar :: String -> Maybe (Char, String) +unicodeTransform :: Text -> Text +unicodeTransform t + | Just xs <- T.stripPrefix ".." t = unicodeTransform $ T.dropWhile (/= '\n') xs -- comment + | Just xs <- T.stripPrefix "0x" t = go "0x" xs + | Just xs <- T.stripPrefix "x" t = go "x" xs + | Just xs <- T.stripPrefix "\\x" t = go "\\x" xs + | Just xs <- T.stripPrefix "U+" t = go "U+" xs + | Just xs <- T.stripPrefix "u" t = go "u" xs + | Just xs <- T.stripPrefix "\\u" t = go "\\u" xs + | Just xs <- T.stripPrefix "&#x" t = maybe ("&#x" <> unicodeTransform xs) + -- drop semicolon + (\(c,s) -> T.cons c $ unicodeTransform $ T.drop 1 s) + $ extractUnicodeChar xs + | Just (x, xs) <- T.uncons t = T.cons x $ unicodeTransform xs + | otherwise = "" + where go pref zs = maybe (pref <> unicodeTransform zs) + (\(c,s) -> T.cons c $ unicodeTransform s) + $ extractUnicodeChar zs + +extractUnicodeChar :: Text -> Maybe (Char, Text) extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc - where (ds,rest) = span isHexDigit s - mbc = safeRead ('\'':'\\':'x':ds ++ "'") + where (ds,rest) = T.span isHexDigit s + mbc = safeRead ("'\\x" <> ds <> "'") extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks) extractCaption = do @@ -963,16 +964,16 @@ extractCaption = do -- divide string by blanklines, and surround with -- \begin{aligned}...\end{aligned} if needed. -toChunks :: String -> [String] -toChunks = dropWhile null - . map (addAligned . trim . unlines) - . splitBy (all (`elem` (" \t" :: String))) . lines +toChunks :: Text -> [Text] +toChunks = dropWhile T.null + . map (addAligned . trim . T.unlines) + . splitBy (T.all (`elem` (" \t" :: String))) . T.lines -- we put this in an aligned environment if it contains \\, see #4254 - where addAligned s = if "\\\\" `isInfixOf` s - then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}" + where addAligned s = if "\\\\" `T.isInfixOf` s + then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}" else s -codeblock :: String -> [String] -> Maybe String -> String -> String -> Bool +codeblock :: Text -> [Text] -> Maybe Text -> Text -> Text -> Bool -> RSTParser m Blocks codeblock ident classes numberLines lang body rmTrailingNewlines = return $ B.codeBlockWith attribs $ stripTrailingNewlines' body @@ -984,7 +985,7 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = : maybe [] (const ["numberLines"]) numberLines ++ classes kvs = maybe [] (\n -> case trimr n of - [] -> [] + "" -> [] xs -> [("startFrom", xs)]) numberLines @@ -992,25 +993,25 @@ codeblock ident classes numberLines lang body rmTrailingNewlines = --- note block --- -noteBlock :: Monad m => RSTParser m [Char] +noteBlock :: Monad m => RSTParser m Text noteBlock = try $ do (ref, raw, replacement) <- noteBlock' noteMarker updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s } -- return blanks so line count isn't affected return replacement -citationBlock :: Monad m => RSTParser m [Char] +citationBlock :: Monad m => RSTParser m Text citationBlock = try $ do (ref, raw, replacement) <- noteBlock' citationMarker updateState $ \s -> s { stateCitations = M.insert ref raw (stateCitations s), - stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[])) + stateKeys = M.insert (toKey ref) (("#" <> ref,""), ("",["citation"],[])) (stateKeys s) } -- return blanks so line count isn't affected return replacement noteBlock' :: Monad m - => RSTParser m String -> RSTParser m (String, String, String) + => RSTParser m Text -> RSTParser m (Text, Text, Text) noteBlock' marker = try $ do startPos <- getPosition string ".." @@ -1021,24 +1022,24 @@ noteBlock' marker = try $ do blanks <- option "" blanklines rest <- option "" indentedBlock endPos <- getPosition - let raw = first ++ "\n" ++ blanks ++ rest ++ "\n" - let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n' + let raw = first <> "\n" <> blanks <> rest <> "\n" + let replacement = T.replicate (sourceLine endPos - sourceLine startPos) "\n" return (ref, raw, replacement) -citationMarker :: Monad m => RSTParser m [Char] +citationMarker :: Monad m => RSTParser m Text citationMarker = do char '[' res <- simpleReferenceName char ']' return res -noteMarker :: Monad m => RSTParser m [Char] +noteMarker :: Monad m => RSTParser m Text noteMarker = do char '[' - res <- many1 digit + res <- many1Char digit <|> - try (char '#' >> liftM ('#':) simpleReferenceName) - <|> count 1 (oneOf "#*") + try (char '#' >> liftM ("#" <>) simpleReferenceName) + <|> countChar 1 (oneOf "#*") char ']' return res @@ -1046,47 +1047,48 @@ noteMarker = do -- reference key -- -quotedReferenceName :: PandocMonad m => RSTParser m String +quotedReferenceName :: PandocMonad m => RSTParser m Text quotedReferenceName = try $ do char '`' >> notFollowedBy (char '`') -- `` means inline code! - manyTill anyChar (char '`') + manyTillChar anyChar (char '`') -- Simple reference names are single words consisting of alphanumerics -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName :: Monad m => ParserT [Char] st m String +simpleReferenceName :: Monad m => ParserT Text st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum <|> try (oneOf "-_:+." <* lookAhead alphaNum) - return (x:xs) + return $ T.pack (x:xs) -referenceName :: PandocMonad m => RSTParser m String +referenceName :: PandocMonad m => RSTParser m Text referenceName = quotedReferenceName <|> simpleReferenceName -referenceKey :: PandocMonad m => RSTParser m [Char] +referenceKey :: PandocMonad m => RSTParser m Text referenceKey = do startPos <- getPosition choice [substKey, anonymousKey, regularKey] optional blanklines endPos <- getPosition -- return enough blanks to replace key - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT [Char] st m [Char] +targetURI :: Monad m => ParserT Text st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline contents <- trim <$> - many1 (satisfy (/='\n') + many1Char (satisfy (/='\n') <|> try (newline >> many1 spaceChar >> noneOf " \t\n")) blanklines - case reverse contents of - -- strip backticks - '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_") - '_':_ -> return contents - _ -> return (escapeURI contents) + return $ stripBackticks contents + where + stripBackticks t + | Just xs <- T.stripSuffix "`_" t = T.dropWhile (=='`') xs <> "_" + | Just _ <- T.stripSuffix "_" t = t + | otherwise = escapeURI t substKey :: PandocMonad m => RSTParser m () substKey = try $ do @@ -1112,21 +1114,21 @@ anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI pos <- getPosition - let key = toKey $ "_" ++ printf "%09d" (sourceLine pos) + let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos)) updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -referenceNames :: PandocMonad m => RSTParser m [String] +referenceNames :: PandocMonad m => RSTParser m [Text] referenceNames = do let rn = try $ do string ".. _" ref <- quotedReferenceName - <|> many ( noneOf ":\n" - <|> try (char '\n' <* - string " " <* - notFollowedBy blankline) - <|> try (char ':' <* lookAhead alphaNum) - ) + <|> manyChar ( noneOf ":\n" + <|> try (char '\n' <* + string " " <* + notFollowedBy blankline) + <|> try (char ':' <* lookAhead alphaNum) + ) char ':' return ref first <- rn @@ -1140,18 +1142,18 @@ regularKey = try $ do -- .. _goodbye: url.com refs <- referenceNames src <- targetURI - guard $ not (null src) + guard $ not (T.null src) let keys = map toKey refs forM_ keys $ \key -> updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } -anchorDef :: PandocMonad m => RSTParser m [Char] +anchorDef :: PandocMonad m => RSTParser m Text anchorDef = try $ do (refs, raw) <- withRaw $ try (referenceNames <* blanklines) forM_ refs $ \rawkey -> updateState $ \s -> s { stateKeys = - M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s } + M.insert (toKey rawkey) (("#" <> rawkey,""), nullAttr) $ stateKeys s } -- keep this for 2nd round of parsing, where we'll add the divs (anchor) return raw @@ -1174,12 +1176,12 @@ anchor = try $ do -- because it hides them from promoteHeader, see #4240 _ -> return $ foldr addDiv b refs -headerBlock :: PandocMonad m => RSTParser m [Char] +headerBlock :: PandocMonad m => RSTParser m Text headerBlock = do ((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader') (ident,_,_) <- registerHeader nullAttr txt let key = toKey (stringify txt) - updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr) + updateState $ \s -> s { stateKeys = M.insert key (("#" <> ident,""), nullAttr) $ stateKeys s } return raw @@ -1201,13 +1203,13 @@ headerBlock = do -- Grid tables TODO: -- - column spans -dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int) +dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1215,17 +1217,17 @@ simpleTableSep :: Monad m => Char -> RSTParser m Char simpleTableSep ch = try $ simpleDashedLines ch >> newline -- Parse a table footer -simpleTableFooter :: Monad m => RSTParser m [Char] +simpleTableFooter :: Monad m => RSTParser m Text simpleTableFooter = try $ simpleTableSep '=' >> blanklines -- Parse a raw line and split it into chunks by indices. -simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine -simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String] +simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [Text] simpleTableRawLineWithEmptyCell indices = try $ do cs <- simpleTableRawLine indices - let isEmptyCell = all (\c -> c == ' ' || c == '\t') + let isEmptyCell = T.all (\c -> c == ' ' || c == '\t') guard $ any isEmptyCell cs return cs @@ -1235,15 +1237,15 @@ simpleTableRow indices = do notFollowedBy' simpleTableFooter firstLine <- simpleTableRawLine indices conLines <- many $ simpleTableRawLineWithEmptyCell indices - let cols = map unlines . transpose $ firstLine : conLines ++ - [replicate (length indices) "" - | not (null conLines)] + let cols = map T.unlines . transpose $ firstLine : conLines ++ + [replicate (length indices) "" + | not (null conLines)] mapM (parseFromString' parseBlocks) cols -simpleTableSplitLine :: [Int] -> String -> [String] +simpleTableSplitLine :: [Int] -> Text -> [Text] simpleTableSplitLine indices line = map trimr - $ tail $ splitByIndices (init indices) line + $ tail $ splitTextByIndices (init indices) line simpleTableHeader :: PandocMonad m => Bool -- ^ Headerless table @@ -1322,35 +1324,35 @@ inlineContent = choice [ whitespace , escapedChar , symbol ] <?> "inline content" -parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines -parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline) +parseInlineFromText :: PandocMonad m => Text -> RSTParser m Inlines +parseInlineFromText = parseFromString' (trimInlines . mconcat <$> many inline) hyphens :: Monad m => RSTParser m Inlines hyphens = do - result <- many1 (char '-') + result <- many1Char (char '-') optional endline -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT [Char] st m Inlines +escapedChar :: Monad m => ParserT Text st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST then mempty - else B.str [c] + else B.str $ T.singleton c symbol :: Monad m => RSTParser m Inlines symbol = do result <- oneOf specialChars - return $ B.str [result] + return $ B.str $ T.singleton result -- parses inline code, between codeStart and codeEnd code :: Monad m => RSTParser m Inlines code = try $ do string "``" - result <- manyTill anyChar (try (string "``")) + result <- manyTillChar anyChar (try (string "``")) return $ B.code - $ trim $ unwords $ lines result + $ trim $ T.unwords $ T.lines result -- succeeds only if we're not right after a str (ie. in middle of word) atStart :: Monad m => RSTParser m a -> RSTParser m a @@ -1382,7 +1384,7 @@ interpretedRole = try $ do renderRole contents Nothing role nullAttr renderRole :: PandocMonad m - => String -> Maybe String -> String -> Attr -> RSTParser m Inlines + => Text -> Maybe Text -> Text -> Attr -> RSTParser m Inlines renderRole contents fmt role attr = case role of "sup" -> return $ B.superscript $ treatAsText contents "superscript" -> return $ B.superscript $ treatAsText contents @@ -1412,36 +1414,36 @@ renderRole contents fmt role attr = case role of contents where titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref - rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo) - where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html" - pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo) - where padNo = replicate (4 - length pepNo) '0' ++ pepNo - pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/" + rfcLink rfcNo = B.link rfcUrl ("RFC " <> rfcNo) $ B.str ("RFC " <> rfcNo) + where rfcUrl = "http://www.faqs.org/rfcs/rfc" <> rfcNo <> ".html" + pepLink pepNo = B.link pepUrl ("PEP " <> pepNo) $ B.str ("PEP " <> pepNo) + where padNo = T.replicate (4 - T.length pepNo) "0" <> pepNo + pepUrl = "http://www.python.org/dev/peps/pep-" <> padNo <> "/" treatAsText = B.text . handleEscapes - handleEscapes [] = [] - handleEscapes ('\\':' ':cs) = handleEscapes cs - handleEscapes ('\\':c:cs) = c : handleEscapes cs - handleEscapes (c:cs) = c : handleEscapes cs + handleEscapes = T.concat . removeSpace . T.splitOn "\\" + where headSpace t = fromMaybe t $ T.stripPrefix " " t + removeSpace (x:xs) = x : map headSpace xs + removeSpace [] = [] -roleName :: PandocMonad m => RSTParser m String -roleName = many1 (letter <|> char '-') +roleName :: PandocMonad m => RSTParser m Text +roleName = many1Char (letter <|> char '-') -roleMarker :: PandocMonad m => RSTParser m String +roleMarker :: PandocMonad m => RSTParser m Text roleMarker = char ':' *> roleName <* char ':' -roleBefore :: PandocMonad m => RSTParser m (String,String) +roleBefore :: PandocMonad m => RSTParser m (Text,Text) roleBefore = try $ do role <- roleMarker contents <- unmarkedInterpretedText return (role,contents) -roleAfter :: PandocMonad m => RSTParser m (String,String) +roleAfter :: PandocMonad m => RSTParser m (Text,Text) roleAfter = try $ do contents <- unmarkedInterpretedText role <- roleMarker <|> (stateRstDefaultRole <$> getState) return (role,contents) -unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char] +unmarkedInterpretedText :: PandocMonad m => RSTParser m Text unmarkedInterpretedText = try $ do atStart (char '`') contents <- mconcat <$> (many1 @@ -1453,7 +1455,7 @@ unmarkedInterpretedText = try $ do lookAhead (satisfy isAlphaNum)) )) char '`' - return contents + return $ T.pack contents whitespace :: PandocMonad m => RSTParser m Inlines whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" @@ -1461,7 +1463,7 @@ whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace" str :: Monad m => RSTParser m Inlines str = do let strChar = noneOf ("\t\n " ++ specialChars) - result <- many1 strChar + result <- many1Char strChar updateLastStrPos return $ B.str result @@ -1489,7 +1491,7 @@ explicitLink = try $ do notFollowedBy (char '`') -- `` marks start of inline code label' <- trimInlines . mconcat <$> manyTill (notFollowedBy (char '`') >> inlineContent) (char '<') - src <- trim <$> manyTill (noneOf ">\n") (char '>') + src <- trim <$> manyTillChar (noneOf ">\n") (char '>') skipSpaces string "`_" optional $ char '_' -- anonymous form @@ -1501,22 +1503,22 @@ explicitLink = try $ do if isURI src then return ((src, ""), nullAttr) else - case reverse src of - '_':xs -> lookupKey [] (toKey (reverse xs)) - _ -> return ((src, ""), nullAttr) + case T.unsnoc src of + Just (xs, '_') -> lookupKey [] (toKey xs) + _ -> return ((src, ""), nullAttr) return $ B.linkWith attr (escapeURI src') tit label'' -citationName :: PandocMonad m => RSTParser m String +citationName :: PandocMonad m => RSTParser m Text citationName = do raw <- citationMarker - return $ "[" ++ raw ++ "]" + return $ "[" <> raw <> "]" referenceLink :: PandocMonad m => RSTParser m Inlines referenceLink = try $ do ref <- (referenceName <|> citationName) <* char '_' let label' = B.text ref - let isAnonKey (Key ('_':_)) = True - isAnonKey _ = False + let isAnonKey (Key (T.uncons -> Just ('_',_))) = True + isAnonKey _ = False state <- getState let keyTable = stateKeys state key <- option (toKey ref) $ @@ -1533,7 +1535,7 @@ referenceLink = try $ do -- We keep a list of oldkeys so we can detect lookup loops. lookupKey :: PandocMonad m - => [Key] -> Key -> RSTParser m ((String, String), Attr) + => [Key] -> Key -> RSTParser m ((Text, Text), Attr) lookupKey oldkeys key = do pos <- getPosition state <- getState @@ -1544,8 +1546,8 @@ lookupKey oldkeys key = do logMessage $ ReferenceNotFound key' pos return (("",""),nullAttr) -- check for keys of the form link_, which need to be resolved: - Just ((u@(c:_),""),_) | last u == '_', c /= '#' -> do - let rawkey = init u + Just ((u, ""),_) | T.length u > 1, T.last u == '_', T.head u /= '#' -> do + let rawkey = T.init u let newkey = toKey rawkey if newkey `elem` oldkeys then do @@ -1576,7 +1578,7 @@ subst = try $ do case M.lookup key substTable of Nothing -> do pos <- getPosition - logMessage $ ReferenceNotFound (show key) pos + logMessage $ ReferenceNotFound (tshow key) pos return mempty Just target -> return target diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 6519587c6..73122cc14 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -1,5 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Roff Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane @@ -21,7 +23,7 @@ module Text.Pandoc.Readers.Roff , TableRow , RoffToken(..) , RoffTokens(..) - , linePartsToString + , linePartsToText , lexRoff ) where @@ -40,7 +42,7 @@ import qualified Data.Text as T import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Shared (safeRead, substitute) +import Text.Pandoc.Shared (safeRead) import Text.Parsec hiding (tokenPrim) import Text.Pandoc.RoffChar (characterCodes, combiningAccents) import qualified Data.Sequence as Seq @@ -60,28 +62,28 @@ data FontSpec = FontSpec{ fontBold :: Bool defaultFontSpec :: FontSpec defaultFontSpec = FontSpec False False False -data LinePart = RoffStr String +data LinePart = RoffStr T.Text | Font FontSpec | MacroArg Int deriving Show type Arg = [LinePart] -type TableOption = (String, String) +type TableOption = (T.Text, T.Text) data CellFormat = CellFormat { columnType :: Char , pipePrefix :: Bool , pipeSuffix :: Bool - , columnSuffixes :: [String] + , columnSuffixes :: [T.Text] } deriving (Show, Eq, Ord) type TableRow = ([CellFormat], [RoffTokens]) data RoffToken = TextLine [LinePart] | EmptyLine - | ControlLine String [Arg] SourcePos + | ControlLine T.Text [Arg] SourcePos | Tbl [TableOption] [TableRow] SourcePos deriving Show @@ -95,7 +97,7 @@ data RoffMode = NormalMode | CopyMode deriving Show -data RoffState = RoffState { customMacros :: M.Map String RoffTokens +data RoffState = RoffState { customMacros :: M.Map T.Text RoffTokens , prevFont :: FontSpec , currentFont :: FontSpec , tableTabChar :: Char @@ -121,10 +123,10 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT [Char] RoffState m +type RoffLexer m = ParserT T.Text RoffState m -- --- Lexer: String -> RoffToken +-- Lexer: T.Text -> RoffToken -- eofline :: Stream s m Char => ParsecT s u m () @@ -133,11 +135,11 @@ eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") spacetab :: Stream s m Char => ParsecT s u m Char spacetab = char ' ' <|> char '\t' -characterCodeMap :: M.Map String Char +characterCodeMap :: M.Map T.Text Char characterCodeMap = M.fromList $ map (\(x,y) -> (y,x)) characterCodes -combiningAccentsMap :: M.Map String Char +combiningAccentsMap :: M.Map T.Text Char combiningAccentsMap = M.fromList $ map (\(x,y) -> (y,x)) combiningAccents @@ -151,43 +153,40 @@ escapeGlyph = do c <- lookAhead (oneOf ['[','(']) escapeArg >>= resolveGlyph c -resolveGlyph :: PandocMonad m => Char -> String -> RoffLexer m [LinePart] +resolveGlyph :: PandocMonad m => Char -> T.Text -> RoffLexer m [LinePart] resolveGlyph delimChar glyph = do - let cs = substitute "_u" " u" glyph -- unicode glyphs separated by _ - (case words cs of + let cs = T.replace "_u" " u" glyph -- unicode glyphs separated by _ + (case T.words cs of [] -> mzero [s] -> case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> mzero - Just c -> return [RoffStr [c]] + Just c -> return [RoffStr $ T.singleton c] (s:ss) -> do basechar <- case M.lookup s characterCodeMap `mplus` readUnicodeChar s of Nothing -> - case s of + case T.unpack s of [ch] | isAscii ch && isAlphaNum ch -> return ch _ -> mzero Just c -> return c - let addAccents [] xs = return $ T.unpack . - Normalize.normalize Normalize.NFC . - T.pack $ reverse xs + let addAccents [] xs = return $ Normalize.normalize Normalize.NFC $ + T.reverse xs addAccents (a:as) xs = case M.lookup a combiningAccentsMap `mplus` readUnicodeChar a of - Just x -> addAccents as (x:xs) + Just x -> addAccents as $ T.cons x xs Nothing -> mzero - addAccents ss [basechar] >>= \xs -> return [RoffStr xs]) + addAccents ss (T.singleton basechar) >>= \xs -> return [RoffStr xs]) <|> case delimChar of - '[' -> escUnknown ("\\[" ++ glyph ++ "]") - '(' -> escUnknown ("\\(" ++ glyph) - '\'' -> escUnknown ("\\C'" ++ glyph ++ "'") + '[' -> escUnknown ("\\[" <> glyph <> "]") + '(' -> escUnknown ("\\(" <> glyph) + '\'' -> escUnknown ("\\C'" <> glyph <> "'") _ -> Prelude.fail "resolveGlyph: unknown glyph delimiter" -readUnicodeChar :: String -> Maybe Char -readUnicodeChar ('u':cs@(_:_:_:_:_)) = - case safeRead ('0':'x':cs) of - Just i -> Just (chr i) - Nothing -> Nothing -readUnicodeChar _ = Nothing +readUnicodeChar :: T.Text -> Maybe Char +readUnicodeChar t = case T.uncons t of + Just ('u', cs) | T.length cs > 3 -> chr <$> safeRead ("0x" <> cs) + _ -> Nothing escapeNormal :: PandocMonad m => RoffLexer m [LinePart] escapeNormal = do @@ -218,14 +217,14 @@ escapeNormal = do NormalMode -> return [RoffStr "\\"] 'H' -> escIgnore 'H' [quoteArg] 'L' -> escIgnore 'L' [quoteArg] - 'M' -> escIgnore 'M' [escapeArg, count 1 (satisfy (/='\n'))] + 'M' -> escIgnore 'M' [escapeArg, countChar 1 (satisfy (/='\n'))] 'N' -> escIgnore 'N' [quoteArg] - 'O' -> escIgnore 'O' [count 1 (oneOf ['0','1'])] + 'O' -> escIgnore 'O' [countChar 1 (oneOf ['0','1'])] 'R' -> escIgnore 'R' [quoteArg] 'S' -> escIgnore 'S' [quoteArg] - 'V' -> escIgnore 'V' [escapeArg, count 1 alphaNum] + 'V' -> escIgnore 'V' [escapeArg, countChar 1 alphaNum] 'X' -> escIgnore 'X' [quoteArg] - 'Y' -> escIgnore 'Y' [escapeArg, count 1 (satisfy (/='\n'))] + 'Y' -> escIgnore 'Y' [escapeArg, countChar 1 (satisfy (/='\n'))] 'Z' -> escIgnore 'Z' [quoteArg] '\'' -> return [RoffStr "`"] '\n' -> return mempty -- line continuation @@ -238,12 +237,12 @@ escapeNormal = do 'd' -> escIgnore 'd' [] -- forward down 1/2em 'e' -> return [RoffStr "\\"] 'f' -> escFont - 'g' -> escIgnore 'g' [escapeArg, count 1 (satisfy (/='\n'))] + 'g' -> escIgnore 'g' [escapeArg, countChar 1 (satisfy (/='\n'))] 'h' -> escIgnore 'h' [quoteArg] - 'k' -> escIgnore 'k' [escapeArg, count 1 (satisfy (/='\n'))] + 'k' -> escIgnore 'k' [escapeArg, countChar 1 (satisfy (/='\n'))] 'l' -> escIgnore 'l' [quoteArg] - 'm' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] - 'n' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))] + 'm' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] + 'n' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))] 'o' -> escIgnore 'o' [quoteArg] 'p' -> escIgnore 'p' [] 'r' -> escIgnore 'r' [] @@ -253,7 +252,7 @@ escapeNormal = do 'v' -> escIgnore 'v' [quoteArg] 'w' -> escIgnore 'w' [quoteArg] 'x' -> escIgnore 'x' [quoteArg] - 'z' -> escIgnore 'z' [count 1 anyChar] + 'z' -> escIgnore 'z' [countChar 1 anyChar] '|' -> return [RoffStr "\x2006"] --1/6 em space '~' -> return [RoffStr "\160"] -- nonbreaking space '\\' -> do @@ -262,40 +261,40 @@ escapeNormal = do CopyMode -> char '\\' NormalMode -> return '\\' return [RoffStr "\\"] - _ -> return [RoffStr [c]] + _ -> return [RoffStr $ T.singleton c] -- man 7 groff: "If a backslash is followed by a character that -- does not constitute a defined escape sequence, the backslash -- is silently ignored and the character maps to itself." escIgnore :: PandocMonad m => Char - -> [RoffLexer m String] + -> [RoffLexer m T.Text] -> RoffLexer m [LinePart] escIgnore c argparsers = do pos <- getPosition arg <- snd <$> withRaw (choice argparsers) <|> return "" - report $ SkippedContent ('\\':c:arg) pos + report $ SkippedContent ("\\" <> T.cons c arg) pos return mempty -escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart] +escUnknown :: PandocMonad m => T.Text -> RoffLexer m [LinePart] escUnknown s = do pos <- getPosition report $ SkippedContent s pos return [RoffStr "\xFFFD"] -signedNumber :: PandocMonad m => RoffLexer m String +signedNumber :: PandocMonad m => RoffLexer m T.Text signedNumber = try $ do sign <- option "" ("-" <$ char '-' <|> "" <$ char '+') - ds <- many1 digit - return (sign ++ ds) + ds <- many1Char digit + return (sign <> ds) -- Parses: [..] or (.. -escapeArg :: PandocMonad m => RoffLexer m String +escapeArg :: PandocMonad m => RoffLexer m T.Text escapeArg = choice [ char '[' *> optional expandString *> - manyTill (noneOf ['\n',']']) (char ']') + manyTillChar (noneOf ['\n',']']) (char ']') , char '(' *> optional expandString *> - count 2 (satisfy (/='\n')) + countChar 2 (satisfy (/='\n')) ] expandString :: PandocMonad m => RoffLexer m () @@ -303,21 +302,21 @@ expandString = try $ do pos <- getPosition char '\\' char '*' - cs <- escapeArg <|> count 1 anyChar - s <- linePartsToString <$> resolveString cs pos - getInput >>= setInput . (s ++) + cs <- escapeArg <|> countChar 1 anyChar + s <- linePartsToText <$> resolveText cs pos + getInput >>= setInput . (s <>) return () -- Parses: '..' -quoteArg :: PandocMonad m => RoffLexer m String -quoteArg = char '\'' *> manyTill (noneOf ['\n','\'']) (char '\'') +quoteArg :: PandocMonad m => RoffLexer m T.Text +quoteArg = char '\'' *> manyTillChar (noneOf ['\n','\'']) (char '\'') escFont :: PandocMonad m => RoffLexer m [LinePart] escFont = do - font <- escapeArg <|> count 1 alphaNum - font' <- if null font || font == "P" + font <- escapeArg <|> countChar 1 alphaNum + font' <- if T.null font || font == "P" then prevFont <$> getState - else return $ foldr processFontLetter defaultFontSpec font + else return $ foldr processFontLetter defaultFontSpec $ T.unpack font modifyState $ \st -> st{ prevFont = currentFont st , currentFont = font' } return [Font font'] @@ -345,7 +344,7 @@ lexMacro = do guard $ sourceColumn pos == 1 || afterConditional st char '.' <|> char '\'' skipMany spacetab - macroName <- many (satisfy isAlphaNum) + macroName <- manyChar (satisfy isAlphaNum) case macroName of "nop" -> return mempty "ie" -> lexConditional "ie" @@ -374,8 +373,8 @@ lexTable pos = do spaces opts <- try tableOptions <|> [] <$ optional (char ';') case lookup "tab" opts of - Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c } + _ -> modifyState $ \st -> st{ tableTabChar = '\t' } spaces skipMany lexComment spaces @@ -388,7 +387,7 @@ lexTable pos = do string ".TE" skipMany spacetab eofline - return $ singleTok $ Tbl opts (rows ++ concat morerows) pos + return $ singleTok $ Tbl opts (rows <> concat morerows) pos lexTableRows :: PandocMonad m => RoffLexer m [TableRow] lexTableRows = do @@ -428,11 +427,11 @@ tableOptions = many1 tableOption <* spaces <* char ';' tableOption :: PandocMonad m => RoffLexer m TableOption tableOption = do - k <- many1 letter + k <- many1Char letter v <- option "" $ try $ do skipMany spacetab char '(' - manyTill anyChar (char ')') + manyTillChar anyChar (char ')') skipMany spacetab optional (char ',' >> skipMany spacetab) return (k,v) @@ -444,7 +443,7 @@ tableFormatSpec = do let speclines = first:rest spaces char '.' - return $ speclines ++ repeat (lastDef [] speclines) -- last line is default + return $ speclines <> repeat (lastDef [] speclines) -- last line is default tableFormatSpecLine :: PandocMonad m => RoffLexer m [CellFormat] tableFormatSpecLine = @@ -456,19 +455,19 @@ tableColFormat = do $ True <$ try (string "|" <* notFollowedBy spacetab) c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-', '=','|'] - suffixes <- many $ try (skipMany spacetab *> count 1 digit) <|> + suffixes <- many $ try (skipMany spacetab *> countChar 1 digit) <|> (do x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M', 'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z'] num <- case toLower x of 'w' -> many1 digit <|> (do char '(' xs <- manyTill anyChar (char ')') - return ("(" ++ xs ++ ")")) <|> + return ("(" <> xs <> ")")) <|> return "" 'f' -> count 1 alphaNum <* skipMany spacetab 'm' -> count 1 alphaNum <* skipMany spacetab _ -> return "" - return $ x : num) + return $ T.pack $ x : num) pipeSuffix' <- option False $ True <$ string "|" return $ CellFormat { columnType = c @@ -479,7 +478,7 @@ tableColFormat = do -- We don't fully handle the conditional. But we do -- include everything under '.ie n', which occurs commonly -- in man pages. -lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens +lexConditional :: PandocMonad m => T.Text -> RoffLexer m RoffTokens lexConditional mname = do pos <- getPosition skipMany spacetab @@ -498,7 +497,7 @@ lexConditional mname = do case mbtest of Nothing -> do putState st -- reset state, so we don't record macros in skipped section - report $ SkippedContent ('.':mname) pos + report $ SkippedContent (T.cons '.' mname) pos return mempty Just True -> return ifPart Just False -> do @@ -508,7 +507,7 @@ lexConditional mname = do expression :: PandocMonad m => RoffLexer m (Maybe Bool) expression = do raw <- charsInBalanced '(' ')' (satisfy (/= '\n')) - <|> many1 nonspaceChar + <|> many1Char nonspaceChar returnValue $ case raw of "1" -> Just True @@ -533,17 +532,17 @@ lexIncludeFile args = do pos <- getPosition case args of (f:_) -> do - let fp = linePartsToString f + let fp = linePartsToText f dirs <- getResourcePath - result <- readFileFromDirs dirs fp + result <- readFileFromDirs dirs $ T.unpack fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s ++) + Just s -> getInput >>= setInput . (s <>) return mempty [] -> return mempty resolveMacro :: PandocMonad m - => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens + => T.Text -> [Arg] -> SourcePos -> RoffLexer m RoffTokens resolveMacro macroName args pos = do macros <- customMacros <$> getState case M.lookup macroName macros of @@ -552,7 +551,7 @@ resolveMacro macroName args pos = do let fillLP (MacroArg i) zs = case drop (i - 1) args of [] -> zs - (ys:_) -> ys ++ zs + (ys:_) -> ys <> zs fillLP z zs = z : zs let fillMacroArg (TextLine lineparts) = TextLine (foldr fillLP [] lineparts) @@ -565,7 +564,7 @@ lexStringDef args = do -- string definition [] -> Prelude.fail "No argument to .ds" (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) - let stringName = linePartsToString x + let stringName = linePartsToText x modifyState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } return mempty @@ -575,14 +574,14 @@ lexMacroDef args = do -- macro definition modifyState $ \st -> st{ roffMode = CopyMode } (macroName, stopMacro) <- case args of - (x : y : _) -> return (linePartsToString x, linePartsToString y) + (x : y : _) -> return (linePartsToText x, linePartsToText y) -- optional second arg - (x:_) -> return (linePartsToString x, ".") + (x:_) -> return (linePartsToText x, ".") [] -> Prelude.fail "No argument to .de" let stop = try $ do char '.' <|> char '\'' skipMany spacetab - string stopMacro + textStr stopMacro _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop @@ -628,7 +627,7 @@ lexArgs = do char '"' return [RoffStr "\""] -checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart] +checkDefined :: PandocMonad m => T.Text -> RoffLexer m [LinePart] checkDefined name = do macros <- customMacros <$> getState case M.lookup name macros of @@ -638,19 +637,19 @@ checkDefined name = do escString :: PandocMonad m => RoffLexer m [LinePart] escString = try $ do pos <- getPosition - (do cs <- escapeArg <|> count 1 anyChar - resolveString cs pos) + (do cs <- escapeArg <|> countChar 1 anyChar + resolveText cs pos) <|> mempty <$ char 'S' -- strings and macros share namespace -resolveString :: PandocMonad m - => String -> SourcePos -> RoffLexer m [LinePart] -resolveString stringname pos = do +resolveText :: PandocMonad m + => T.Text -> SourcePos -> RoffLexer m [LinePart] +resolveText stringname pos = do RoffTokens ts <- resolveMacro stringname [] pos case Foldable.toList ts of [TextLine xs] -> return xs _ -> do - report $ SkippedContent ("unknown string " ++ stringname) pos + report $ SkippedContent ("unknown string " <> stringname) pos return mempty lexLine :: PandocMonad m => RoffLexer m RoffTokens @@ -688,16 +687,16 @@ macroArg = try $ do pos <- getPosition backslash char '$' - x <- escapeArg <|> count 1 digit + x <- escapeArg <|> countChar 1 digit case safeRead x of Just i -> return [MacroArg i] Nothing -> do - report $ SkippedContent ("illegal macro argument " ++ x) pos + report $ SkippedContent ("illegal macro argument " <> x) pos return [] regularText :: PandocMonad m => RoffLexer m [LinePart] regularText = do - s <- many1 $ noneOf "\n\r\t \\\"" + s <- many1Char $ noneOf "\n\r\t \\\"" return [RoffStr s] quoteChar :: PandocMonad m => RoffLexer m [LinePart] @@ -708,7 +707,7 @@ quoteChar = do spaceTabChar :: PandocMonad m => RoffLexer m [LinePart] spaceTabChar = do c <- spacetab - return [RoffStr [c]] + return [RoffStr $ T.singleton c] lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens lexEmptyLine = newline >> return (singleTok EmptyLine) @@ -716,8 +715,8 @@ lexEmptyLine = newline >> return (singleTok EmptyLine) manToken :: PandocMonad m => RoffLexer m RoffTokens manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine -linePartsToString :: [LinePart] -> String -linePartsToString = mconcat . map go +linePartsToText :: [LinePart] -> T.Text +linePartsToText = mconcat . map go where go (RoffStr s) = s go _ = mempty @@ -726,7 +725,7 @@ linePartsToString = mconcat . map go lexRoff :: PandocMonad m => SourcePos -> T.Text -> m RoffTokens lexRoff pos txt = do eithertokens <- readWithM (do setPosition pos - mconcat <$> many manToken) def (T.unpack txt) + mconcat <$> many manToken) def txt case eithertokens of Left e -> throwError e Right tokenz -> return tokenz diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 9796de4b9..d587bc41b 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -1,5 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RelaxedPolyRec #-} -- RelaxedPolyRec needed for inlinesBetween on GHC < 7 {- | @@ -31,7 +32,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Text.Pandoc.Shared (crFilter) +import Text.Pandoc.Shared (crFilter, tshow) import Text.Pandoc.XML (fromEntities) -- | Read twiki from an input string and return a Pandoc document. @@ -41,19 +42,19 @@ readTWiki :: PandocMonad m -> m Pandoc readTWiki opts s = do res <- readWithM parseTWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TWParser = ParserT [Char] ParserState +type TWParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TWParser m a -> TWParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TWParser m a -> TWParser m a +tryMsg msg p = try p <?> T.unpack msg nested :: PandocMonad m => TWParser m a -> TWParser m a nested p = do @@ -64,25 +65,25 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -htmlElement :: PandocMonad m => String -> TWParser m (Attr, String) +htmlElement :: PandocMonad m => Text -> TWParser m (Attr, Text) htmlElement tag = tryMsg tag $ do (TagOpen _ attr, _) <- htmlTag (~== TagOpen tag []) - content <- manyTill anyChar (endtag <|> endofinput) + content <- T.pack <$> manyTill anyChar (endtag <|> endofinput) return (htmlAttrToPandoc attr, trim content) where endtag = void $ htmlTag (~== TagClose tag) endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof - trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse + trim = T.dropAround (=='\n') -htmlAttrToPandoc :: [Attribute String] -> Attr +htmlAttrToPandoc :: [Attribute Text] -> Attr htmlAttrToPandoc attrs = (ident, classes, keyvals) where ident = fromMaybe "" $ lookup "id" attrs - classes = maybe [] words $ lookup "class" attrs + classes = maybe [] T.words $ lookup "class" attrs keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] parseHtmlContentWithAttrs :: PandocMonad m - => String -> TWParser m a -> TWParser m (Attr, [a]) + => Text -> TWParser m a -> TWParser m (Attr, [a]) parseHtmlContentWithAttrs tag parser = do (attr, content) <- htmlElement tag parsedContent <- try $ parseContent content @@ -91,7 +92,13 @@ parseHtmlContentWithAttrs tag parser = do parseContent = parseFromString' $ nested $ manyTill parser endOfContent endOfContent = try $ skipMany blankline >> skipSpaces >> eof -parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a] +parseCharHtmlContentWithAttrs :: PandocMonad m + => Text -> TWParser m Char -> TWParser m (Attr, Text) +parseCharHtmlContentWithAttrs tag = fmap go . parseHtmlContentWithAttrs tag + where + go (x, y) = (x, T.pack y) + +parseHtmlContent :: PandocMonad m => Text -> TWParser m a -> TWParser m [a] parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p -- @@ -113,7 +120,7 @@ block = do <|> blockElements <|> para skipMany blankline - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res blockElements :: PandocMonad m => TWParser m B.Blocks @@ -150,38 +157,38 @@ literal = rawBlock <$> htmlElement "literal" format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs rawBlock (attrs, content) = B.rawBlock (format attrs) content -list :: PandocMonad m => String -> TWParser m B.Blocks +list :: PandocMonad m => Text -> TWParser m B.Blocks list prefix = choice [ bulletList prefix , orderedList prefix , definitionList prefix] -definitionList :: PandocMonad m => String -> TWParser m B.Blocks +definitionList :: PandocMonad m => Text -> TWParser m B.Blocks definitionList prefix = tryMsg "definitionList" $ do - indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ " - elements <- many $ parseDefinitionListItem (prefix ++ concat indent) + indent <- lookAhead $ textStr prefix *> many1 (textStr " ") <* textStr "$ " + elements <- many $ parseDefinitionListItem (prefix <> T.concat indent) return $ B.definitionList elements where parseDefinitionListItem :: PandocMonad m - => String -> TWParser m (B.Inlines, [B.Blocks]) + => Text -> TWParser m (B.Inlines, [B.Blocks]) parseDefinitionListItem indent = do - string (indent ++ "$ ") >> skipSpaces + textStr (indent <> "$ ") >> skipSpaces term <- many1Till inline $ string ": " line <- listItemLine indent $ string "$ " return (mconcat term, [line]) -bulletList :: PandocMonad m => String -> TWParser m B.Blocks +bulletList :: PandocMonad m => Text -> TWParser m B.Blocks bulletList prefix = tryMsg "bulletList" $ parseList prefix (char '*') (char ' ') -orderedList :: PandocMonad m => String -> TWParser m B.Blocks +orderedList :: PandocMonad m => Text -> TWParser m B.Blocks orderedList prefix = tryMsg "orderedList" $ parseList prefix (oneOf "1iIaA") (string ". ") parseList :: PandocMonad m - => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks parseList prefix marker delim = do - (indent, style) <- lookAhead $ string prefix *> listStyle <* delim - blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim) + (indent, style) <- lookAhead $ textStr prefix *> listStyle <* delim + blocks <- many $ parseListItem (prefix <> indent) (char style <* delim) return $ case style of '1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks 'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks @@ -191,24 +198,24 @@ parseList prefix marker delim = do _ -> B.bulletList blocks where listStyle = do - indent <- many1 $ string " " + indent <- many1 $ textStr " " style <- marker - return (concat indent, style) + return (T.concat indent, style) parseListItem :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks -parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker + => Text -> TWParser m a -> TWParser m B.Blocks +parseListItem prefix marker = textStr prefix >> marker >> listItemLine prefix marker listItemLine :: (PandocMonad m, Show a) - => String -> TWParser m a -> TWParser m B.Blocks + => Text -> TWParser m a -> TWParser m B.Blocks listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent) where lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse - listContinuation = notFollowedBy (string prefix >> marker) >> + return $ filterSpaces content <> "\n" <> maybe "" (" " <>) continuation + filterSpaces = T.dropWhileEnd (== ' ') + listContinuation = notFollowedBy (textStr prefix >> marker) >> string " " >> lineContent parseContent = parseFromString' $ many1 $ nestedList <|> parseInline parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList) @@ -352,29 +359,29 @@ macroWithParameters = try $ do char '%' return $ buildSpan name kvs $ B.str content -buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines +buildSpan :: Text -> [(Text, Text)] -> B.Inlines -> B.Inlines buildSpan className kvs = B.spanWith attrs where attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses) - additionalClasses = maybe [] words $ lookup "class" kvs + additionalClasses = maybe [] T.words $ lookup "class" kvs kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"] -macroName :: PandocMonad m => TWParser m String +macroName :: PandocMonad m => TWParser m Text macroName = do first <- letter rest <- many $ alphaNum <|> char '_' - return (first:rest) + return $ T.pack $ first:rest -attributes :: PandocMonad m => TWParser m (String, [(String, String)]) -attributes = foldr (either mkContent mkKvs) ([], []) +attributes :: PandocMonad m => TWParser m (Text, [(Text, Text)]) +attributes = foldr (either mkContent mkKvs) ("", []) <$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}') where spnl = skipMany (spaceChar <|> newline) - mkContent c ([], kvs) = (c, kvs) - mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs) + mkContent c ("", kvs) = (c, kvs) + mkContent c (rest, kvs) = (c <> " " <> rest, kvs) mkKvs kv (cont, rest) = (cont, kv : rest) -attribute :: PandocMonad m => TWParser m (Either String (String, String)) +attribute :: PandocMonad m => TWParser m (Either Text (Text, Text)) attribute = withKey <|> withoutKey where withKey = try $ do @@ -383,10 +390,10 @@ attribute = withKey <|> withoutKey curry Right key <$> parseValue False withoutKey = try $ Left <$> parseValue True parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces) - withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"']) + withQuotes = between (char '"') (char '"') (\_ -> countChar 1 $ noneOf ['"']) withoutQuotes allowSpaces - | allowSpaces = many1 $ noneOf "}" - | otherwise = many1 $ noneOf " }" + | allowSpaces = many1Char $ noneOf "}" + | otherwise = many1Char $ noneOf " }" nestedInlines :: (Show a, PandocMonad m) => TWParser m a -> TWParser m B.Inlines @@ -413,10 +420,10 @@ emphHtml :: PandocMonad m => TWParser m B.Inlines emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline) nestedString :: (Show a, PandocMonad m) - => TWParser m a -> TWParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar + => TWParser m a -> TWParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ many1Char spaceChar <* notFollowedBy end boldCode :: PandocMonad m => TWParser m B.Inlines boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString @@ -429,7 +436,7 @@ code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString codeHtml :: PandocMonad m => TWParser m B.Inlines codeHtml = do - (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar + (attrs, content) <- parseCharHtmlContentWithAttrs "code" anyChar return $ B.codeWith attrs $ fromEntities content autoLink :: PandocMonad m => TWParser m B.Inlines @@ -437,7 +444,7 @@ autoLink = try $ do state <- getState guard $ stateAllowLinks state (text, url) <- parseLink - guard $ checkLink (last url) + guard $ checkLink (T.last url) return $ makeLink (text, url) where parseLink = notFollowedBy nop >> (uri <|> emailAddress) @@ -447,17 +454,17 @@ autoLink = try $ do | otherwise = isAlphaNum c str :: PandocMonad m => TWParser m B.Inlines -str = B.str <$> (many1 alphaNum <|> count 1 characterReference) +str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference) nop :: PandocMonad m => TWParser m B.Inlines nop = try $ (void exclamation <|> void nopTag) >> followContent where exclamation = char '!' nopTag = stringAnyCase "<nop>" - followContent = B.str . fromEntities <$> many1 nonspaceChar + followContent = B.str . fromEntities <$> many1Char nonspaceChar symbol :: PandocMonad m => TWParser m B.Inlines -symbol = B.str <$> count 1 nonspaceChar +symbol = B.str <$> countChar 1 nonspaceChar smart :: PandocMonad m => TWParser m B.Inlines smart = do @@ -491,13 +498,13 @@ link = try $ do setState $ st{ stateAllowLinks = True } return $ B.link url title content -linkText :: PandocMonad m => TWParser m (String, String, B.Inlines) +linkText :: PandocMonad m => TWParser m (Text, Text, B.Inlines) linkText = do string "[[" - url <- many1Till anyChar (char ']') + url <- T.pack <$> many1Till anyChar (char ']') content <- option (B.str url) (mconcat <$> linkContent) char ']' return (url, "", content) where - linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent + linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent . T.pack parseLinkContent = parseFromString' $ many1 inline diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index a638fdf40..5e7aaf910 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -1,4 +1,6 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier @@ -38,7 +40,7 @@ import Prelude import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import Data.Char (digitToInt, isUpper) -import Data.List (intercalate, intersperse, transpose) +import Data.List (intersperse, transpose) import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), fromAttrib) @@ -52,7 +54,7 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (crFilter, trim, underlineSpan) +import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow) -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m @@ -61,21 +63,21 @@ readTextile :: PandocMonad m -> m Pandoc readTextile opts s = do parsed <- readWithM parseTextile def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case parsed of Right result -> return result Left e -> throwError e -- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc +parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc parseTextile = do many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys/notes were... let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . concat + manyTill firstPassParser eof >>= setInput . T.concat setPosition startPos st' <- getState let reversedNotes = stateNotes st' @@ -84,29 +86,29 @@ parseTextile = do blocks <- parseBlocks return $ Pandoc nullMeta (B.toList blocks) -- FIXME -noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char] -noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.') +noteMarker :: PandocMonad m => ParserT Text ParserState m Text +noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.') -noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char] +noteBlock :: PandocMonad m => ParserT Text ParserState m Text noteBlock = try $ do startPos <- getPosition ref <- noteMarker optional blankline - contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock) + contents <- T.unlines <$> many1Till anyLine (blanklines <|> noteBlock) endPos <- getPosition - let newnote = (ref, contents ++ "\n") + let newnote = (ref, contents <> "\n") st <- getState let oldnotes = stateNotes st updateState $ \s -> s { stateNotes = newnote : oldnotes } -- return blanks so line count isn't affected - return $ replicate (sourceLine endPos - sourceLine startPos) '\n' + return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks +parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks] +blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -121,22 +123,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT [Char] ParserState m Blocks +block :: PandocMonad m => ParserT Text ParserState m Blocks block = do res <- choice blockParsers <?> "block" - trace (take 60 $ show $ B.toList res) + trace (T.take 60 $ tshow $ B.toList res) return res -commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks codeBlock = codeBlockBc <|> codeBlockPre -codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockBc :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockBc = try $ do string "bc." extended <- option False (True <$ char '.') @@ -150,31 +152,31 @@ codeBlockBc = try $ do rest <- many (notFollowedBy ender *> anyLine) return (f:rest) else manyTill anyLine blanklines - return $ B.codeBlock (trimTrailingNewlines (unlines contents)) + return $ B.codeBlock (trimTrailingNewlines (T.unlines contents)) -trimTrailingNewlines :: String -> String -trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse +trimTrailingNewlines :: Text -> Text +trimTrailingNewlines = T.dropWhileEnd (=='\n') -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks +codeBlockPre :: PandocMonad m => ParserT Text ParserState m Blocks codeBlockPre = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) - result' <- manyTill anyChar (htmlTag (tagClose (=="pre"))) + result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre"))) -- drop leading newline if any - let result'' = case result' of - '\n':xs -> xs - _ -> result' + let result'' = case T.uncons result' of + Just ('\n', xs) -> xs + _ -> result' -- drop trailing newline if any - let result''' = case reverse result'' of - '\n':_ -> init result'' - _ -> result'' - let classes = words $ fromAttrib "class" t + let result''' = case T.unsnoc result'' of + Just (xs, '\n') -> xs + _ -> result'' + let classes = T.words $ fromAttrib "class" t let ident = fromAttrib "id" t let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT [Char] ParserState m Blocks +header :: PandocMonad m => ParserT Text ParserState m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -186,14 +188,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks +blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: PandocMonad m => ParserT [Char] st m Blocks +hrule :: PandocMonad m => ParserT Text st m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -208,39 +210,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks +anyList :: PandocMonad m => ParserT Text ParserState m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> @@ -250,25 +252,25 @@ genericListItemAtDepth c depth = try $ do return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks +definitionList :: PandocMonad m => ParserT Text ParserState m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: PandocMonad m => ParserT [Char] ParserState m () +listStart :: PandocMonad m => ParserT Text ParserState m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: PandocMonad m => Char -> ParserT [Char] st m () +genericListStart :: PandocMonad m => Char -> ParserT Text st m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: PandocMonad m => ParserT [Char] ParserState m () +basicDLStart :: PandocMonad m => ParserT Text ParserState m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines +definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -281,26 +283,26 @@ definitionListStart = try $ do -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks]) definitionListItem = try $ do term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline - multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks] + multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] multilineDef = try $ do optional whitespace >> newline - s <- many1Till anyChar (try (string "=:" >> newline)) - -- this ++ "\n\n" does not look very good - ds <- parseFromString' parseBlocks (s ++ "\n\n") + s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline)) + -- this <> "\n\n" does not look very good + ds <- parseFromString' parseBlocks (s <> "\n\n") return [ds] -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -308,14 +310,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks +rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT [Char] ParserState m Blocks +para :: PandocMonad m => ParserT Text ParserState m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -326,7 +328,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment) +cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -339,18 +341,18 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) cellAttributes notFollowedBy blankline - raw <- trim <$> + raw <- trim . T.pack <$> many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline)) content <- mconcat <$> parseFromString' (many inline) raw return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -360,7 +362,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: PandocMonad m => ParserT [Char] ParserState m Blocks +table :: PandocMonad m => ParserT Text ParserState m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -384,7 +386,7 @@ table = try $ do (map (map snd) rows) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT [Char] ParserState m () +ignorableRow :: PandocMonad m => ParserT Text ParserState m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -393,9 +395,9 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m () +explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m () explicitBlockStart name = try $ do - string name + string (T.unpack name) attributes char '.' optional whitespace @@ -404,9 +406,9 @@ explicitBlockStart name = try $ do -- | Blocks like 'p' and 'table' do not need explicit block tag. -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: PandocMonad m - => String -- ^ block tag name - -> ParserT [Char] ParserState m Blocks -- ^ implicit block - -> ParserT [Char] ParserState m Blocks + => Text -- ^ block tag name + -> ParserT Text ParserState m Blocks -- ^ implicit block + -> ParserT Text ParserState m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -419,11 +421,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: PandocMonad m => ParserT [Char] ParserState m Inlines +inline :: PandocMonad m => ParserT Text ParserState m Inlines inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines] +inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines] inlineParsers = [ str , whitespace , endline @@ -437,13 +439,13 @@ inlineParsers = [ str , link , image , mark - , (B.str . (:[])) <$> characterReference + , (B.str . T.singleton) <$> characterReference , smartPunctuation inline , symbol ] -- | Inline markups -inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -457,33 +459,33 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT [Char] st m Inlines +mark :: PandocMonad m => ParserT Text st m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: PandocMonad m => ParserT [Char] st m Inlines +reg :: PandocMonad m => ParserT Text st m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: PandocMonad m => ParserT [Char] st m Inlines +tm :: PandocMonad m => ParserT Text st m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: PandocMonad m => ParserT [Char] st m Inlines +copy :: PandocMonad m => ParserT Text st m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: PandocMonad m => ParserT [Char] ParserState m Inlines +note :: PandocMonad m => ParserT Text ParserState m Inlines note = try $ do ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState - case lookup ref notes of + case lookup (T.pack ref) notes of Nothing -> Prelude.fail "note not found" Just raw -> B.note <$> parseFromString' parseBlocks raw @@ -500,42 +502,42 @@ stringBreakers :: [Char] stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]" wordBoundaries :: [Char] -wordBoundaries = markupChars ++ stringBreakers +wordBoundaries = markupChars <> stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String +hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) - return $ intercalate "-" (x:xs) + return $ T.intercalate "-" (x:xs) -wordChunk :: PandocMonad m => ParserT [Char] ParserState m String +wordChunk :: PandocMonad m => ParserT Text ParserState m Text wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( noneOf wordBoundaries <|> try (notFollowedBy' note *> oneOf markupChars <* lookAhead (noneOf wordBoundaries) ) ) - return $ hd:tl + return $ T.pack $ hd:tl -- | Any string -str :: PandocMonad m => ParserT [Char] ParserState m Inlines +str :: PandocMonad m => ParserT Text ParserState m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediately -- followed by parens, parens content is unconditionally word acronym fullStr <- option baseStr $ try $ do - guard $ all isUpper baseStr - acro <- enclosed (char '(') (char ')') anyChar' - return $ concat [baseStr, " (", acro, ")"] + guard $ T.all isUpper baseStr + acro <- T.pack <$> enclosed (char '(') (char ')') anyChar' + return $ T.concat [baseStr, " (", acro, ")"] updateLastStrPos return $ B.str fullStr -- | Some number of space chars -whitespace :: PandocMonad m => ParserT [Char] st m Inlines +whitespace :: PandocMonad m => ParserT Text st m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT [Char] ParserState m Inlines +endline :: PandocMonad m => ParserT Text ParserState m Inlines endline = try $ do newline notFollowedBy blankline @@ -543,18 +545,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines +rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: PandocMonad m => ParserT [Char] ParserState m Inlines +link :: PandocMonad m => ParserT Text ParserState m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -567,121 +569,122 @@ link = try $ do else lookAhead $ space <|> eof' <|> try (oneOf "!.,;:" *> (space <|> newline <|> eof')) - url <- many1Till nonspaceChar stop + url <- T.pack <$> many1Till nonspaceChar stop let name' = if B.toList name == [Str "$"] then B.str url else name return $ if attr == nullAttr then B.link url "" name' else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: PandocMonad m => ParserT [Char] ParserState m Inlines +image :: PandocMonad m => ParserT Text ParserState m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes let attr = case lookup "style" kvs of Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls) Nothing -> (ident, cls, kvs) - src <- many1 (noneOf " \t\n\r!(") - alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')') + src <- T.pack <$> many1 (noneOf " \t\n\r!(") + alt <- fmap T.pack $ option "" $ try $ char '(' *> manyTill anyChar (char ')') char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines +escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedEqs = B.str <$> +escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines +escapedEqs = B.str . T.pack <$> try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines -escapedTag = B.str <$> +escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines +escapedTag = B.str . T.pack <$> try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines -symbol = B.str . singleton <$> (notFollowedBy newline *> - notFollowedBy rawHtmlBlock *> - oneOf wordBoundaries) +symbol :: PandocMonad m => ParserT Text ParserState m Inlines +symbol = B.str . T.singleton <$> (notFollowedBy newline *> + notFollowedBy rawHtmlBlock *> + oneOf wordBoundaries) -- | Inline code -code :: PandocMonad m => ParserT [Char] ParserState m Inlines +code :: PandocMonad m => ParserT Text ParserState m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char +anyChar' :: PandocMonad m => ParserT Text ParserState m Char anyChar' = satisfy (/='\n') <|> try (char '\n' <* notFollowedBy blankline) -code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines -code1 = B.code <$> surrounded (char '@') anyChar' +code1 :: PandocMonad m => ParserT Text ParserState m Inlines +code1 = B.code . T.pack <$> surrounded (char '@') anyChar' -code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines +code2 :: PandocMonad m => ParserT Text ParserState m Inlines code2 = do htmlTag (tagOpen (=="tt") null) - B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) + B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: PandocMonad m => ParserT [Char] ParserState m Attr +attributes :: PandocMonad m => ParserT Text ParserState m Attr attributes = foldl (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> ("right" <$ char '>') <|> ("left" <$ char '<') notFollowedBy spaceChar - return $ addStyle ("text-align:" ++ alignStr) + return $ addStyle $ T.pack $ "text-align:" ++ alignStr -attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' - ws <- words `fmap` manyTill anyChar' (char ')') + ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')') case reverse ws of - [] -> return $ \(_,_,keyvals) -> ("",[],keyvals) - (('#':ident'):classes') -> return $ \(_,_,keyvals) -> - (ident',classes',keyvals) - classes' -> return $ \(_,_,keyvals) -> - ("",classes',keyvals) - -styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) + [] + -> return $ \(_,_,keyvals) -> ("",[],keyvals) + ((T.uncons -> Just ('#', ident')):classes') + -> return $ \(_,_,keyvals) -> (ident',classes',keyvals) + classes' + -> return $ \(_,_,keyvals) -> ("",classes',keyvals) + +styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' - return $ addStyle style + return $ addStyle $ T.pack style -addStyle :: String -> Attr -> Attr +addStyle :: Text -> Attr -> Attr addStyle style (id',classes,keyvals) = (id',classes,keyvals') where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] - style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals] + style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals] -langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr) +langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum - return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals) + return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals) -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT [Char] st m t -- ^ surrounding parser - -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly) - -> ParserT [Char] st m [a] + => ParserT Text st m t -- ^ surrounding parser + -> ParserT Text st m a -- ^ content parser (to be used repeatedly) + -> ParserT Text st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: PandocMonad m - => ParserT [Char] ParserState m t -- ^ surrounding parser + => ParserT Text ParserState m t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly) + -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -695,7 +698,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines +groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -704,9 +707,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 --- | Create a singleton list -singleton :: a -> [a] -singleton x = [x] - -eof' :: Monad m => ParserT [Char] s m Char +eof' :: Monad m => ParserT Text s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index 5daf6b0bb..501c204f5 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -43,19 +43,19 @@ readTikiWiki :: PandocMonad m -> m Pandoc readTikiWiki opts s = do res <- readWithM parseTikiWiki def{ stateOptions = opts } - (T.unpack (crFilter s) ++ "\n\n") + (crFilter s <> "\n\n") case res of Left e -> throwError e Right d -> return d -type TikiWikiParser = ParserT [Char] ParserState +type TikiWikiParser = ParserT Text ParserState -- -- utility functions -- -tryMsg :: String -> TikiWikiParser m a -> TikiWikiParser m a -tryMsg msg p = try p <?> msg +tryMsg :: Text -> TikiWikiParser m a -> TikiWikiParser m a +tryMsg msg p = try p <?> (T.unpack msg) skip :: TikiWikiParser m a -> TikiWikiParser m () skip parser = Control.Monad.void parser @@ -89,7 +89,7 @@ block = do <|> para skipMany blankline when (verbosity >= INFO) $ - trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) + trace (T.pack $ printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res)) return res blockElements :: PandocMonad m => TikiWikiParser m B.Blocks @@ -133,7 +133,7 @@ tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks] tableRow = try $ do -- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n")) -- return $ map (B.plain . mconcat) row - row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n")) + row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn . T.pack) (try $ string "|" <* notFollowedBy (oneOf "|\n")) return $ map B.plain row where parseColumn x = do @@ -342,15 +342,15 @@ listItemLine nest = lineContent >>= parseContent lineContent = do content <- anyLine continuation <- optionMaybe listContinuation - return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation - filterSpaces = reverse . dropWhile (== ' ') . reverse + return $ filterSpaces content <> "\n" <> Data.Maybe.fromMaybe "" continuation + filterSpaces = T.dropWhileEnd (== ' ') listContinuation = string (replicate nest '+') >> lineContent parseContent x = do parsed <- parseFromString (many1 inline) x return $ mconcat $ dropWhileEnd (== B.space) parsed -- Turn the CODE macro attributes into Pandoc code block attributes. -mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)]) +mungeAttrs :: [(Text, Text)] -> (Text, [Text], [(Text, Text)]) mungeAttrs rawAttrs = ("", classes, rawAttrs) where -- "colors" is TikiWiki CODE macro for "name of language to do @@ -370,7 +370,7 @@ codeMacro = try $ do string "{CODE(" rawAttrs <- macroAttrs string ")}" - body <- manyTill anyChar (try (string "{CODE}")) + body <- T.pack <$> manyTill anyChar (try (string "{CODE}")) newline if not (null rawAttrs) then @@ -428,9 +428,9 @@ nbsp = try $ do htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines htmlComment = try $ do string "~hc~" - inner <- many1 $ noneOf "~" + inner <- fmap T.pack $ many1 $ noneOf "~" string "~/hc~" - return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END " + return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " <> inner <> " ~/hc~ :END " linebreak :: PandocMonad m => TikiWikiParser m B.Inlines linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline) @@ -469,15 +469,15 @@ image = try $ do let title = fromMaybe src $ lookup "desc" rawAttrs let alt = fromMaybe title $ lookup "alt" rawAttrs let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs - if not (null src) + if not (T.null src) then return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt) else - return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END " + return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " <> printAttrs rawAttrs <> "} :END " where - printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs + printAttrs attrs = T.unwords $ map (\(a, b) -> a <> "=\"" <> b <> "\"") attrs -imageAttr :: PandocMonad m => TikiWikiParser m (String, String) +imageAttr :: PandocMonad m => TikiWikiParser m (Text, Text) imageAttr = try $ do key <- many1 (noneOf "=} \t\n") char '=' @@ -485,7 +485,7 @@ imageAttr = try $ do value <- many1 (noneOf "}\"\n") optional $ char '"' optional $ char ',' - return (key, value) + return (T.pack key, T.pack value) -- __strong__ @@ -500,57 +500,57 @@ emph = try $ fmap B.emph (enclosed (string "''") nestedInlines) escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines escapedChar = try $ do string "~" - mNumber <- safeRead <$> many1 digit + mNumber <- safeRead . T.pack <$> many1 digit string "~" return $ B.str $ case mNumber of - Just number -> [toEnum (number :: Int) :: Char] - Nothing -> [] + Just number -> T.singleton $ toEnum (number :: Int) + Nothing -> "" -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this centered :: PandocMonad m => TikiWikiParser m B.Inlines centered = try $ do string "::" - inner <- many1 $ noneOf ":\n" + inner <- fmap T.pack $ many1 $ noneOf ":\n" string "::" - return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END " + return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" <> inner <> ":: :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this colored :: PandocMonad m => TikiWikiParser m B.Inlines colored = try $ do string "~~" - inner <- many1 $ noneOf "~\n" + inner <- fmap T.pack $ many1 $ noneOf "~\n" string "~~" - return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END " + return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" <> inner <> "~~ :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this underlined :: PandocMonad m => TikiWikiParser m B.Inlines underlined = try $ do string "===" - inner <- many1 $ noneOf "=\n" + inner <- fmap T.pack $ many1 $ noneOf "=\n" string "===" - return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END " + return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" <> inner <> "=== :END " -- UNSUPPORTED, as there doesn't seem to be any facility in calibre -- for this boxed :: PandocMonad m => TikiWikiParser m B.Inlines boxed = try $ do string "^" - inner <- many1 $ noneOf "^\n" + inner <- fmap T.pack $ many1 $ noneOf "^\n" string "^" - return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END " + return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" <> inner <> "^ :END " -- --text-- strikeout :: PandocMonad m => TikiWikiParser m B.Inlines strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines) -nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String -nestedString end = innerSpace <|> count 1 nonspaceChar +nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m Text +nestedString end = innerSpace <|> countChar 1 nonspaceChar where - innerSpace = try $ many1 spaceChar <* notFollowedBy end + innerSpace = try $ T.pack <$> many1 spaceChar <* notFollowedBy end breakChars :: PandocMonad m => TikiWikiParser m B.Inlines breakChars = try $ string "%%%" >> return B.linebreak @@ -564,7 +564,7 @@ superMacro = try $ do string "{SUP(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUP}") - return $ B.superscript $ B.text body + return $ B.superscript $ B.text $ T.pack body -- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux subTag :: PandocMonad m => TikiWikiParser m B.Inlines @@ -575,22 +575,22 @@ subMacro = try $ do string "{SUB(" manyTill anyChar (string ")}") body <- manyTill anyChar (string "{SUB}") - return $ B.subscript $ B.text body + return $ B.subscript $ B.text $ T.pack body -- -+text+- code :: PandocMonad m => TikiWikiParser m B.Inlines code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString) -macroAttr :: PandocMonad m => TikiWikiParser m (String, String) +macroAttr :: PandocMonad m => TikiWikiParser m (Text, Text) macroAttr = try $ do key <- many1 (noneOf "=)") char '=' optional $ char '"' value <- many1 (noneOf " )\"") optional $ char '"' - return (key, value) + return (T.pack key, T.pack value) -macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)] +macroAttrs :: PandocMonad m => TikiWikiParser m [(Text, Text)] macroAttrs = try $ sepEndBy macroAttr spaces -- ~np~ __not bold__ ~/np~ @@ -598,13 +598,13 @@ noparse :: PandocMonad m => TikiWikiParser m B.Inlines noparse = try $ do string "~np~" body <- manyTill anyChar (string "~/np~") - return $ B.str body + return $ B.str $ T.pack body str :: PandocMonad m => TikiWikiParser m B.Inlines -str = fmap B.str (many1 alphaNum <|> count 1 characterReference) +str = fmap B.str (T.pack <$> many1 alphaNum <|> countChar 1 characterReference) symbol :: PandocMonad m => TikiWikiParser m B.Inlines -symbol = fmap B.str (count 1 nonspaceChar) +symbol = fmap B.str (countChar 1 nonspaceChar) -- [[not a link] notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines @@ -612,14 +612,14 @@ notExternalLink = try $ do start <- string "[[" body <- many (noneOf "\n[]") end <- string "]" - return $ B.text (start ++ body ++ end) + return $ B.text $ T.pack $ start ++ body ++ end -- [http://www.somesite.org url|Some Site title] -- ((internal link)) -- -- The ((...)) wiki links and [...] external links are handled -- exactly the same; this abstracts that out -makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines +makeLink :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m B.Inlines makeLink start middle end = try $ do st <- getState guard $ stateAllowLinks st @@ -627,15 +627,15 @@ makeLink start middle end = try $ do (url, title, anchor) <- wikiLinkText start middle end parsedTitle <- parseFromString (many1 inline) title setState $ st{ stateAllowLinks = True } - return $ B.link (url++anchor) "" $mconcat parsedTitle + return $ B.link (url <> anchor) "" $ mconcat parsedTitle -wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String) +wikiLinkText :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m (Text, Text, Text) wikiLinkText start middle end = do - string start - url <- many1 (noneOf $ middle ++ "\n") + string (T.unpack start) + url <- T.pack <$> many1 (noneOf $ T.unpack middle ++ "\n") seg1 <- option url linkContent seg2 <- option "" linkContent - string end + string (T.unpack end) if seg2 /= "" then return (url, seg2, seg1) @@ -644,7 +644,7 @@ wikiLinkText start middle end = do where linkContent = do char '|' - many (noneOf middle) + T.pack <$> many (noneOf $ T.unpack middle) externalLink :: PandocMonad m => TikiWikiParser m B.Inlines externalLink = makeLink "[" "]|" "]" diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0af52e046..996a818fd 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Txt2Tags Copyright : Copyright (C) 2014 Matthew Pickering @@ -18,7 +19,6 @@ import Prelude import Control.Monad (guard, void, when) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader (Reader, asks, runReader) -import Data.Char (toLower) import Data.Default import Data.List (intercalate, transpose) import Data.Maybe (fromMaybe) @@ -36,13 +36,13 @@ import Text.Pandoc.Parsing hiding (space, spaces, uri) import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI, underlineSpan) -type T2T = ParserT String ParserState (Reader T2TMeta) +type T2T = ParserT Text ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file data T2TMeta = T2TMeta { - date :: String -- ^ Current date - , mtime :: String -- ^ Last modification time of infile + date :: Text -- ^ Current date + , mtime :: Text -- ^ Last modification time of infile , infile :: FilePath -- ^ Input file , outfile :: FilePath -- ^ Output file } deriving Show @@ -63,7 +63,7 @@ getT2TMeta = do _ -> catchError (maximum <$> mapM getModTime inps) (const (return "")) - return $ T2TMeta curDate curMtime (intercalate ", " inps) outp + return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m @@ -74,14 +74,14 @@ readTxt2Tags opts s = do meta <- getT2TMeta let parsed = flip runReader meta $ readWithM parseT2T (def {stateOptions = opts}) $ - T.unpack (crFilter s) ++ "\n\n" + crFilter s <> "\n\n" case parsed of Right result -> return result Left e -> throwError e -- | Read Txt2Tags (ignoring all macros) from an input string returning -- a Pandoc document --- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc +-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -- readTxt2TagsNoMacros = readTxt2Tags parseT2T :: T2T Pandoc @@ -106,7 +106,7 @@ parseHeader = do header :: T2T () header = titleline >> authorline >> dateline -headerline :: B.ToMetaValue a => String -> T2T a -> T2T () +headerline :: B.ToMetaValue a => Text -> T2T a -> T2T () headerline field p = (() <$ try blankline) <|> (p >>= updateState . B.setMeta field) @@ -123,15 +123,15 @@ authorline = dateline :: T2T () dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline) -type Keyword = String -type Value = String +type Keyword = Text +type Value = Text setting :: T2T (Keyword, Value) setting = do string "%!" - keyword <- ignoreSpacesCap (many1 alphaNum) + keyword <- ignoreSpacesCap (many1Char alphaNum) char ':' - value <- ignoreSpacesCap (manyTill anyChar newline) + value <- ignoreSpacesCap (manyTillChar anyChar newline) return (keyword, value) -- Blocks @@ -163,10 +163,10 @@ balancedTitle c = try $ do spaces level <- length <$> many1 (char c) guard (level <= 5) -- Max header level 5 - heading <- manyTill (noneOf "\n\r") (count level (char c)) + heading <- manyTillChar (noneOf "\n\r") (count level (char c)) label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-")) many spaceChar *> newline - let attr = maybe nullAttr (\x -> (x, [], [])) label + let attr = maybe nullAttr (\x -> (T.pack x, [], [])) label return $ B.headerWith attr level (trimInlines $ B.text heading) para :: T2T Blocks @@ -192,7 +192,7 @@ quote :: T2T Blocks quote = try $ do lookAhead tab rawQuote <- many1 (tab *> optional spaces *> anyLine) - contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n") + contents <- parseFromString' parseBlocks (T.intercalate "\n" rawQuote <> "\n\n") return $ B.blockQuote contents commentLine :: T2T Inlines @@ -243,17 +243,17 @@ listItem start end = try $ do markerLength <- try start firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString' end $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString' end $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. listContinuation :: Int - -> T2T String + -> T2T Text listContinuation markerLength = try $ notFollowedBy' (blankline >> blankline) - *> (mappend <$> (concat <$> many1 listLine) - <*> many blankline) + *> (mappend <$> (T.concat <$> many1 listLine) + <*> manyChar blankline) where listLine = try $ indentWith markerLength *> anyLineNewline -- Table @@ -327,16 +327,16 @@ taggedBlock = do -- Generic -genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s -blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks +blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks blockMarkupArea p f s = try (do - string s *> blankline - f . mconcat <$> manyTill p (eof <|> void (string s *> blankline))) + textStr s *> blankline + f . mconcat <$> manyTill p (eof <|> void (textStr s *> blankline))) -blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks -blockMarkupLine p f s = try (f <$> (string s *> space *> p)) +blockMarkupLine :: T2T a -> (a -> Blocks) -> Text -> T2T Blocks +blockMarkupLine p f s = try (f <$> (textStr s *> space *> p)) -- Can be in either block or inline position comment :: Monoid a => T2T a @@ -385,15 +385,15 @@ italic :: T2T Inlines italic = inlineMarkup inline B.emph '/' B.str code :: T2T Inlines -code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id +code = inlineMarkup (T.singleton <$> anyChar) B.code '`' id raw :: T2T Inlines -raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id +raw = inlineMarkup (T.singleton <$> anyChar) B.text '"' id tagged :: T2T Inlines tagged = do target <- getTarget - inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id + inlineMarkup (T.singleton <$> anyChar) (B.rawInline target) '\'' id -- Parser for markup indicated by a double character. -- Inline markup is greedy and glued @@ -404,33 +404,33 @@ inlineMarkup :: Monoid a => T2T a -- Content parser -> (a -> Inlines) -- Constructor -> Char -- Fence - -> (String -> a) -- Special Case to handle ****** + -> (Text -> a) -- Special Case to handle ****** -> T2T Inlines inlineMarkup p f c special = try $ do - start <- many1 (char c) - let l = length start + start <- many1Char (char c) + let l = T.length start guard (l >= 2) when (l == 2) (void $ notFollowedBy space) -- We must make sure that there is no space before the start of the -- closing tags - body <- optionMaybe (try $ manyTill (noneOf "\n\r") + body <- optionMaybe (try $ manyTillChar (noneOf "\n\r") (try $ lookAhead (noneOf " " >> string [c,c] ))) case body of Just middle -> do lastChar <- anyChar - end <- many1 (char c) + end <- many1Char (char c) let parser inp = parseFromString' (mconcat <$> many p) inp - let start' = case drop 2 start of + let start' = case T.drop 2 start of "" -> mempty xs -> special xs - body' <- parser (middle ++ [lastChar]) - let end' = case drop 2 end of + body' <- parser (middle <> T.singleton lastChar) + let end' = case T.drop 2 end of "" -> mempty xs -> special xs return $ f (start' `mappend` body' `mappend` end') Nothing -> do -- Either bad or case such as ***** guard (l >= 5) - let body' = replicate (l - 4) c + let body' = T.replicate (l - 4) $ T.singleton c return $ f (special body') link :: T2T Inlines @@ -441,12 +441,12 @@ titleLink :: T2T Inlines titleLink = try $ do char '[' notFollowedBy space - tokens <- sepBy1 (many $ noneOf " ]") space + tokens <- sepBy1 (manyChar $ noneOf " ]") space guard (length tokens >= 2) char ']' let link' = last tokens - guard $ not $ null link' - let tit = unwords (init tokens) + guard $ not $ T.null link' + let tit = T.unwords (init tokens) return $ B.link link' "" (B.text tit) -- Link with image @@ -455,7 +455,7 @@ imageLink = try $ do char '[' body <- image many1 space - l <- manyTill (noneOf "\n\r ") (char ']') + l <- manyTillChar (noneOf "\n\r ") (char ']') return (B.link l "" body) macro :: T2T Inlines @@ -466,7 +466,7 @@ macro = try $ do maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands) where commands = [ ("date", date), ("mtime", mtime) - , ("infile", infile), ("outfile", outfile)] + , ("infile", T.pack . infile), ("outfile", T.pack . outfile)] -- raw URLs in text are automatically linked url :: T2T Inlines @@ -474,7 +474,7 @@ url = try $ do (rawUrl, escapedUrl) <- try uri <|> emailAddress return $ B.link rawUrl "" (B.str escapedUrl) -uri :: T2T (String, String) +uri :: T2T (Text, Text) uri = try $ do address <- t2tURI return (address, escapeURI address) @@ -486,25 +486,25 @@ uri = try $ do --isT2TURI (parse t2tURI "" -> Right _) = True --isT2TURI _ = False -t2tURI :: T2T String +t2tURI :: T2T Text t2tURI = do - start <- try ((++) <$> proto <*> urlLogin) <|> guess - domain <- many1 chars - sep <- many (char '/') - form' <- option mempty ((:) <$> char '?' <*> many1 form) - anchor' <- option mempty ((:) <$> char '#' <*> many anchor) - return (start ++ domain ++ sep ++ form' ++ anchor') + start <- try ((<>) <$> proto <*> urlLogin) <|> guess + domain <- many1Char chars + sep <- manyChar (char '/') + form' <- option mempty (T.cons <$> char '?' <*> many1Char form) + anchor' <- option mempty (T.cons <$> char '#' <*> manyChar anchor) + return (start <> domain <> sep <> form' <> anchor') where protos = ["http", "https", "ftp", "telnet", "gopher", "wais"] - proto = (++) <$> oneOfStrings protos <*> string "://" - guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23")) - <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.') + proto = (<>) <$> oneOfStrings protos <*> textStr "://" + guess = (<>) <$> (((<>) <$> stringAnyCase "www" <*> option mempty (T.singleton <$> oneOf "23")) + <|> stringAnyCase "ftp") <*> (T.singleton <$> char '.') login = alphaNum <|> oneOf "_.-" - pass = many (noneOf " @") + pass = manyChar (noneOf " @") chars = alphaNum <|> oneOf "%._/~:,=$@&+-" anchor = alphaNum <|> oneOf "%._0" form = chars <|> oneOf ";*" - urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@') + urlLogin = option mempty $ try ((\x y z -> x <> y <> T.singleton z) <$> many1Char login <*> option mempty (T.cons <$> char ':' <*> pass) <*> char '@') image :: T2T Inlines @@ -512,12 +512,12 @@ image = try $ do -- List taken from txt2tags source let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"] char '[' - (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions) + (path, ext) <- manyUntilChar (noneOf "\n\t\r ") (oneOfStrings extensions) char ']' - return $ B.image (path ++ ext) "" mempty + return $ B.image (path <> ext) "" mempty -- Characters used in markup -specialChars :: String +specialChars :: [Char] specialChars = "%*-_/|:+;" tab :: T2T Char @@ -526,8 +526,8 @@ tab = char '\t' space :: T2T Char space = char ' ' -spaces :: T2T String -spaces = many space +spaces :: T2T Text +spaces = manyChar space endline :: T2T Inlines endline = try $ do @@ -544,17 +544,17 @@ endline = try $ do return B.softbreak str :: T2T Inlines -str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = try $ B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") whitespace :: T2T Inlines whitespace = try $ B.space <$ spaceChar symbol :: T2T Inlines -symbol = B.str . (:[]) <$> oneOf specialChars +symbol = B.str . T.singleton <$> oneOf specialChars -- Utility -getTarget :: T2T String +getTarget :: T2T Text getTarget = do mv <- lookupMeta "target" . stateMeta <$> getState return $ case mv of @@ -565,5 +565,5 @@ getTarget = do atStart :: T2T () atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) -ignoreSpacesCap :: T2T String -> T2T String -ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces) +ignoreSpacesCap :: T2T Text -> T2T Text +ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces) diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 27b7d7245..f7edabc48 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Readers.Vimwiki @@ -51,9 +52,10 @@ import Prelude import Control.Monad (guard) import Control.Monad.Except (throwError) import Data.Default -import Data.List (isInfixOf, isPrefixOf) +import Data.List (isInfixOf) import Data.Maybe import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines) import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code, codeBlockWith, definitionList, @@ -73,12 +75,13 @@ import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, - stateOptions, uri) -import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast, - isURI) + stateOptions, uri, manyTillChar, manyChar, textStr, + many1Char, countChar, many1TillChar) +import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast, + isURI, tshow) import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, spaces, string) -import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1, +import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) @@ -128,7 +131,7 @@ block = do , definitionList , para ] - trace (take 60 $ show $ toList res) + trace (T.take 60 $ tshow $ toList res) return res blockML :: PandocMonad m => VwParser m Blocks @@ -218,32 +221,32 @@ defMarkerM = string "::" >> spaceChar defMarkerE :: PandocMonad m => VwParser m Char defMarkerE = string "::" >> newline -hasDefMarkerM :: PandocMonad m => VwParser m String -hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM) +hasDefMarkerM :: PandocMonad m => VwParser m Text +hasDefMarkerM = manyTillChar (noneOf "\n") (try defMarkerM) preformatted :: PandocMonad m => VwParser m Blocks preformatted = try $ do many spaceChar >> string "{{{" - attrText <- many (noneOf "\n") + attrText <- manyChar (noneOf "\n") lookAhead newline - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}}" >> many spaceChar >> newline)) - if (contents /= "") && (head contents == '\n') - then return $ B.codeBlockWith (makeAttr attrText) (tail contents) + if (contents /= "") && (T.head contents == '\n') + then return $ B.codeBlockWith (makeAttr attrText) (T.tail contents) else return $ B.codeBlockWith (makeAttr attrText) contents -makeAttr :: String -> Attr +makeAttr :: Text -> Attr makeAttr s = - let xs = splitBy (`elem` " \t") s in + let xs = splitTextBy (`elem` (" \t" :: String)) s in ("", [], mapMaybe nameValue xs) -nameValue :: String -> Maybe (String, String) +nameValue :: Text -> Maybe (Text, Text) nameValue s = - let t = splitBy (== '=') s in + let t = splitTextBy (== '=') s in if length t /= 2 then Nothing else let (a, b) = (head t, last t) in - if (length b < 2) || ((head b, last b) /= ('"', '"')) + if (T.length b < 2) || ((T.head b, T.last b) /= ('"', '"')) then Nothing else Just (a, stripFirstAndLast b) @@ -253,16 +256,16 @@ displayMath = try $ do many spaceChar >> string "{{$" mathTag <- option "" mathTagParser many space - contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$" + contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}$" >> many spaceChar >> newline)) let contentsWithTags | mathTag == "" = contents - | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents - ++ "\n\\end{" ++ mathTag ++ "}" + | otherwise = "\\begin{" <> mathTag <> "}\n" <> contents + <> "\n\\end{" <> mathTag <> "}" return $ B.para $ B.displayMath contentsWithTags -mathTagLaTeX :: String -> String +mathTagLaTeX :: Text -> Text mathTagLaTeX s = case s of "equation" -> "" "equation*" -> "" @@ -360,17 +363,17 @@ combineList x [y] = case toList y of _ -> x:[y] combineList x xs = x:xs -listStart :: PandocMonad m => VwParser m (Int, String) +listStart :: PandocMonad m => VwParser m (Int, Text) listStart = try $ do s <- many spaceChar listType <- bulletListMarkers <|> orderedListMarkers spaceChar return (length s, listType) -bulletListMarkers :: PandocMonad m => VwParser m String +bulletListMarkers :: PandocMonad m => VwParser m Text bulletListMarkers = "ul" <$ (char '*' <|> char '-') -orderedListMarkers :: PandocMonad m => VwParser m String +orderedListMarkers :: PandocMonad m => VwParser m Text orderedListMarkers = ("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha]))) <|> ("ol" <$ char '#') @@ -421,9 +424,9 @@ placeholder :: PandocMonad m => VwParser m () placeholder = try $ choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh -ph :: PandocMonad m => String -> VwParser m () +ph :: PandocMonad m => Text -> VwParser m () ph s = try $ do - many spaceChar >>string ('%':s) >> spaceChar + many spaceChar >> textStr (T.cons '%' s) >> spaceChar contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline) --use lookAhead because of placeholder in the whitespace parser let meta' = B.setMeta s contents nullMeta @@ -476,7 +479,7 @@ inlineML :: PandocMonad m => VwParser m Inlines inlineML = choice $ whitespace endlineML:inlineList str :: PandocMonad m => VwParser m Inlines -str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars) +str = B.str <$> many1Char (noneOf $ spaceChars ++ specialChars) whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines whitespace endline = B.space <$ (skipMany1 spaceChar <|> @@ -487,7 +490,7 @@ whitespace' :: PandocMonad m => VwParser m Inlines whitespace' = B.space <$ skipMany1 spaceChar special :: PandocMonad m => VwParser m Inlines -special = B.str <$> count 1 (oneOf specialChars) +special = B.str <$> countChar 1 (oneOf specialChars) bareURL :: PandocMonad m => VwParser m Inlines bareURL = try $ do @@ -505,8 +508,8 @@ strong = try $ do return $ B.spanWith (makeId contents, [], []) mempty <> B.strong contents -makeId :: Inlines -> String -makeId i = concat (stringify <$> toList i) +makeId :: Inlines -> Text +makeId i = T.concat (stringify <$> toList i) emph :: PandocMonad m => VwParser m Inlines emph = try $ do @@ -527,7 +530,7 @@ strikeout = try $ do code :: PandocMonad m => VwParser m Inlines code = try $ do char '`' - contents <- many1Till (noneOf "\n") (char '`') + contents <- many1TillChar (noneOf "\n") (char '`') return $ B.code contents superscript :: PandocMonad m => VwParser m Inlines @@ -542,8 +545,8 @@ subscript = try $ link :: PandocMonad m => VwParser m Inlines link = try $ do string "[[" - contents <- lookAhead $ manyTill anyChar (string "]]") - case '|' `elem` contents of + contents <- lookAhead $ manyTillChar anyChar (string "]]") + case T.any (== '|') contents of False -> do manyTill anyChar (string "]]") -- not using try here because [[hell]o]] is not rendered as a link in vimwiki @@ -552,7 +555,7 @@ link = try $ do else "wikilink" return $ B.link (procLink contents) tit (B.str contents) True -> do - url <- manyTill anyChar $ char '|' + url <- manyTillChar anyChar $ char '|' lab <- mconcat <$> manyTill inline (string "]]") let tit = if isURI url then "" @@ -568,52 +571,52 @@ image = try $ do images :: PandocMonad m => Int -> VwParser m Inlines images k | k == 0 = do - imgurl <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (try $ string "}}") return $ B.image (procImgurl imgurl) "" (B.str "") | k == 1 = do - imgurl <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') alt <- mconcat <$> manyTill inline (try $ string "}}") return $ B.image (procImgurl imgurl) "" alt | k == 2 = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (try $ string "}}") + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt | otherwise = do - imgurl <- manyTill anyChar (char '|') - alt <- mconcat <$>manyTill inline (char '|') - attrText <- manyTill anyChar (char '|') + imgurl <- manyTillChar anyChar (char '|') + alt <- mconcat <$> manyTill inline (char '|') + attrText <- manyTillChar anyChar (char '|') manyTill anyChar (try $ string "}}") return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt -procLink' :: String -> String +procLink' :: Text -> Text procLink' s - | take 6 s == "local:" = "file" ++ drop 5 s - | take 6 s == "diary:" = "diary/" ++ drop 6 s - | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", + | T.take 6 s == "local:" = "file" <> T.drop 5 s + | T.take 6 s == "diary:" = "diary/" <> T.drop 6 s + | or ((`T.isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:", "news:", "telnet:" ]) = s | s == "" = "" - | last s == '/' = s + | T.last s == '/' = s | otherwise = s -procLink :: String -> String -procLink s = procLink' x ++ y - where (x, y) = break (=='#') s +procLink :: Text -> Text +procLink s = procLink' x <> y + where (x, y) = T.break (=='#') s -procImgurl :: String -> String -procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s +procImgurl :: Text -> Text +procImgurl s = if T.take 6 s == "local:" then "file" <> T.drop 5 s else s inlineMath :: PandocMonad m => VwParser m Inlines inlineMath = try $ - B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$') + B.math <$ char '$' <*> many1TillChar (noneOf "\n") (char '$') tag :: PandocMonad m => VwParser m Inlines tag = try $ do char ':' - s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space)) - guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":") - let ss = splitBy (==':') s + s <- manyTillChar (noneOf spaceChars) (try (char ':' >> lookAhead space)) + guard $ not $ "::" `T.isInfixOf` (":" <> s <> ":") + let ss = splitTextBy (==':') s return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss) todoMark :: PandocMonad m => VwParser m Inlines @@ -646,16 +649,16 @@ nFBTTBSB = hasDefMarker :: PandocMonad m => VwParser m () hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars) -makeTagSpan' :: String -> Inlines -makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <> +makeTagSpan' :: Text -> Inlines +makeTagSpan' s = B.spanWith (T.cons '-' s, [], []) (B.str "") <> B.spanWith (s, ["tag"], []) (B.str s) -makeTagSpan :: String -> Inlines +makeTagSpan :: Text -> Inlines makeTagSpan s = B.space <> makeTagSpan' s -mathTagParser :: PandocMonad m => VwParser m String +mathTagParser :: PandocMonad m => VwParser m Text mathTagParser = do - s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars) + s <- try $ lookAhead (char '%' >> manyTillChar (noneOf spaceChars) (try $ char '%' >> many (noneOf $ '%':spaceChars) >> space)) - char '%' >> string s >> char '%' + char '%' >> textStr s >> char '%' return $ mathTagLaTeX s |