aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs45
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs28
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs102
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs76
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs3
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs33
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs11
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs171
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs48
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs9
-rw-r--r--src/Text/Pandoc/Readers/DokuWiki.hs187
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs53
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs123
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs231
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs52
-rw-r--r--src/Text/Pandoc/Readers/Ipynb.hs79
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs76
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs334
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Lang.hs16
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs32
-rw-r--r--src/Text/Pandoc/Readers/Man.hs32
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs426
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs198
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs144
-rw-r--r--src/Text/Pandoc/Readers/Native.hs13
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs29
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs3
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs61
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs33
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs10
-rw-r--r--src/Text/Pandoc/Readers/Org.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs25
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs219
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs44
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs40
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs154
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs72
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs50
-rw-r--r--src/Text/Pandoc/Readers/RST.hs486
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs185
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs117
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs273
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs94
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs132
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs127
47 files changed, 2427 insertions, 2307 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 368c86d4f..40b6f77c9 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.CommonMark
Copyright : Copyright (C) 2015-2019 John MacFarlane
@@ -18,9 +20,9 @@ where
import Prelude
import CMarkGFM
import Control.Monad.State
-import Data.List (groupBy)
import qualified Data.Set as Set
-import Data.Text (Text, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojiToInline)
@@ -40,24 +42,24 @@ readCommonMark opts s = return $
[ extTable | isEnabled Ext_pipe_tables opts ] ++
[ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
-convertEmojis :: String -> [Inline]
-convertEmojis s@(':':xs) =
- case break (==':') xs of
- (ys,':':zs) ->
+convertEmojis :: Text -> [Inline]
+convertEmojis s@(T.uncons -> Just (':',xs)) =
+ case T.break (==':') xs of
+ (ys, T.uncons -> Just (':',zs)) ->
case emojiToInline ys of
Just em -> em : convertEmojis zs
- Nothing -> Str (':' : ys) : convertEmojis (':':zs)
+ Nothing -> Str (":" <> ys) : convertEmojis (":" <> zs)
_ -> [Str s]
convertEmojis s =
- case break (==':') s of
+ case T.break (==':') s of
("","") -> []
(_,"") -> [Str s]
- (xs,ys) -> Str xs:convertEmojis ys
+ (xs,ys) -> Str xs : convertEmojis ys
addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
-addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block
+addHeaderId :: ReaderOptions -> Block -> State (Set.Set Text) Block
addHeaderId opts (Header lev (_,classes,kvs) ils) = do
ids <- get
let ident = uniqueIdent (readerExtensions opts) ils ids
@@ -82,14 +84,14 @@ addBlock _ (Node _ THEMATIC_BREAK _) =
addBlock opts (Node _ BLOCK_QUOTE nodes) =
(BlockQuote (addBlocks opts nodes) :)
addBlock opts (Node _ (HTML_BLOCK t) _)
- | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :)
+ | isEnabled Ext_raw_html opts = (RawBlock (Format "html") t :)
| otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
id
addBlock _ (Node _ (CODE_BLOCK info t) _) =
- (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
+ (CodeBlock ("", take 1 (T.words info), []) t :)
addBlock opts (Node _ (HEADING lev) nodes) =
(Header lev ("",[],[]) (addInlines opts nodes) :)
addBlock opts (Node _ (LIST listAttrs) nodes) =
@@ -176,29 +178,28 @@ addInlines opts = foldr (addInline opts) []
addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++)
- where raw = unpack t
- clumps = groupBy samekind raw
+ where clumps = T.groupBy samekind t
samekind ' ' ' ' = True
samekind ' ' _ = False
samekind _ ' ' = False
samekind _ _ = True
- toinl (' ':_) = [Space]
- toinl xs = if isEnabled Ext_emoji opts
- then convertEmojis xs
- else [Str xs]
+ toinl (T.uncons -> Just (' ', _)) = [Space]
+ toinl xs = if isEnabled Ext_emoji opts
+ then convertEmojis xs
+ else [Str xs]
addInline _ (Node _ LINEBREAK _) = (LineBreak :)
addInline opts (Node _ SOFTBREAK _)
| isEnabled Ext_hard_line_breaks opts = (LineBreak :)
| otherwise = (SoftBreak :)
addInline opts (Node _ (HTML_INLINE t) _)
- | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :)
+ | isEnabled Ext_raw_html opts = (RawInline (Format "html") t :)
| otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
id
addInline _ (Node _ (CODE t) _) =
- (Code ("",[],[]) (unpack t) :)
+ (Code ("",[],[]) t :)
addInline opts (Node _ EMPH nodes) =
(Emph (addInlines opts nodes) :)
addInline opts (Node _ STRONG nodes) =
@@ -206,7 +207,7 @@ addInline opts (Node _ STRONG nodes) =
addInline opts (Node _ STRIKETHROUGH nodes) =
(Strikeout (addInlines opts nodes) :)
addInline opts (Node _ (LINK url title) nodes) =
- (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+ (Link nullAttr (addInlines opts nodes) (url, title) :)
addInline opts (Node _ (IMAGE url title) nodes) =
- (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+ (Image nullAttr (addInlines opts nodes) (url, title) :)
addInline _ _ = id
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index ceb63ac84..1aa1dfaa4 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -19,6 +19,7 @@ import Control.Monad.Except (guard, liftM2, throwError)
import qualified Data.Foldable as F
import Data.Maybe (fromMaybe)
import Data.Text (Text)
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.Definition
@@ -70,7 +71,6 @@ parseCreole = do
eof
return $ B.doc bs
-
--
-- block parsers
--
@@ -92,9 +92,9 @@ nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart
>> manyTill content nowikiEnd)
where
content = brackets <|> line
- brackets = try $ option "" ((:[]) <$> newline)
- <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol)
- line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol
+ brackets = try $ option "" (T.singleton <$> newline)
+ <+> (char ' ' >> (manyChar (char ' ') <+> textStr "}}}") <* eol)
+ line = option "" (T.singleton <$> newline) <+> manyTillChar anyChar eol
eol = lookAhead $ try $ nowikiEnd <|> newline
nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline
nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline
@@ -106,7 +106,7 @@ header = try $ do
fmap length (many1 (char '='))
guard $ level <= 6
skipSpaces
- content <- B.str <$> manyTill (noneOf "\n") headerEnd
+ content <- B.str <$> manyTillChar (noneOf "\n") headerEnd
return $ B.header level content
where
headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline
@@ -204,7 +204,7 @@ inline = choice [ whitespace
escapedChar :: PandocMonad m => CRLParser m B.Inlines
escapedChar =
- fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ")
+ fmap (B.str . T.singleton) (try $ char '~' >> noneOf "\t\n ")
escapedLink :: PandocMonad m => CRLParser m B.Inlines
escapedLink = try $ do
@@ -217,8 +217,8 @@ image = try $ do
(orig, src) <- wikiImg
return $ B.image src "" (B.str orig)
where
- linkSrc = many $ noneOf "|}\n\r\t"
- linkDsc = char '|' >> many (noneOf "}\n\r\t")
+ linkSrc = manyChar $ noneOf "|}\n\r\t"
+ linkDsc = char '|' >> manyChar (noneOf "}\n\r\t")
wikiImg = try $ do
string "{{"
src <- linkSrc
@@ -231,11 +231,11 @@ link = try $ do
(orig, src) <- uriLink <|> wikiLink
return $ B.link src "" orig
where
- linkSrc = many $ noneOf "|]\n\r\t"
- linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines
+ linkSrc = manyChar $ noneOf "|]\n\r\t"
+ linkDsc :: PandocMonad m => Text -> CRLParser m B.Inlines
linkDsc otxt = B.str
<$> try (option otxt
- (char '|' >> many (noneOf "]\n\r\t")))
+ (char '|' >> manyChar (noneOf "]\n\r\t")))
linkImg = try $ char '|' >> image
wikiLink = try $ do
string "[["
@@ -248,7 +248,7 @@ link = try $ do
return (B.str orig, src)
inlineNowiki :: PandocMonad m => CRLParser m B.Inlines
-inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end)
+inlineNowiki = B.code <$> (start >> manyTillChar (noneOf "\n\r") end)
where
start = try $ string "{{{"
end = try $ string "}}}" >> lookAhead (noneOf "}")
@@ -271,11 +271,11 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
innerNewline = return B.space
symbol :: PandocMonad m => CRLParser m B.Inlines
-symbol = fmap (B.str . (:[])) (oneOf specialChars)
+symbol = fmap (B.str . T.singleton) (oneOf specialChars)
str :: PandocMonad m => CRLParser m B.Inlines
str = let strChar = noneOf ("\t\n " ++ specialChars) in
- fmap B.str (many1 strChar)
+ fmap B.str (many1Char strChar)
bold :: PandocMonad m => CRLParser m B.Inlines
bold = B.strong . mconcat <$>
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3f44f83f8..ade9d27a3 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.DocBook
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -536,20 +537,22 @@ instance Default DBState where
readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readDocBook _ inp = do
- let tree = normalizeTree . parseXML . handleInstructions
- $ T.unpack $ crFilter inp
+ let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
-- Other xml instructions are simply removed from the input stream.
-handleInstructions :: String -> String
-handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs
-handleInstructions xs = case break (=='<') xs of
+handleInstructions :: Text -> Text
+handleInstructions = T.pack . handleInstructions' . T.unpack
+
+handleInstructions' :: String -> String
+handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
+handleInstructions' xs = case break (=='<') xs of
(ys, []) -> ys
- ([], '<':zs) -> '<' : handleInstructions zs
- (ys, zs) -> ys ++ handleInstructions zs
+ ([], '<':zs) -> '<' : handleInstructions' zs
+ (ys, zs) -> ys ++ handleInstructions' zs
getFigure :: PandocMonad m => Element -> DB m Blocks
getFigure e = do
@@ -580,13 +583,13 @@ convertEntity :: String -> String
convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
+attrValue :: String -> Element -> Text
attrValue attr elt =
- fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+ maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- convenience function
-named :: String -> Element -> Bool
-named s e = qName (elName e) == s
+named :: Text -> Element -> Bool
+named s e = qName (elName e) == T.unpack s
--
@@ -611,7 +614,7 @@ addMetadataFromElement e = do
[z] -> getInlines z >>= addMeta fieldname
zs -> mapM getInlines zs >>= addMeta fieldname
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
addMeta field val = modify (setMeta field val)
instance HasMeta DBState where
@@ -638,10 +641,8 @@ admonitionTags :: [String]
admonitionTags = ["important","caution","note","tip","warning"]
-- Trim leading and trailing newline characters
-trimNl :: String -> String
-trimNl = reverse . go . reverse . go
- where go ('\n':xs) = xs
- go xs = xs
+trimNl :: Text -> Text
+trimNl = T.dropAround (== '\n')
-- meld text into beginning of first paragraph of Blocks.
-- assumes Blocks start with a Para; if not, does nothing.
@@ -668,7 +669,7 @@ getMediaobject e = do
h = case atVal "depth" of
"" -> []
d -> [("height", d)]
- atr = (atVal "id", words $ atVal "role", w ++ h)
+ atr = (atVal "id", T.words $ atVal "role", w ++ h)
in return (atVal "fileref", atr)
let getCaption el = case filterChild (\x -> named "caption" x
|| named "textobject" x
@@ -691,8 +692,8 @@ parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text s
-parseBlock (CRef x) = return $ plain $ str $ map toUpper x
+ else return $ plain $ trimInlines $ text $ T.pack s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
parseBlock (Elem e) =
case qName (elName e) of
"toc" -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -740,7 +741,7 @@ parseBlock (Elem e) =
"refsect2" -> sect 2
"refsect3" -> sect 3
"refsection" -> gets dbSectionLevel >>= sect . (+1)
- l@_ | l `elem` admonitionTags -> parseAdmonition l
+ l@_ | l `elem` admonitionTags -> parseAdmonition $ T.pack l
"area" -> skip
"areaset" -> skip
"areaspec" -> skip
@@ -800,7 +801,7 @@ parseBlock (Elem e) =
"subtitle" -> return mempty -- handled in parent element
_ -> skip >> getBlocks e
where skip = do
- lift $ report $ IgnoredElement $ qName (elName e)
+ lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
return mempty
parseMixed container conts = do
@@ -818,7 +819,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ strContentRecursive e
+ $ trimNl $ T.pack $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -871,9 +872,9 @@ parseBlock (Elem e) =
_ -> AlignDefault
let toWidth c = case findAttr (unqual "colwidth") c of
Just w -> fromMaybe 0
- $ safeRead $ '0': filter (\x ->
+ $ safeRead $ "0" <> T.filter (\x ->
(x >= '0' && x <= '9')
- || x == '.') w
+ || x == '.') (T.pack w)
Nothing -> 0 :: Double
let numrows = case bodyrows of
[] -> 0
@@ -938,9 +939,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text s
+parseInline (Text (CData _ s _)) = return $ text $ T.pack s
parseInline (CRef ref) =
- return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
+ return $ maybe (text $ T.toUpper $ T.pack ref) (text . T.pack) $ lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
"equation" -> equation e displayMath
@@ -980,7 +981,7 @@ parseInline (Elem e) =
"constant" -> codeWithLang
"userinput" -> codeWithLang
"varargs" -> return $ code "(...)"
- "keycap" -> return (str $ strContent e)
+ "keycap" -> return (str $ T.pack $ strContent e)
"keycombo" -> keycombo <$>
mapM parseInline (elContent e)
"menuchoice" -> menuchoice <$>
@@ -992,20 +993,20 @@ parseInline (Elem e) =
let title = case attrValue "endterm" e of
"" -> maybe "???" xrefTitleByElem
(findElementById linkend content)
- endterm -> maybe "???" strContent
+ endterm -> maybe "???" (T.pack . strContent)
(findElementById endterm content)
- return $ link ('#' : linkend) "" (text title)
- "email" -> return $ link ("mailto:" ++ strContent e) ""
- $ str $ strContent e
- "uri" -> return $ link (strContent e) "" $ str $ strContent e
+ return $ link ("#" <> linkend) "" (text title)
+ "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
+ $ str $ T.pack $ strContent e
+ "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
"ulink" -> link (attrValue "url" e) "" <$> innerInlines
"link" -> do
ils <- innerInlines
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> h
- _ -> '#' : attrValue "linkend" e
+ Just h -> T.pack h
+ _ -> "#" <> attrValue "linkend" e
let ils' = if ils == mempty then str href else ils
- let attr = (attrValue "id" e, words $ attrValue "role" e, [])
+ let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
return $ linkWith attr href "" ils'
"foreignphrase" -> emph <$> innerInlines
"emphasis" -> case attrValue "role" e of
@@ -1023,7 +1024,7 @@ parseInline (Elem e) =
"br" -> return linebreak
_ -> skip >> innerInlines
where skip = do
- lift $ report $ IgnoredElement $ qName (elName e)
+ lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
return mempty
innerInlines = (trimInlines . mconcat) <$>
@@ -1032,7 +1033,7 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines
(filterChildren (named "member") e)
segmentedList = do
@@ -1062,8 +1063,8 @@ parseInline (Elem e) =
-- if there's no such attribute, employ some heuristics based on what
-- docbook-xsl does.
xrefTitleByElem el
- | not (null xrefLabel) = xrefLabel
- | otherwise = case qName (elName el) of
+ | not (T.null xrefLabel) = xrefLabel
+ | otherwise = case qName (elName el) of
"chapter" -> descendantContent "title" el
"section" -> descendantContent "title" el
"sect1" -> descendantContent "title" el
@@ -1073,10 +1074,10 @@ parseInline (Elem e) =
"sect5" -> descendantContent "title" el
"cmdsynopsis" -> descendantContent "command" el
"funcsynopsis" -> descendantContent "function" el
- _ -> qName (elName el) ++ "_title"
+ _ -> T.pack $ qName (elName el) ++ "_title"
where
xrefLabel = attrValue "xreflabel" el
- descendantContent name = maybe "???" strContent
+ descendantContent name = maybe "???" (T.pack . strContent)
. filterElementName (\n -> qName n == name)
-- | Extract a math equation from an element
@@ -1088,20 +1089,20 @@ equation
:: Monad m
=> Element
-- ^ The element from which to extract a mathematical equation
- -> (String -> Inlines)
+ -> (Text -> Inlines)
-- ^ A constructor for some Inlines, taking the TeX code as input
-> m Inlines
equation e constructor =
- return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations
+ return $ mconcat $ map constructor $ mathMLEquations <> latexEquations
where
- mathMLEquations :: [String]
+ mathMLEquations :: [Text]
mathMLEquations = map writeTeX $ rights $ readMath
(\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
- (readMathML . showElement)
+ (readMathML . T.pack . showElement)
- latexEquations :: [String]
+ latexEquations :: [Text]
latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
- (concat . fmap showVerbatimCData . elContent)
+ (T.concat . fmap showVerbatimCData . elContent)
readMath :: (Element -> Bool) -> (Element -> b) -> [b]
readMath childPredicate fromElement =
@@ -1111,9 +1112,10 @@ equation e constructor =
-- | Get the actual text stored in a CData block. 'showContent'
-- returns the text still surrounded by the [[CDATA]] tags.
-showVerbatimCData :: Content -> String
-showVerbatimCData (Text (CData _ d _)) = d
-showVerbatimCData c = showContent c
+showVerbatimCData :: Content -> Text
+showVerbatimCData (Text (CData _ d _)) = T.pack d
+showVerbatimCData c = T.pack $ showContent c
+
-- | Set the prefix of a name to 'Nothing'
removePrefix :: QName -> QName
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 387c3c7e2..cd4ff01db 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Docx
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -68,12 +69,12 @@ import Data.Default (Default)
import Data.List (delete, intersect)
import Data.Char (isSpace)
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Text.Pandoc.Builder
--- import Text.Pandoc.Definition
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
@@ -101,14 +102,14 @@ readDocx opts bytes
readDocx _ _ =
throwError $ PandocSomeError "couldn't parse docx file"
-data DState = DState { docxAnchorMap :: M.Map String String
- , docxAnchorSet :: Set.Set String
- , docxImmedPrevAnchor :: Maybe String
+data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
+ , docxAnchorSet :: Set.Set T.Text
+ , docxImmedPrevAnchor :: Maybe T.Text
, docxMediaBag :: MediaBag
, docxDropCap :: Inlines
-- keep track of (numId, lvl) values for
-- restarting
- , docxListState :: M.Map (String, String) Integer
+ , docxListState :: M.Map (T.Text, T.Text) Integer
, docxPrevPara :: Inlines
}
@@ -142,7 +143,7 @@ spansToKeep = []
divsToKeep :: [ParaStyleName]
divsToKeep = ["Definition", "Definition Term"]
-metaStyles :: M.Map ParaStyleName String
+metaStyles :: M.Map ParaStyleName T.Text
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
@@ -167,7 +168,7 @@ isEmptyPar (Paragraph _ parParts) =
isEmptyElem _ = True
isEmptyPar _ = False
-bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
+bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
@@ -232,22 +233,22 @@ runElemToInlines Tab = space
runElemToInlines SoftHyphen = text "\xad"
runElemToInlines NoBreakHyphen = text "\x2011"
-runElemToString :: RunElem -> String
-runElemToString (TextRun s) = s
-runElemToString LnBrk = ['\n']
-runElemToString Tab = ['\t']
-runElemToString SoftHyphen = ['\xad']
-runElemToString NoBreakHyphen = ['\x2011']
+runElemToText :: RunElem -> T.Text
+runElemToText (TextRun s) = s
+runElemToText LnBrk = T.singleton '\n'
+runElemToText Tab = T.singleton '\t'
+runElemToText SoftHyphen = T.singleton '\xad'
+runElemToText NoBreakHyphen = T.singleton '\x2011'
-runToString :: Run -> String
-runToString (Run _ runElems) = concatMap runElemToString runElems
-runToString _ = ""
+runToText :: Run -> T.Text
+runToText (Run _ runElems) = T.concat $ map runElemToText runElems
+runToText _ = ""
-parPartToString :: ParPart -> String
-parPartToString (PlainRun run) = runToString run
-parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
-parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
-parPartToString _ = ""
+parPartToText :: ParPart -> T.Text
+parPartToText (PlainRun run) = runToText run
+parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText _ = ""
blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]
@@ -310,7 +311,7 @@ runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
| maybe False isCodeCharStyle $ rParentStyle rs = do
rPr <- resolveDependentRunStyle rs
- let codeString = code $ concatMap runElemToString runElems
+ let codeString = code $ T.concat $ map runElemToText runElems
return $ case rVertAlign rPr of
Just SupScrpt -> superscript codeString
Just SubScrpt -> subscript codeString
@@ -328,17 +329,17 @@ runToInlines (Endnote bps) = do
return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
+ return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
("", [], [("width", showDim w), ("height", showDim h)] )
where
- showDim d = show (d / 914400) ++ "in"
+ showDim d = tshow (d / 914400) <> "in"
extentToAttr _ = nullAttr
-blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
+blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
let blkList = toList blks
notParaOrPlain :: Block -> Bool
@@ -347,7 +348,7 @@ blocksToInlinesWarn cmtId blks = do
notParaOrPlain _ = True
unless ( not (any notParaOrPlain blkList)) $
lift $ P.report $ DocxParserWarning $
- "Docx comment " ++ cmtId ++ " will not retain formatting"
+ "Docx comment " <> cmtId <> " will not retain formatting"
return $ blocksToInlines' blkList
-- The majority of work in this function is done in the primed
@@ -440,12 +441,12 @@ parPartToInlines' (BookMark _ anchor) =
return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines' (Drawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
+ return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
parPartToInlines' (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
- return $ link ('#' : anchor) "" ils
+ return $ link ("#" <> anchor) "" ils
parPartToInlines' (ExternalHyperLink target runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
@@ -463,7 +464,7 @@ isAnchorSpan (Span (_, classes, kvs) _) =
null kvs
isAnchorSpan _ = False
-dummyAnchors :: [String]
+dummyAnchors :: [T.Text]
dummyAnchors = ["_GoBack"]
makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
@@ -477,7 +478,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
, (Span (anchIdent, ["anchor"], _) cIls) <- c = do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
- let newIdent = if null ident
+ let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
newIls = concatMap f ils where f il | il == c = cIls
@@ -490,7 +491,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
- let newIdent = if null ident
+ let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
@@ -558,8 +559,8 @@ parStyleToTransform pPr
else transform
parStyleToTransform _ = return id
-normalizeToClassName :: (FromStyleName a) => a -> String
-normalizeToClassName = map go . fromStyleName
+normalizeToClassName :: (FromStyleName a) => a -> T.Text
+normalizeToClassName = T.map go . fromStyleName
where go c | isSpace c = '-'
| otherwise = c
@@ -574,7 +575,8 @@ bodyPartToBlocks (Paragraph pPr parparts)
return $
transform $
codeBlock $
- concatMap parPartToString parparts
+ T.concat $
+ map parPartToText parparts
| Just (style, n) <- pHeading pPr = do
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
@@ -646,7 +648,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
, ("num-id", numId)
, ("format", fmt)
, ("text", txt)
- , ("start", show start)
+ , ("start", tshow start)
]
modify $ \st -> st{ docxListState =
-- expire all the continuation data for lists of level > this one:
@@ -705,12 +707,12 @@ bodyPartToBlocks (OMathPara e) =
-- replace targets with generated anchors.
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
-rewriteLink' l@(Link attr ils ('#':target, title)) = do
+rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
anchorMap <- gets docxAnchorMap
case M.lookup target anchorMap of
Just newTarget -> do
modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
- return $ Link attr ils ('#':newTarget, title)
+ return $ Link attr ils ("#" <> newTarget, title)
Nothing -> do
modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
return l
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index da40a80ea..82791d669 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>,
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index e7a916f1c..05d9dd697 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Fields
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
) where
import Prelude
+import Data.Functor (($>))
+import qualified Data.Text as T
import Text.Parsec
-import Text.Parsec.String (Parser)
+import Text.Parsec.Text (Parser)
-type URL = String
+type URL = T.Text
data FieldInfo = HyperlinkField URL
| UnknownField
deriving (Show)
-parseFieldInfo :: String -> Either ParseError FieldInfo
+parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo = parse fieldInfo ""
fieldInfo :: Parser FieldInfo
@@ -34,31 +37,31 @@ fieldInfo =
<|>
return UnknownField
-escapedQuote :: Parser String
-escapedQuote = string "\\\""
+escapedQuote :: Parser T.Text
+escapedQuote = string "\\\"" $> "\\\""
-inQuotes :: Parser String
+inQuotes :: Parser T.Text
inQuotes =
- (try escapedQuote) <|> (anyChar >>= (\c -> return [c]))
+ (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c))
-quotedString :: Parser String
+quotedString :: Parser T.Text
quotedString = do
char '"'
- concat <$> manyTill inQuotes (try (char '"'))
+ T.concat <$> manyTill inQuotes (try (char '"'))
-unquotedString :: Parser String
-unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof)
+unquotedString :: Parser T.Text
+unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof)
-fieldArgument :: Parser String
+fieldArgument :: Parser T.Text
fieldArgument = quotedString <|> unquotedString
-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
-hyperlinkSwitch :: Parser (String, String)
+hyperlinkSwitch :: Parser (T.Text, T.Text)
hyperlinkSwitch = do
sw <- string "\\l"
spaces
farg <- fieldArgument
- return (sw, farg)
+ return (T.pack sw, farg)
hyperlink :: Parser URL
hyperlink = do
@@ -68,6 +71,6 @@ hyperlink = do
farg <- fieldArgument
switches <- spaces *> many hyperlinkSwitch
let url = case switches of
- ("\\l", s) : _ -> farg ++ ('#': s)
+ ("\\l", s) : _ -> farg <> "#" <> s
_ -> farg
return url
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index eb24640c5..b7b7a3835 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -22,6 +22,7 @@ import Prelude
import Data.List
import Data.Maybe
import Data.String (fromString)
+import qualified Data.Text as T
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.JSON
import Text.Pandoc.Readers.Docx.Parse (ParaStyleName)
@@ -45,20 +46,20 @@ getNumId _ = Nothing
getNumIdN :: Block -> Integer
getNumIdN b = fromMaybe (-1) (getNumId b)
-getText :: Block -> Maybe String
+getText :: Block -> Maybe T.Text
getText (Div (_, _, kvs) _) = lookup "text" kvs
getText _ = Nothing
data ListType = Itemized | Enumerated ListAttributes
-listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap :: [(T.Text, ListNumberStyle)]
listStyleMap = [("upperLetter", UpperAlpha),
("lowerLetter", LowerAlpha),
("upperRoman", UpperRoman),
("lowerRoman", LowerRoman),
("decimal", Decimal)]
-listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap :: [(T.Text, ListNumberDelim)]
listDelimMap = [("%1)", OneParen),
("(%1)", TwoParens),
("%1.", Period)]
@@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b =
_ -> Nothing
getListType _ = Nothing
-listParagraphDivs :: [String]
+listParagraphDivs :: [T.Text]
listParagraphDivs = ["list-paragraph"]
listParagraphStyles :: [ParaStyleName]
-listParagraphStyles = map fromString listParagraphDivs
+listParagraphStyles = map (fromString . T.unpack) listParagraphDivs
-- This is a first stab at going through and attaching meaning to list
-- paragraphs, without an item marker, following a list item. We
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 889bd80fc..8598ada6f 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B
import Data.Char (chr, ord, readLitChar)
import Data.List
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
@@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont)
+import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
import Text.XML.Light
import qualified Text.XML.Light.Cursor as XMLC
@@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String]
+data ReaderState = ReaderState { stateWarnings :: [T.Text]
, stateFldCharState :: FldCharState
}
deriving Show
@@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError
concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
concatMapM f xs = liftM concat (mapM f xs)
-
-- This is similar to `mapMaybe`: it maps a function returning the D
-- monad over a list, and only keeps the non-erroring return values.
mapD :: (a -> D b) -> [a] -> D [b]
@@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
-data Numb = Numb String String [LevelOverride]
+data Numb = Numb T.Text T.Text [LevelOverride]
deriving Show
-- ilvl startOverride lvl
-data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level)
+data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level)
deriving Show
-data AbstractNumb = AbstractNumb String [Level]
+data AbstractNumb = AbstractNumb T.Text [Level]
deriving Show
-- ilvl format string start
-data Level = Level String String String (Maybe Integer)
+data Level = Level T.Text T.Text T.Text (Maybe Integer)
deriving Show
data DocumentLocation = InDocument | InFootnote | InEndnote
@@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target
deriving Show
data Notes = Notes NameSpaces
- (Maybe (M.Map String Element))
- (Maybe (M.Map String Element))
+ (Maybe (M.Map T.Text Element))
+ (Maybe (M.Map T.Text Element))
deriving Show
-data Comments = Comments NameSpaces (M.Map String Element)
+data Comments = Comments NameSpaces (M.Map T.Text Element)
deriving Show
data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
@@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
- | Tbl String TblGrid TblLook [Row]
+ | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
+ | Tbl T.Text TblGrid TblLook [Row]
| OMathPara [Exp]
deriving Show
@@ -279,7 +280,7 @@ data ParPart = PlainRun Run
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
- | Drawing FilePath String String B.ByteString Extent -- title, alt
+ | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| Chart -- placeholder for now
| PlainOMath [Exp]
| Field FieldInfo [Run]
@@ -290,28 +291,28 @@ data ParPart = PlainRun Run
data Run = Run RunStyle [RunElem]
| Footnote [BodyPart]
| Endnote [BodyPart]
- | InlineDrawing FilePath String String B.ByteString Extent -- title, alt
+ | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| InlineChart -- placeholder
deriving Show
-data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
+data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
-type Target = String
-type Anchor = String
-type URL = String
-type BookMarkId = String
-type RelId = String
-type ChangeId = String
-type CommentId = String
-type Author = String
-type ChangeDate = String
-type CommentDate = String
+type Target = T.Text
+type Anchor = T.Text
+type URL = T.Text
+type BookMarkId = T.Text
+type RelId = T.Text
+type ChangeId = T.Text
+type CommentId = T.Text
+type Author = T.Text
+type ChangeDate = T.Text
+type CommentDate = T.Text
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
-archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
+archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text])
archiveToDocxWithWarnings archive = do
docXmlPath <- case getDocumentXmlPath archive of
Just fp -> Right fp
@@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do
Right doc -> Right (Docx doc, stateWarnings st)
Left e -> Left e
-getDocumentXmlPath :: Archive -> Maybe String
+getDocumentXmlPath :: Archive -> Maybe FilePath
getDocumentXmlPath zf = do
entry <- findEntryByPath "_rels/.rels" zf
relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
@@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle
, numInfo = Nothing
, psParentStyle = Nothing
, pStyleName = stName
- , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
+ , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName
}
archiveToNotes :: Archive -> Notes
@@ -441,8 +442,8 @@ filePathToRelType path docXmlPath =
relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
- relId <- findAttr (QName "Id" Nothing Nothing) element
- target <- findAttr (QName "Target" Nothing Nothing) element
+ relId <- findAttrText (QName "Id" Nothing Nothing) element
+ target <- findAttrText (QName "Target" Nothing Nothing) element
return $ Relationship relType relId target
relElemToRelationship _ _ = Nothing
@@ -464,7 +465,7 @@ filePathIsMedia fp =
in
(dir == "word/media/")
-lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
(absNumId, ovrrides) <- lookup numId $
map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs
@@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
loElemToLevelOverride ns element
| isElem ns "w" "lvlOverride" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
let startOverride = findChildByName ns "w" "startOverride" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttrByName ns "w" "numId" element
+ numId <- findAttrTextByName ns "w" "numId" element
absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let lvlOverrides = mapMaybe
(loElemToLevelOverride ns)
(findChildrenByName ns "w" "lvlOverride" element)
@@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrByName ns "w" "abstractNumId" element
+ absNumId <- findAttrTextByName ns "w" "abstractNumId" element
let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
@@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let start = findChildByName ns "w" "start" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
+elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
- | isElem ns "w" (notetype ++ "s") element =
+ | isElem ns "w" (notetype <> "s") element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" notetype element)
in
@@ -556,11 +557,11 @@ elemToNotes ns notetype element
M.fromList pairs
elemToNotes _ _ _ = Nothing
-elemToComments :: NameSpaces -> Element -> M.Map String Element
+elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" "comment" element)
in
@@ -632,7 +633,7 @@ testBitMask bitMaskS n =
pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
pHeading = getParStyleField headingLev . pStyle
-pNumInfo :: ParagraphStyle -> Maybe (String, String)
+pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
pNumInfo = getParStyleField numInfo . pStyle
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
@@ -640,7 +641,7 @@ elemToBodyPart ns element
| isElem ns "w" "p" element
, (c:_) <- findChildrenByName ns "m" "oMathPara" element =
do
- expsLst <- eitherToD $ readOMML $ showElement c
+ expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -664,7 +665,7 @@ elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChildByName ns "w" "tblPr" element
>>= findChildByName ns "w" "tblCaption"
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
caption = fromMaybe "" caption'
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
@@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels =
where
pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
-expandDrawingId :: String -> D (FilePath, B.ByteString)
+expandDrawingId :: T.Text -> D (FilePath, B.ByteString)
expandDrawingId s = do
location <- asks envLocation
- target <- asks (lookupRelationship location s . envRelationships)
+ target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
@@ -699,12 +700,12 @@ expandDrawingId s = do
Nothing -> throwError DocxError
Nothing -> throwError DocxError
-getTitleAndAlt :: NameSpaces -> Element -> (String, String)
+getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
getTitleAndAlt ns element =
let mbDocPr = findChildByName ns "wp" "inline" element >>=
findChildByName ns "wp" "docPr"
- title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
- alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
+ title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -716,7 +717,7 @@ elemToParPart ns element
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrByName ns "r" "embed"
+ >>= findAttrTextByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -726,7 +727,7 @@ elemToParPart ns element
| isElem ns "w" "r" element
, Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrByName ns "r" "id"
+ >>= findAttrTextByName ns "r" "id"
in
case drawing of
-- Todo: check out title and attr for deprecated format.
@@ -795,7 +796,7 @@ elemToParPart ns element
fldCharState <- gets stateFldCharState
case fldCharState of
FldCharOpen -> do
- info <- eitherToD $ parseFieldInfo $ strContent instrText
+ info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
return NullParPart
_ -> return NullParPart
@@ -816,56 +817,56 @@ elemToParPart ns element
return $ ChangedRuns change runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrByName ns "w" "id" element
- , Just bmName <- findAttrByName ns "w" "name" element =
+ , Just bmId <- findAttrTextByName ns "w" "id" element
+ , Just bmName <- findAttrTextByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttrByName ns "r" "id" element = do
+ , Just relId <- findAttrTextByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
- case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ case findAttrTextByName ns "w" "anchor" element of
+ Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrByName ns "w" "anchor" element = do
+ , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrByName ns "w" "id" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrByName ns "w" "id" element =
+ , Just cmtId <- findAttrTextByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- fmap PlainOMath (eitherToD $ readOMML $ showElement element)
+ fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttrByName ns "w" "id" element
- , Just cmtAuthor <- findAttrByName ns "w" "author" element
- , Just cmtDate <- findAttrByName ns "w" "date" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cmtDate <- findAttrTextByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
-lookupFootnote :: String -> Notes -> Maybe Element
+lookupFootnote :: T.Text -> Notes -> Maybe Element
lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s
-lookupEndnote :: String -> Notes -> Maybe Element
+lookupEndnote :: T.Text -> Notes -> Maybe Element
lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s
elemToExtent :: Element -> Extent
@@ -876,7 +877,7 @@ elemToExtent drawingElem =
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
childElemToRun :: NameSpaces -> Element -> D Run
@@ -887,7 +888,7 @@ childElemToRun ns element
= let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
@@ -900,7 +901,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrByName ns "w" "id" element = do
+ , Just fnId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -908,7 +909,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrByName ns "w" "id" element = do
+ , Just enId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
| isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate)
getTrackedChange ns element
| isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate)
getTrackedChange _ _ = Nothing
@@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (fmap ParaStyleId . findAttrByName ns "w" "val")
+ (fmap ParaStyleId . findAttrTextByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = mapMaybe (`M.lookup` sty) style
@@ -1010,7 +1011,7 @@ elemToRunStyleD ns element
charStyles <- asks envCharStyles
let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val" >>=
+ findAttrTextByName ns "w" "val" >>=
flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
@@ -1020,12 +1021,12 @@ elemToRunElem ns element
| isElem ns "w" "t" element
|| isElem ns "w" "delText" element
|| isElem ns "m" "t" element = do
- let str = strContent element
+ let str = T.pack $ strContent element
font <- asks envFont
case font of
Nothing -> return $ TextRun str
Just f -> return . TextRun $
- map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
+ T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
| isElem ns "w" "softHyphen" element = return SoftHyphen
@@ -1043,11 +1044,11 @@ getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
case readLitChar ("\\x" ++ s) of
- [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char
+ [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
_ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
- getFont = stringToFont =<< findAttrByName ns "w" "font" element
+ getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
lowerFromPrivate ('F':xs) = '0':xs
lowerFromPrivate xs = xs
getSymChar _ _ = TextRun ""
@@ -1059,7 +1060,7 @@ elemToRunElems ns element
let qualName = elemName ns "w"
let font = do
fontElem <- findElement (qualName "rFonts") element
- stringToFont =<<
+ textToFont . T.pack =<<
foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index ac2d6fa07..f81707e92 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -3,6 +3,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse.Styles
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -46,20 +47,19 @@ import Prelude
import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
-import Data.Char (toLower)
-import Data.List
import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import Data.Coerce
import Text.Pandoc.Readers.Docx.Util
import qualified Text.Pandoc.UTF8 as UTF8
import Text.XML.Light
-newtype CharStyleId = CharStyleId String
+newtype CharStyleId = CharStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
-newtype ParaStyleId = ParaStyleId String
+newtype ParaStyleId = ParaStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
newtype CharStyleName = CharStyleName CIString
@@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString
deriving (Show, Eq, Ord, IsString, FromStyleName)
-- Case-insensitive comparisons
-newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName)
class FromStyleName a where
- fromStyleName :: a -> String
+ fromStyleName :: a -> T.Text
instance FromStyleName String where
+ fromStyleName = T.pack
+
+instance FromStyleName T.Text where
fromStyleName = id
class FromStyleId a where
- fromStyleId :: a -> String
+ fromStyleId :: a -> T.Text
instance FromStyleId String where
+ fromStyleId = T.pack
+
+instance FromStyleId T.Text where
fromStyleId = id
instance Eq CIString where
- (==) = (==) `on` map toLower . coerce
+ (==) = (==) `on` T.toCaseFold . coerce
instance Ord CIString where
- compare = compare `on` map toLower . coerce
+ compare = compare `on` T.toCaseFold . coerce
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
@@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
deriving Show
data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
- , numInfo :: Maybe (String, String)
+ , numInfo :: Maybe (T.Text, T.Text)
, psParentStyle :: Maybe ParStyle
, pStyleName :: ParaStyleName
, pStyleId :: ParaStyleId
@@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
@@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
- = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> (Just $ elemToRunStyle ns element parentStyle)
@@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
- , Just n <- stringToInteger =<<
- (stripPrefix "heading " . map toLower $
+ , Just n <- stringToInteger . T.unpack =<<
+ (T.stripPrefix "heading " . T.toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
-getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
- ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
- <|> findAttrByName ns "w" "styleId" el)
+ ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
+ <|> findAttrTextByName ns "w" "styleId" el)
-getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
getNumInfo ns element = do
let numPr = findChildByName ns "w" "pPr" element >>=
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
- findAttrByName ns "w" "val")
+ findAttrTextByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
- | Just styleId <- findAttrByName ns "w" "styleId" element
+ | Just styleId <- findAttrTextByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f4855efd2..0de1114bd 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util (
, elemToNameSpaces
, findChildByName
, findChildrenByName
+ , findAttrText
, findAttrByName
+ , findAttrTextByName
) where
import Prelude
import Data.Maybe (mapMaybe)
+import qualified Data.Text as T
import Text.XML.Light
type NameSpaces = [(String, String)]
@@ -55,7 +58,13 @@ findChildrenByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findChildren (elemName ns' pref name) el
+findAttrText :: QName -> Element -> Maybe T.Text
+findAttrText x = fmap T.pack . findAttr x
+
findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
findAttrByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findAttr (elemName ns' pref name) el
+
+findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
+findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 60d406df1..3a92cfa19 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.DokuWiki
Copyright : Copyright (C) 2018-2019 Alexander Krotov
@@ -20,8 +21,7 @@ import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isDigit)
import qualified Data.Foldable as F
-import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf)
-import Data.List.Split (splitOn)
+import Data.List (transpose)
import Data.Maybe (fromMaybe, catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
@@ -31,7 +31,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow)
-- | Read DokuWiki from an input string and return a Pandoc document.
readDokuWiki :: PandocMonad m
@@ -42,7 +42,7 @@ readDokuWiki opts s = do
let input = crFilter s
res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
case res of
- Left e -> throwError $ PandocParsecError (T.unpack input) e
+ Left e -> throwError $ PandocParsecError input e
Right d -> return d
type DWParser = ParserT Text ParserState
@@ -71,9 +71,9 @@ parseDokuWiki =
B.doc . mconcat <$> many block <* spaces <* eof
-- | Parse <code> and <file> attributes
-codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)])
+codeLanguage :: PandocMonad m => DWParser m (Text, [Text], [(Text, Text)])
codeLanguage = try $ do
- rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>')))
+ rawLang <- option "-" (spaceChar *> manyTillChar anyChar (lookAhead (spaceChar <|> char '>')))
let attr = case rawLang of
"-" -> []
l -> [l]
@@ -81,16 +81,16 @@ codeLanguage = try $ do
-- | Generic parser for <code> and <file> tags
codeTag :: PandocMonad m
- => ((String, [String], [(String, String)]) -> String -> a)
- -> String
+ => ((Text, [Text], [(Text, Text)]) -> Text -> a)
+ -> Text
-> DWParser m a
codeTag f tag = try $ f
<$ char '<'
- <* string tag
+ <* textStr tag
<*> codeLanguage
<* manyTill anyChar (char '>')
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</" <* string tag <* char '>')
+ <*> manyTillChar anyChar (try $ string "</" <* textStr tag <* char '>')
-- * Inline parsers
@@ -167,19 +167,19 @@ underlined :: PandocMonad m => DWParser m B.Inlines
underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines
nowiki :: PandocMonad m => DWParser m B.Inlines
-nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>")
+nowiki = try $ B.text <$ string "<nowiki>" <*> manyTillChar anyChar (try $ string "</nowiki>")
percent :: PandocMonad m => DWParser m B.Inlines
-percent = try $ B.text <$> enclosed (string "%%") nestedString
+percent = try $ B.text <$> enclosed (string "%%") nestedText
-nestedString :: (Show a, PandocMonad m)
- => DWParser m a -> DWParser m String
-nestedString end = innerSpace <|> count 1 nonspaceChar
+nestedText :: (Show a, PandocMonad m)
+ => DWParser m a -> DWParser m Text
+nestedText end = innerSpace <|> countChar 1 nonspaceChar
where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
+ innerSpace = try $ many1Char spaceChar <* notFollowedBy end
monospaced :: PandocMonad m => DWParser m B.Inlines
-monospaced = try $ B.code <$> enclosed (string "''") nestedString
+monospaced = try $ B.code <$> enclosed (string "''") nestedText
subscript :: PandocMonad m => DWParser m B.Inlines
subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines
@@ -201,12 +201,12 @@ inlineFile :: PandocMonad m => DWParser m B.Inlines
inlineFile = codeTag B.codeWith "file"
inlineHtml :: PandocMonad m => DWParser m B.Inlines
-inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>")
+inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTillChar anyChar (try $ string "</html>")
inlinePhp :: PandocMonad m => DWParser m B.Inlines
-inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>")
+inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTillChar anyChar (try $ string "</php>")
-makeLink :: (String, String) -> B.Inlines
+makeLink :: (Text, Text) -> B.Inlines
makeLink (text, url) = B.link url "" $ B.str text
autoEmail :: PandocMonad m => DWParser m B.Inlines
@@ -220,7 +220,7 @@ autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- uri
- guard $ checkLink (last url)
+ guard $ checkLink (T.last url)
return $ makeLink (text, url)
where
checkLink c
@@ -234,10 +234,10 @@ nocache :: PandocMonad m => DWParser m B.Inlines
nocache = try $ mempty <$ string "~~NOCACHE~~"
str :: PandocMonad m => DWParser m B.Inlines
-str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
+str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference)
symbol :: PandocMonad m => DWParser m B.Inlines
-symbol = B.str <$> count 1 nonspaceChar
+symbol = B.str <$> countChar 1 nonspaceChar
link :: PandocMonad m => DWParser m B.Inlines
link = try $ do
@@ -248,77 +248,78 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
return l
-isExternalLink :: String -> Bool
-isExternalLink s =
- case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of
- (':':'/':'/':_) -> True
- _ -> False
-
-isAbsolutePath :: String -> Bool
-isAbsolutePath ('.':_) = False
-isAbsolutePath s = ':' `elem` s
-
-normalizeDots :: String -> String
-normalizeDots path@('.':_) =
- case dropWhile (== '.') path of
- ':':_ -> path
- _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path
-normalizeDots path = path
+isExternalLink :: Text -> Bool
+isExternalLink s = "://" `T.isPrefixOf` sSuff
+ where
+ sSuff = T.dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s
+
+isAbsolutePath :: Text -> Bool
+isAbsolutePath (T.uncons -> Just ('.', _)) = False
+isAbsolutePath s = T.any (== ':') s
+
+normalizeDots :: Text -> Text
+normalizeDots path
+ | not (T.null pref) = case T.uncons suff of
+ Just (':', _) -> path
+ _ -> pref <> ":" <> suff
+ | otherwise = path
+ where
+ (pref, suff) = T.span (== '.') path
-normalizeInternalPath :: String -> String
+normalizeInternalPath :: Text -> Text
normalizeInternalPath path =
if isAbsolutePath path
then ensureAbsolute normalizedPath
else normalizedPath
where
- normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path
- ensureAbsolute s@('/':_) = s
- ensureAbsolute s = '/':s
+ normalizedPath = T.intercalate "/" $ dropWhile (== ".") $ T.splitOn ":" $ normalizeDots path
+ ensureAbsolute s@(T.uncons -> Just ('/', _)) = s
+ ensureAbsolute s = "/" <> s
-normalizePath :: String -> String
+normalizePath :: Text -> Text
normalizePath path =
if isExternalLink path
then path
else normalizeInternalPath path
-urlToText :: String -> String
+urlToText :: Text -> Text
urlToText url =
if isExternalLink url
then url
- else reverse $ takeWhile (/= ':') $ reverse url
+ else T.takeWhileEnd (/= ':') url
-- Parse link or image
parseLink :: PandocMonad m
- => (String -> Maybe B.Inlines -> B.Inlines)
- -> String
- -> String
+ => (Text -> Maybe B.Inlines -> B.Inlines)
+ -> Text
+ -> Text
-> DWParser m B.Inlines
parseLink f l r = f
- <$ string l
- <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r)))
- <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r)))
- <* string r
+ <$ textStr l
+ <*> many1TillChar anyChar (lookAhead (void (char '|') <|> try (void $ textStr r)))
+ <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ textStr r)))
+ <* textStr r
-- | Split Interwiki link into left and right part
-- | Return Nothing if it is not Interwiki link
-splitInterwiki :: String -> Maybe (String, String)
+splitInterwiki :: Text -> Maybe (Text, Text)
splitInterwiki path =
- case span (\c -> isAlphaNum c || c == '.') path of
- (l, '>':r) -> Just (l, r)
+ case T.span (\c -> isAlphaNum c || c == '.') path of
+ (l, T.uncons -> Just ('>', r)) -> Just (l, r)
_ -> Nothing
-interwikiToUrl :: String -> String -> String
-interwikiToUrl "callto" page = "callto://" ++ page
-interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page
-interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page
-interwikiToUrl "tel" page = "tel:" ++ page
-interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page
-interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky"
+interwikiToUrl :: Text -> Text -> Text
+interwikiToUrl "callto" page = "callto://" <> page
+interwikiToUrl "doku" page = "https://www.dokuwiki.org/" <> page
+interwikiToUrl "phpfn" page = "https://secure.php.net/" <> page
+interwikiToUrl "tel" page = "tel:" <> page
+interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
+interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
+interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
linkText :: PandocMonad m => DWParser m B.Inlines
linkText = parseLink fromRaw "[[" "]]"
@@ -338,23 +339,23 @@ linkText = parseLink fromRaw "[[" "]]"
Just (_, r) -> r
-- Matches strings like "100x100" (width x height) and "50" (width)
-isWidthHeightParameter :: String -> Bool
+isWidthHeightParameter :: Text -> Bool
isWidthHeightParameter s =
- case s of
- (x:xs) ->
- isDigit x && case dropWhile isDigit xs of
- ('x':ys@(_:_)) -> all isDigit ys
- "" -> True
+ case T.uncons s of
+ Just (x, xs) ->
+ isDigit x && case T.uncons $ T.dropWhile isDigit xs of
+ Just ('x', ys) | not (T.null ys) -> T.all isDigit ys
+ Nothing -> True
_ -> False
_ -> False
-parseWidthHeight :: String -> (Maybe String, Maybe String)
+parseWidthHeight :: Text -> (Maybe Text, Maybe Text)
parseWidthHeight s = (width, height)
where
- width = Just $ takeWhile isDigit s
+ width = Just $ T.takeWhile isDigit s
height =
- case dropWhile isDigit s of
- ('x':xs) -> Just xs
+ case T.uncons $ T.dropWhile isDigit s of
+ Just ('x', xs) -> Just xs
_ -> Nothing
image :: PandocMonad m => DWParser m B.Inlines
@@ -365,17 +366,17 @@ image = try $ parseLink fromRaw "{{" "}}"
then B.link normalizedPath "" (fromMaybe defaultDescription description)
else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description)
where
- (path', parameters) = span (/= '?') $ trim path
+ (path', parameters) = T.span (/= '?') $ trim path
normalizedPath = normalizePath path'
- leftPadding = " " `isPrefixOf` path
- rightPadding = " " `isSuffixOf` path
+ leftPadding = " " `T.isPrefixOf` path
+ rightPadding = " " `T.isSuffixOf` path
classes =
case (leftPadding, rightPadding) of
(False, False) -> []
(False, True) -> ["align-left"]
(True, False) -> ["align-right"]
(True, True) -> ["align-center"]
- parameterList = splitOn "&" $ drop 1 parameters
+ parameterList = T.splitOn "&" $ T.drop 1 parameters
linkOnly = "linkonly" `elem` parameterList
(width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList)
attributes = catMaybes [fmap ("width",) width, fmap ("height",) height]
@@ -389,7 +390,7 @@ block = do
<|> blockElements
<|> para
skipMany blankline
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
blockElements :: PandocMonad m => DWParser m B.Blocks
@@ -417,30 +418,30 @@ header = try $ do
attr <- registerHeader nullAttr contents
return $ B.headerWith attr (7 - lev) contents
-list :: PandocMonad m => String -> DWParser m B.Blocks
+list :: PandocMonad m => Text -> DWParser m B.Blocks
list prefix = bulletList prefix <|> orderedList prefix
-bulletList :: PandocMonad m => String -> DWParser m B.Blocks
+bulletList :: PandocMonad m => Text -> DWParser m B.Blocks
bulletList prefix = try $ B.bulletList <$> parseList prefix '*'
-orderedList :: PandocMonad m => String -> DWParser m B.Blocks
+orderedList :: PandocMonad m => Text -> DWParser m B.Blocks
orderedList prefix = try $ B.orderedList <$> parseList prefix '-'
parseList :: PandocMonad m
- => String
+ => Text
-> Char
-> DWParser m [B.Blocks]
parseList prefix marker =
many1 ((<>) <$> item <*> fmap mconcat (many continuation))
where
- continuation = try $ list (" " ++ prefix)
- item = try $ string prefix *> char marker *> char ' ' *> itemContents
+ continuation = try $ list (" " <> prefix)
+ item = try $ textStr prefix *> char marker *> char ' ' *> itemContents
itemContents = B.plain . mconcat <$> many1Till inline' eol
indentedCode :: PandocMonad m => DWParser m B.Blocks
-indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine
+indentedCode = try $ B.codeBlock . T.unlines <$> many1 indentedLine
where
- indentedLine = try $ string " " *> manyTill anyChar eol
+ indentedLine = try $ string " " *> manyTillChar anyChar eol
quote :: PandocMonad m => DWParser m B.Blocks
quote = try $ nestedQuote 0
@@ -456,13 +457,13 @@ blockHtml :: PandocMonad m => DWParser m B.Blocks
blockHtml = try $ B.rawBlock "html"
<$ string "<HTML>"
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</HTML>")
+ <*> manyTillChar anyChar (try $ string "</HTML>")
blockPhp :: PandocMonad m => DWParser m B.Blocks
blockPhp = try $ B.codeBlockWith ("", ["php"], [])
<$ string "<PHP>"
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</PHP>")
+ <*> manyTillChar anyChar (try $ string "</PHP>")
table :: PandocMonad m => DWParser m B.Blocks
table = do
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 8e9746090..93ddeb9ee 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.EPUB
Copyright : Copyright (C) 2014-2019 Matthew Pickering
@@ -24,7 +25,8 @@ import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf, isPrefixOf)
+import Data.List (isInfixOf)
+import qualified Data.Text as T
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (fromMaybe, mapMaybe)
import qualified Data.Text.Lazy as TL
@@ -67,9 +69,9 @@ archiveToEPUB os archive = do
-- No need to collapse here as the image path is from the manifest file
let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
spine <- parseSpine items content
- let escapedSpine = map (escapeURI . takeFileName . fst) spine
+ let escapedSpine = map (escapeURI . T.pack . takeFileName . fst) spine
Pandoc _ bs <-
- foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine))
+ foldM' (\a b -> ((a <>) . walk (prependHash $ escapedSpine))
`liftM` parseSpineElem root b) mempty spine
let ast = coverDoc <> Pandoc meta bs
fetchImages (M.elems items) root archive ast
@@ -79,7 +81,7 @@ archiveToEPUB os archive = do
parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
doc <- mimeToReader mime r path
- let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
+ let docSpan = B.doc $ B.para $ B.spanWith (T.pack $ takeFileName path, [], []) mempty
return $ docSpan <> doc
mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (unEscapeString -> root)
@@ -108,18 +110,19 @@ fetchImages mimes root arc (query iq -> links) =
<$> findEntryByPath abslink arc
iq :: Inline -> [FilePath]
-iq (Image _ _ (url, _)) = [url]
+iq (Image _ _ (url, _)) = [T.unpack url]
iq _ = []
-- Remove relative paths
renameImages :: FilePath -> Inline -> Inline
renameImages root img@(Image attr a (url, b))
- | "data:" `isPrefixOf` url = img
- | otherwise = Image attr a (collapseFilePath (root </> url), b)
+ | "data:" `T.isPrefixOf` url = img
+ | otherwise = Image attr a ( T.pack $ collapseFilePath (root </> T.unpack url)
+ , b)
renameImages _ x = x
imageToPandoc :: FilePath -> Pandoc
-imageToPandoc s = B.doc . B.para $ B.image s "" mempty
+imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
imageMimes :: [MimeType]
imageMimes = ["image/gif", "image/jpeg", "image/png"]
@@ -144,7 +147,7 @@ parseManifest content coverId = do
uid <- findAttrE (emptyName "id") e
href <- findAttrE (emptyName "href") e
mime <- findAttrE (emptyName "media-type") e
- return (uid, (href, mime))
+ return (uid, (href, T.pack mime))
parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
@@ -172,11 +175,11 @@ parseMeta content = do
-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e@(stripNamespace . elName -> field) meta =
- addMetaField (renameMeta field) (B.str $ strContent e) meta
+ addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
-renameMeta :: String -> String
+renameMeta :: String -> T.Text
renameMeta "creator" = "author"
-renameMeta s = s
+renameMeta s = T.pack s
getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest archive = do
@@ -197,26 +200,26 @@ getManifest archive = do
fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences pathToFile =
walk (renameImages root)
- . walk (fixBlockIRs filename)
+ . walk (fixBlockIRs filename)
. walk (fixInlineIRs filename)
where
- (root, escapeURI -> filename) = splitFileName pathToFile
+ (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs s (Span as v) =
Span (fixAttrs s as) v
fixInlineIRs s (Code as code) =
Code (fixAttrs s as) code
-fixInlineIRs s (Link as is ('#':url, tit)) =
+fixInlineIRs s (Link as is (T.uncons -> Just ('#', url), tit)) =
Link (fixAttrs s as) is (addHash s url, tit)
fixInlineIRs s (Link as is t) =
Link (fixAttrs s as) is t
fixInlineIRs _ v = v
-prependHash :: [String] -> Inline -> Inline
+prependHash :: [T.Text] -> Inline -> Inline
prependHash ps l@(Link attr is (url, tit))
- | or [s `isPrefixOf` url | s <- ps] =
- Link attr is ('#':url, tit)
+ | or [s `T.isPrefixOf` url | s <- ps] =
+ Link attr is ("#" <> url, tit)
| otherwise = l
prependHash _ i = i
@@ -230,17 +233,17 @@ fixBlockIRs s (CodeBlock as code) =
fixBlockIRs _ b = b
fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
-addHash :: String -> String -> String
+addHash :: String -> T.Text -> T.Text
addHash _ "" = ""
-addHash s ident = takeFileName s ++ "#" ++ ident
+addHash s ident = T.pack (takeFileName s) <> "#" <> ident
-removeEPUBAttrs :: [(String, String)] -> [(String, String)]
+removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
-isEPUBAttr :: (String, String) -> Bool
-isEPUBAttr (k, _) = "epub:" `isPrefixOf` k
+isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
-- Library
@@ -291,4 +294,4 @@ findElementE :: PandocMonad m => QName -> Element -> m Element
findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ s) return
+mkE s = maybe (throwError . PandocParseError $ T.pack $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 0b25b9fed..6eed3c104 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.FB2
Copyright : Copyright (C) 2018-2019 Alexander Krotov
@@ -27,12 +28,11 @@ import Control.Monad.Except (throwError)
import Control.Monad.State.Strict
import Data.ByteString.Lazy.Char8 ( pack )
import Data.ByteString.Base64.Lazy
-import Data.Char (isSpace, toUpper)
import Data.Functor
-import Data.List (dropWhileEnd, intersperse)
-import Data.List.Split (splitOn)
+import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text)
+import qualified Data.Text as T
import Data.Default
import Data.Maybe
import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -48,8 +48,8 @@ type FB2 m = StateT FB2State m
data FB2State = FB2State{ fb2SectionLevel :: Int
, fb2Meta :: Meta
- , fb2Authors :: [String]
- , fb2Notes :: M.Map String Blocks
+ , fb2Authors :: [Text]
+ , fb2Notes :: M.Map Text Blocks
} deriving Show
instance Default FB2State where
@@ -76,19 +76,20 @@ readFB2 _ inp =
-- * Utility functions
-trim :: String -> String
-trim = dropWhileEnd isSpace . dropWhile isSpace
+trim :: Text -> Text
+trim = T.strip
-removeHash :: String -> String
-removeHash ('#':xs) = xs
-removeHash xs = xs
+removeHash :: Text -> Text
+removeHash t = case T.uncons t of
+ Just ('#', xs) -> xs
+ _ -> t
-convertEntity :: String -> String
-convertEntity e = fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: String -> Text
+convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
parseInline :: PandocMonad m => Content -> FB2 m Inlines
parseInline (Elem e) =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -96,12 +97,12 @@ parseInline (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ strContent e
+ "code" -> pure $ code $ T.pack $ strContent e
"image" -> parseInlineImageElement e
name -> do
report $ IgnoredElement name
pure mempty
-parseInline (Text x) = pure $ text $ cdData x
+parseInline (Text x) = pure $ text $ T.pack $ cdData x
parseInline (CRef r) = pure $ str $ convertEntity r
parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -111,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
parseRootElement e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"FictionBook" -> do
-- Parse notes before parsing the rest of the content.
case filterChild isNotesBody e of
@@ -144,7 +145,7 @@ parseNote e =
Just sectionId -> do
content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
oldNotes <- gets fb2Notes
- modify $ \s -> s { fb2Notes = M.insert ("#" ++ sectionId) content oldNotes }
+ modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
pure ()
where
isTitle x = qName (elName x) == "title"
@@ -156,7 +157,7 @@ parseNote e =
-- | Parse a child of @\<FictionBook>@ element.
parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
parseFictionBookChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"stylesheet" -> pure mempty -- stylesheet is ignored
"description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
"body" -> if isNotesBody e
@@ -168,7 +169,7 @@ parseFictionBookChild e =
-- | Parse a child of @\<description>@ element.
parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
parseDescriptionChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title-info" -> mapM_ parseTitleInfoChild (elChildren e)
"src-title-info" -> pure () -- ignore
"document-info" -> pure ()
@@ -176,13 +177,13 @@ parseDescriptionChild e =
"custom-info" -> pure ()
"output" -> pure ()
name -> do
- report $ IgnoredElement $ name ++ " in description"
+ report $ IgnoredElement $ name <> " in description"
pure mempty
-- | Parse a child of @\<body>@ element.
parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
parseBodyChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"image" -> parseImageElement e
"title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
"epigraph" -> parseEpigraph e
@@ -196,25 +197,25 @@ parseBinaryElement e =
(Nothing, _) -> report $ IgnoredElement "binary without id attribute"
(Just _, Nothing) ->
report $ IgnoredElement "binary without content-type attribute"
- (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e)))
+ (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
-- * Type parsers
-- | Parse @authorType@
-parseAuthor :: PandocMonad m => Element -> FB2 m String
-parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
+parseAuthor :: PandocMonad m => Element -> FB2 m Text
+parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
-parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String)
+parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
parseAuthorChild e =
- case qName $ elName e of
- "first-name" -> pure $ Just $ strContent e
- "middle-name" -> pure $ Just $ strContent e
- "last-name" -> pure $ Just $ strContent e
- "nickname" -> pure $ Just $ strContent e
- "home-page" -> pure $ Just $ strContent e
- "email" -> pure $ Just $ strContent e
+ case T.pack $ qName $ elName e of
+ "first-name" -> pure $ Just $ T.pack $ strContent e
+ "middle-name" -> pure $ Just $ T.pack $ strContent e
+ "last-name" -> pure $ Just $ T.pack $ strContent e
+ "nickname" -> pure $ Just $ T.pack $ strContent e
+ "home-page" -> pure $ Just $ T.pack $ strContent e
+ "email" -> pure $ Just $ T.pack $ strContent e
name -> do
- report $ IgnoredElement $ name ++ " in author"
+ report $ IgnoredElement $ name <> " in author"
pure Nothing
-- | Parse @titleType@
@@ -236,13 +237,13 @@ parseTitleContent _ = pure Nothing
parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
parseImageElement e =
case href of
- Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
+ Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
Nothing -> do
report $ IgnoredElement " image without href"
pure mempty
- where alt = maybe mempty str $ findAttr (unqual "alt") e
- title = fromMaybe "" $ findAttr (unqual "title") e
- imgId = fromMaybe "" $ findAttr (unqual "id") e
+ where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
+ title = maybe "" T.pack $ findAttr (unqual "title") e
+ imgId = maybe "" T.pack $ findAttr (unqual "id") e
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
-- | Parse @pType@
@@ -256,7 +257,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
-- | Parse @citeType@ child
parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
parseCiteChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"empty-line" -> pure horizontalRule
@@ -271,13 +272,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
parsePoemChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"epigraph" -> parseEpigraph e
"stanza" -> parseStanza e
"text-author" -> para <$> parsePType e
- "date" -> pure $ para $ text $ strContent e
+ "date" -> pure $ para $ text $ T.pack $ strContent e
name -> report (UnexpectedXmlElement name "poem") $> mempty
parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -290,7 +291,7 @@ joinLineBlocks [] = []
parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
parseStanzaChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"v" -> lineBlock . (:[]) <$> parsePType e
@@ -300,11 +301,11 @@ parseStanzaChild e =
parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraph e =
divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
- where divId = fromMaybe "" $ findAttr (unqual "id") e
+ where divId = maybe "" T.pack $ findAttr (unqual "id") e
parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraphChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -318,7 +319,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
parseAnnotationChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -332,14 +333,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
parseSection e = do
n <- gets fb2SectionLevel
modify $ \st -> st{ fb2SectionLevel = n + 1 }
- let sectionId = fromMaybe "" $ findAttr (unqual "id") e
+ let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
modify $ \st -> st{ fb2SectionLevel = n }
pure bs
parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
parseSectionChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseBodyChild e
"epigraph" -> parseEpigraph e
"image" -> parseImageElement e
@@ -361,16 +362,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
parseNamedStyle e = do
content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
- let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
+ let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
case findAttr (unqual "name") e of
- Just name -> pure $ spanWith ("", [name], lang) content
+ Just name -> pure $ spanWith ("", [T.pack name], lang) content
Nothing -> do
report $ IgnoredElement "link without required name"
pure mempty
parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
parseNamedStyleChild (Elem e) =
- case qName (elName e) of
+ case T.pack $ qName (elName e) of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -378,10 +379,10 @@ parseNamedStyleChild (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ strContent e
+ "code" -> pure $ code $ T.pack $ strContent e
"image" -> parseInlineImageElement e
name -> do
- report $ IgnoredElement $ name ++ " in style"
+ report $ IgnoredElement $ name <> " in style"
pure mempty
parseNamedStyleChild x = parseInline x
@@ -390,7 +391,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
parseLinkType e = do
content <- mconcat <$> mapM parseStyleLinkType (elContent e)
notes <- gets fb2Notes
- case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just href -> case findAttr (QName "type" Nothing Nothing) e of
Just "note" -> case M.lookup href notes of
Nothing -> pure $ link href "" content
@@ -417,19 +418,21 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
-- | Parse @title-infoType@
parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
parseTitleInfoChild e =
- case qName (elName e) of
+ case T.pack $ qName (elName e) of
"genre" -> pure ()
"author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
- "book-title" -> modify (setMeta "title" (text $ strContent e))
+ "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
"annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
- "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e))
- "date" -> modify (setMeta "date" (text $ strContent e))
+ "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
+ $ T.pack
+ $ strContent e))
+ "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
"coverpage" -> parseCoverPage e
"lang" -> pure ()
"src-lang" -> pure ()
"translator" -> pure ()
"sequence" -> pure ()
- name -> report $ IgnoredElement $ name ++ " in title-info"
+ name -> report $ IgnoredElement $ name <> " in title-info"
parseCoverPage :: PandocMonad m => Element -> FB2 m ()
parseCoverPage e =
@@ -437,7 +440,7 @@ parseCoverPage e =
Just img -> case href of
Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
Nothing -> pure ()
- where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+ where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
Nothing -> pure ()
-- | Parse @inlineImageType@ element
@@ -450,5 +453,5 @@ parseInlineImageElement e =
Nothing -> do
report $ IgnoredElement "inline image without href"
pure mempty
- where alt = maybe mempty str $ findAttr (unqual "alt") e
- href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
+ href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index e03ac6a97..1c2892d6a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,5 +1,5 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
@@ -35,8 +35,7 @@ import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT)
import Data.Char (isAlphaNum, isLetter)
import Data.Default (Default (..), def)
import Data.Foldable (for_)
-import Data.List (isPrefixOf)
-import Data.List.Split (wordsBy, splitWhen)
+import Data.List.Split (splitWhen)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid (First (..))
@@ -62,8 +61,8 @@ import Text.Pandoc.Options (
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
- extractSpaces, htmlSpanLikeElements,
- onlySimpleTableCells, safeRead, underlineSpan)
+ extractSpaces, htmlSpanLikeElements, elemText, splitTextBy,
+ onlySimpleTableCells, safeRead, underlineSpan, tshow)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
@@ -93,14 +92,14 @@ readHtml opts inp = do
"source" tags
case result of
Right doc -> return doc
- Left err -> throwError $ PandocParseError $ getError err
+ Left err -> throwError $ PandocParseError $ T.pack $ getError err
replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
replaceNotes bs = do
st <- getState
return $ walk (replaceNotes' (noteTable st)) bs
-replaceNotes' :: [(String, Blocks)] -> Inline -> Inline
+replaceNotes' :: [(Text, Blocks)] -> Inline -> Inline
replaceNotes' noteTbl (RawInline (Format "noteref") ref) =
maybe (Str "") (Note . B.toList) $ lookup ref noteTbl
replaceNotes' _ x = x
@@ -108,9 +107,9 @@ replaceNotes' _ x = x
data HTMLState =
HTMLState
{ parserState :: ParserState,
- noteTable :: [(String, Blocks)],
+ noteTable :: [(Text, Blocks)],
baseHref :: Maybe URI,
- identifiers :: Set.Set String,
+ identifiers :: Set.Set Text,
logMessages :: [LogMessage],
macros :: M.Map Text Macro
}
@@ -134,7 +133,7 @@ pHtml :: PandocMonad m => TagParser m Blocks
pHtml = try $ do
(TagOpen "html" attr) <- lookAhead pAny
for_ (lookup "lang" attr) $
- updateState . B.setMeta "lang" . B.text . T.unpack
+ updateState . B.setMeta "lang" . B.text
pInTags "html" block
pBody :: PandocMonad m => TagParser m Blocks
@@ -146,11 +145,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
setTitle t = mempty <$ updateState (B.setMeta "title" t)
pMetaTag = do
mt <- pSatisfy (matchTagOpen "meta" [])
- let name = T.unpack $ fromAttrib "name" mt
- if null name
+ let name = fromAttrib "name" mt
+ if T.null name
then return mempty
else do
- let content = T.unpack $ fromAttrib "content" mt
+ let content = fromAttrib "content" mt
updateState $ \s ->
let ps = parserState s in
s{ parserState = ps{
@@ -187,13 +186,13 @@ block = do
, pFigure
, pRawHtmlBlock
]
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
-namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
+namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
namespaces = [(mathMLNamespace, pMath True)]
-mathMLNamespace :: String
+mathMLNamespace :: Text
mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
eSwitch :: (PandocMonad m, Monoid a)
@@ -233,7 +232,7 @@ eFootnote = try $ do
content <- pInTags tag block
addNote ident content
-addNote :: PandocMonad m => String -> Blocks -> TagParser m ()
+addNote :: PandocMonad m => Text -> Blocks -> TagParser m ()
addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s})
eNoteref :: PandocMonad m => TagParser m Inlines
@@ -245,8 +244,8 @@ eNoteref = try $ do
-> (lookup "type" as <|> lookup "epub:type" as)
== Just "noteref"
_ -> False)
- ident <- case T.unpack <$> lookup "href" attr of
- Just ('#':rest) -> return rest
+ ident <- case lookup "href" attr >>= T.uncons of
+ Just ('#', rest) -> return rest
_ -> mzero
_ <- manyTill pAny (pSatisfy (\case
TagClose t -> t == tag
@@ -287,7 +286,7 @@ pListItem nonItem = do
maybe id addId (lookup "id" attr) <$>
pInTags "li" block <* skipMany nonItem
-parseListStyleType :: String -> ListNumberStyle
+parseListStyleType :: Text -> ListNumberStyle
parseListStyleType "lower-roman" = LowerRoman
parseListStyleType "upper-roman" = UpperRoman
parseListStyleType "lower-alpha" = LowerAlpha
@@ -295,7 +294,7 @@ parseListStyleType "upper-alpha" = UpperAlpha
parseListStyleType "decimal" = Decimal
parseListStyleType _ = DefaultStyle
-parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr :: Text -> ListNumberStyle
parseTypeAttr "i" = LowerRoman
parseTypeAttr "I" = UpperRoman
parseTypeAttr "a" = LowerAlpha
@@ -404,20 +403,19 @@ pDiv = try $ do
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
- raw <- T.unpack <$>
- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea"
- <|> pRawTag)
+ raw <- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea"
+ <|> pRawTag)
exts <- getOption readerExtensions
- if extensionEnabled Ext_raw_html exts && not (null raw)
+ if extensionEnabled Ext_raw_html exts && not (T.null raw)
then return $ B.rawBlock "html" raw
else ignore raw
-ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a
+ignore :: (Monoid a, PandocMonad m) => Text -> TagParser m a
ignore raw = do
pos <- getPosition
-- raw can be null for tags like <!DOCTYPE>; see paRawTag
-- in this case we don't want a warning:
- unless (null raw) $
+ unless (T.null raw) $
logMessage $ SkippedContent raw pos
return mempty
@@ -438,7 +436,7 @@ eSection = try $ do
headerLevel :: Text -> TagParser m Int
headerLevel tagtype =
- case safeRead (T.unpack (T.drop 1 tagtype)) of
+ case safeRead (T.drop 1 tagtype) of
Just level ->
-- try (do
-- guardEnabled Ext_epub_html_exts
@@ -468,7 +466,7 @@ pHeader = try $ do
level <- headerLevel tagtype
contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
let ident = fromMaybe "" $ lookup "id" attr
- let classes = maybe [] words $ lookup "class" attr
+ let classes = maybe [] T.words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
attr'' <- registerHeader (ident, classes, keyvals) contents
return $ if bodyTitle
@@ -529,14 +527,14 @@ pCol = try $ do
optional $ pSatisfy (matchTagClose "col")
skipMany pBlank
let width = case lookup "width" attribs of
- Nothing -> case lookup "style" attribs of
- Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs ->
- fromMaybe 0.0 $ safeRead (filter
- (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
- _ -> 0.0
- Just x | not (null x) && last x == '%' ->
- fromMaybe 0.0 $ safeRead (init x)
- _ -> 0.0
+ Nothing -> case lookup "style" attribs of
+ Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
+ fromMaybe 0.0 $ safeRead (T.filter
+ (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+ _ -> 0.0
+ Just (T.unsnoc -> Just (xs, '%')) ->
+ fromMaybe 0.0 $ safeRead xs
+ _ -> 0.0
if width > 0.0
then return $ width / 100.0
else return 0.0
@@ -562,7 +560,7 @@ pCell celltype = try $ do
let extractAlign' [] = ""
extractAlign' ("text-align":x:_) = x
extractAlign' (_:xs) = extractAlign' xs
- let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':'])
+ let extractAlign = extractAlign' . splitTextBy (`elemText` " \t;:")
let align = case maybeFromAttrib "align" tag `mplus`
(extractAlign <$> maybeFromAttrib "style" tag) of
Just "left" -> AlignLeft
@@ -610,7 +608,7 @@ pFigure = try $ do
let caption = fromMaybe mempty mbcap
case B.toList <$> mbimg of
Just [Image attr _ (url, tit)] ->
- return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption
+ return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption
_ -> mzero
pCodeBlock :: PandocMonad m => TagParser m Blocks
@@ -618,21 +616,21 @@ pCodeBlock = try $ do
TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
let attr = toStringAttr attr'
contents <- manyTill pAny (pCloses "pre" <|> eof)
- let rawText = concatMap tagToString contents
+ let rawText = T.concat $ map tagToText contents
-- drop leading newline if any
- let result' = case rawText of
- '\n':xs -> xs
- _ -> rawText
+ let result' = case T.uncons rawText of
+ Just ('\n', xs) -> xs
+ _ -> rawText
-- drop trailing newline if any
- let result = case reverse result' of
- '\n':_ -> init result'
- _ -> result'
+ let result = case T.unsnoc result' of
+ Just (result'', '\n') -> result''
+ _ -> result'
return $ B.codeBlockWith (mkAttr attr) result
-tagToString :: Tag Text -> String
-tagToString (TagText s) = T.unpack s
-tagToString (TagOpen "br" _) = "\n"
-tagToString _ = ""
+tagToText :: Tag Text -> Text
+tagToText (TagText s) = s
+tagToText (TagOpen "br" _) = "\n"
+tagToText _ = ""
inline :: PandocMonad m => TagParser m Inlines
inline = choice
@@ -667,7 +665,7 @@ pLocation = do
pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSat f = do
pos <- getPosition
- token show (const pos) (\x -> if f x then Just x else Nothing)
+ token tshow (const pos) (\x -> if f x then Just x else Nothing)
pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy f = try $ optional pLocation >> pSat f
@@ -688,10 +686,10 @@ pQ = choice $ map try [citedQuote, normalQuote]
where citedQuote = do
tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
- url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag
- let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+ url <- canonicalizeUrl $ fromAttrib "cite" tag
+ let uid = fromMaybe (fromAttrib "name" tag) $
maybeFromAttrib "id" tag
- let cls = words $ T.unpack $ fromAttrib "class" tag
+ let cls = T.words $ fromAttrib "class" tag
makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
normalQuote = do
@@ -729,7 +727,7 @@ pSpanLike =
TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True)
let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs
content <- mconcat <$> manyTill inline (pCloses tagName <|> eof)
- return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content
+ return $ B.spanWith (ids, tagName : cs, kvs) content
pSmall :: PandocMonad m => TagParser m Inlines
pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[]))
@@ -753,19 +751,18 @@ pLineBreak = do
-- Unlike fromAttrib from tagsoup, this distinguishes
-- between a missing attribute and an attribute with empty content.
-maybeFromAttrib :: String -> Tag Text -> Maybe String
-maybeFromAttrib name (TagOpen _ attrs) =
- T.unpack <$> lookup (T.pack name) attrs
+maybeFromAttrib :: Text -> Tag Text -> Maybe Text
+maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
maybeFromAttrib _ _ = Nothing
pLink :: PandocMonad m => TagParser m Inlines
pLink = try $ do
tag <- pSatisfy $ tagOpenLit "a" (const True)
- let title = T.unpack $ fromAttrib "title" tag
+ let title = fromAttrib "title" tag
-- take id from id attribute if present, otherwise name
- let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+ let uid = fromMaybe (fromAttrib "name" tag) $
maybeFromAttrib "id" tag
- let cls = words $ T.unpack $ fromAttrib "class" tag
+ let cls = T.words $ fromAttrib "class" tag
lab <- mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
@@ -778,34 +775,33 @@ pLink = try $ do
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
- url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag
- let title = T.unpack $ fromAttrib "title" tag
- let alt = T.unpack $ fromAttrib "alt" tag
- let uid = T.unpack $ fromAttrib "id" tag
- let cls = words $ T.unpack $ fromAttrib "class" tag
+ url <- canonicalizeUrl $ fromAttrib "src" tag
+ let title = fromAttrib "title" tag
+ let alt = fromAttrib "alt" tag
+ let uid = fromAttrib "id" tag
+ let cls = T.words $ fromAttrib "class" tag
let getAtt k = case fromAttrib k tag of
"" -> []
- v -> [(T.unpack k, T.unpack v)]
+ v -> [(k, v)]
let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
-pCodeWithClass :: PandocMonad m => [(T.Text,String)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
+pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
+pCodeWithClass elemToClass = try $ do
let tagTest = flip elem . fmap fst $ elemToClass
TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
result <- manyTill pAny (pCloses open)
let (ids,cs,kvs) = mkAttr . toStringAttr $ attr'
cs' = maybe cs (:cs) . lookup open $ elemToClass
return . B.codeWith (ids,cs',kvs) .
- unwords . lines . T.unpack . innerText $ result
+ T.unwords . T.lines . innerText $ result
pCode :: PandocMonad m => TagParser m Inlines
pCode = try $ do
(TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
let attr = toStringAttr attr'
result <- manyTill pAny (pCloses open)
- return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $
- innerText result
+ return $ B.codeWith (mkAttr attr) $ T.unwords $ T.lines $ innerText result
pSpan :: PandocMonad m => TagParser m Inlines
pSpan = try $ do
@@ -817,7 +813,7 @@ pSpan = try $ do
where styleAttr = fromMaybe "" $ lookup "style" attr
fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
classes = fromMaybe [] $
- words <$> lookup "class" attr
+ T.words <$> lookup "class" attr
let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
return $ tag contents
@@ -829,18 +825,17 @@ pRawHtmlInline = do
then pSatisfy (not . isBlockTag)
else pSatisfy isInlineTag
exts <- getOption readerExtensions
- let raw = T.unpack $ renderTags' [result]
+ let raw = renderTags' [result]
if extensionEnabled Ext_raw_html exts
then return $ B.rawInline "html" raw
else ignore raw
-mathMLToTeXMath :: String -> Either String String
+mathMLToTeXMath :: Text -> Either Text Text
mathMLToTeXMath s = writeTeX <$> readMathML s
-toStringAttr :: [(Text, Text)] -> [(String, String)]
+toStringAttr :: [(Text, Text)] -> [(Text, Text)]
toStringAttr = map go
- where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x),
- T.unpack y)
+ where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y)
pScriptMath :: PandocMonad m => TagParser m Inlines
pScriptMath = try $ do
@@ -849,8 +844,7 @@ pScriptMath = try $ do
Just x | "math/tex" `T.isPrefixOf` x
-> return $ "display" `T.isSuffixOf` x
_ -> mzero
- contents <- T.unpack . innerText <$>
- manyTill pAny (pSatisfy (matchTagClose "script"))
+ contents <- innerText <$> manyTill pAny (pSatisfy (matchTagClose "script"))
return $ (if isdisplay then B.displayMath else B.math) contents
pMath :: PandocMonad m => Bool -> TagParser m Inlines
@@ -862,11 +856,11 @@ pMath inCase = try $ do
unless inCase $
guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr))
contents <- manyTill pAny (pSatisfy (matchTagClose "math"))
- case mathMLToTeXMath (T.unpack $ renderTags $
+ case mathMLToTeXMath (renderTags $
[open] <> contents <> [TagClose "math"]) of
Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $
- T.unpack $ innerText contents
- Right [] -> return mempty
+ innerText contents
+ Right "" -> return mempty
Right x -> return $ case lookup "display" attr of
Just "block" -> B.displayMath x
_ -> B.math x
@@ -925,7 +919,7 @@ pTagText = try $ do
parsed <- lift $ lift $
flip runReaderT qu $ runParserT (many pTagContents) st "text" str
case parsed of
- Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'"
+ Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'"
Right result -> return $ mconcat result
pBlank :: PandocMonad m => TagParser m ()
@@ -954,11 +948,11 @@ pRawTeX = do
guardEnabled Ext_raw_tex
inp <- getInput
st <- getState
- res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
+ res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" inp
case res of
Left _ -> mzero
Right (contents, raw) -> do
- _ <- count (length raw) anyChar
+ _ <- count (T.length raw) anyChar
return $ B.rawInline "tex" contents
pStr :: PandocMonad m => InlinesParser m Inlines
@@ -966,7 +960,7 @@ pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
updateLastStrPos
- return $ B.str result
+ return $ B.str $ T.pack result
isSpecial :: Char -> Bool
isSpecial '"' = True
@@ -982,7 +976,7 @@ isSpecial '\8221' = True
isSpecial _ = False
pSymbol :: PandocMonad m => InlinesParser m Inlines
-pSymbol = satisfy isSpecial >>= return . B.str . (:[])
+pSymbol = satisfy isSpecial >>= return . B.str . T.singleton
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
@@ -1019,7 +1013,7 @@ pBad = do
'\158' -> '\382'
'\159' -> '\376'
_ -> '?'
- return $ B.str [c']
+ return $ B.str $ T.singleton c'
pSpace :: PandocMonad m => InlinesParser m Inlines
pSpace = many1 (satisfy isSpace) >>= \xs ->
@@ -1156,8 +1150,8 @@ _ `closes` _ = False
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: Monad m
- => (Tag String -> Bool)
- -> ParserT String st m String
+ => (Tag Text -> Bool)
+ -> ParserT Text st m Text
htmlInBalanced f = try $ do
lookAhead (char '<')
inp <- getInput
@@ -1174,21 +1168,21 @@ htmlInBalanced f = try $ do
(TagClose _ : TagPosition er ec : _) -> do
let ls = er - sr
let cs = ec - sc
- lscontents <- unlines <$> count ls anyLine
+ lscontents <- T.unlines <$> count ls anyLine
cscontents <- count cs anyChar
closetag <- do
x <- many (satisfy (/='>'))
char '>'
return (x <> ">")
- return (lscontents <> cscontents <> closetag)
+ return $ lscontents <> T.pack cscontents <> T.pack closetag
_ -> mzero
_ -> mzero
-htmlInBalanced' :: String
- -> [Tag String]
- -> [Tag String]
+htmlInBalanced' :: Text
+ -> [Tag Text]
+ -> [Tag Text]
htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
- where go :: Int -> [Tag String] -> Maybe [Tag String]
+ where go :: Int -> [Tag Text] -> Maybe [Tag Text]
go n (t@(TagOpen tn' _):rest) | tn' == tagname =
(t :) <$> go (n + 1) rest
go 1 (t@(TagClose tn'):_) | tn' == tagname =
@@ -1204,8 +1198,8 @@ hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: (HasReaderOptions st, Monad m)
- => (Tag String -> Bool)
- -> ParserT [Char] st m (Tag String, String)
+ => (Tag Text -> Bool)
+ -> ParserT Text st m (Tag Text, Text)
htmlTag f = try $ do
lookAhead (char '<')
startpos <- getPosition
@@ -1213,7 +1207,7 @@ htmlTag f = try $ do
let ts = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = False
, optTagPosition = True }
- (inp ++ " ") -- add space to ensure that
+ (inp <> " ") -- add space to ensure that
-- we get a TagPosition after the tag
(next, ln, col) <- case ts of
(TagPosition{} : next : TagPosition ln col : _)
@@ -1225,13 +1219,12 @@ htmlTag f = try $ do
-- so we exclude . even though it's a valid character
-- in XML element names
let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_'
- let isName s = case s of
- [] -> False
- (c:cs) -> isLetter c && all isNameChar cs
- let isPI s = case s of
- ('?':_) -> True -- processing instruction
- _ -> False
-
+ let isName s = case T.uncons s of
+ Nothing -> False
+ Just (c, cs) -> isLetter c && T.all isNameChar cs
+ let isPI s = case T.uncons s of
+ Just ('?', _) -> True -- processing instruction
+ _ -> False
let endpos = if ln == 1
then setSourceColumn startpos
(sourceColumn startpos + (col - 1))
@@ -1247,18 +1240,18 @@ htmlTag f = try $ do
-- basic sanity check, since the parser is very forgiving
-- and finds tags in stuff like x<y)
guard $ isName tagname || isPI tagname
- guard $ not $ null tagname
+ guard $ not $ T.null tagname
-- <https://example.org> should NOT be a tag either.
-- tagsoup will parse it as TagOpen "https:" [("example.org","")]
- guard $ last tagname /= ':'
+ guard $ T.last tagname /= ':'
char '<'
rendered <- manyTill anyChar endAngle
- return (next, "<" ++ rendered ++ ">")
+ return (next, T.pack $ "<" ++ rendered ++ ">")
case next of
TagComment s
- | "<!--" `isPrefixOf` inp -> do
+ | "<!--" `T.isPrefixOf` inp -> do
string "<!--"
- count (length s) anyChar
+ count (T.length s) anyChar
string "-->"
stripComments <- getOption readerStripComments
if stripComments
@@ -1272,12 +1265,12 @@ htmlTag f = try $ do
handleTag tagname
_ -> mzero
-mkAttr :: [(String, String)] -> Attr
+mkAttr :: [(Text, Text)] -> Attr
mkAttr attr = (attribsId, attribsClasses, attribsKV)
where attribsId = fromMaybe "" $ lookup "id" attr
- attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes
+ attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
- epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
+ epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr
-- Strip namespace prefixes
stripPrefixes :: [Tag Text] -> [Tag Text]
@@ -1304,11 +1297,11 @@ isSpace _ = False
-- Utilities
-- | Adjusts a url according to the document's base URL.
-canonicalizeUrl :: PandocMonad m => String -> TagParser m String
+canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text
canonicalizeUrl url = do
mbBaseHref <- baseHref <$> getState
- return $ case (parseURIReference url, mbBaseHref) of
- (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+ return $ case (parseURIReference (T.unpack url), mbBaseHref) of
+ (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
_ -> url
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 0a048b6e6..3fc2f9715 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Haddock
Copyright : Copyright (C) 2013 David Lazar
@@ -17,9 +18,10 @@ module Text.Pandoc.Readers.Haddock
import Prelude
import Control.Monad.Except (throwError)
-import Data.List (intersperse, stripPrefix)
+import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Data.Text (Text, unpack)
+import qualified Data.Text as T
import Documentation.Haddock.Parser
import Documentation.Haddock.Types as H
import Text.Pandoc.Builder (Blocks, Inlines)
@@ -28,7 +30,7 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitBy, trim)
+import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
-- | Parse Haddock markup and return a 'Pandoc' document.
@@ -51,7 +53,7 @@ docHToBlocks d' =
case d' of
DocEmpty -> mempty
DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) ->
- B.headerWith (ident,[],[]) (headerLevel h)
+ B.headerWith (T.pack ident,[],[]) (headerLevel h)
(docHToInlines False $ headerTitle h)
DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
DocString _ -> inlineFallback
@@ -73,12 +75,12 @@ docHToBlocks d' =
DocDefList items -> B.definitionList (map (\(d,t) ->
(docHToInlines False d,
[consolidatePlains $ docHToBlocks t])) items)
- DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s
+ DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) $ T.pack s
DocCodeBlock d -> B.para $ docHToInlines True d
DocHyperlink _ -> inlineFallback
DocPic _ -> inlineFallback
DocAName _ -> inlineFallback
- DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
+ DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim $ T.pack s)
DocExamples es -> mconcat $ map (\e ->
makeExample ">>>" (exampleExpression e) (exampleResult e)) es
DocTable H.Table{ tableHeaderRows = headerRows
@@ -114,58 +116,58 @@ docHToInlines isCode d' =
(docHToInlines isCode d2)
DocString s
| isCode -> mconcat $ intersperse B.linebreak
- $ map B.code $ splitBy (=='\n') s
- | otherwise -> B.text s
+ $ map B.code $ splitTextBy (=='\n') $ T.pack s
+ | otherwise -> B.text $ T.pack s
DocParagraph _ -> mempty
DocIdentifier ident ->
case toRegular (DocIdentifier ident) of
- DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) s
+ DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
_ -> mempty
- DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s
- DocModule s -> B.codeWith ("",["haskell","module"],[]) s
+ DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
+ DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s
DocWarning _ -> mempty -- TODO
DocEmphasis d -> B.emph (docHToInlines isCode d)
- DocMonospaced (DocString s) -> B.code s
+ DocMonospaced (DocString s) -> B.code $ T.pack s
DocMonospaced d -> docHToInlines True d
DocBold d -> B.strong (docHToInlines isCode d)
- DocMathInline s -> B.math s
- DocMathDisplay s -> B.displayMath s
+ DocMathInline s -> B.math $ T.pack s
+ DocMathDisplay s -> B.displayMath $ T.pack s
DocHeader _ -> mempty
DocUnorderedList _ -> mempty
DocOrderedList _ -> mempty
DocDefList _ -> mempty
DocCodeBlock _ -> mempty
- DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h)
- (maybe (B.text $ hyperlinkUrl h) (docHToInlines isCode)
+ DocHyperlink h -> B.link (T.pack $ hyperlinkUrl h) (T.pack $ hyperlinkUrl h)
+ (maybe (B.text $ T.pack $ hyperlinkUrl h) (docHToInlines isCode)
(hyperlinkLabel h))
- DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p)
- (maybe mempty B.text $ pictureTitle p)
- DocAName s -> B.spanWith (s,["anchor"],[]) mempty
+ DocPic p -> B.image (T.pack $ pictureUri p) (T.pack $ fromMaybe (pictureUri p) $ pictureTitle p)
+ (maybe mempty (B.text . T.pack) $ pictureTitle p)
+ DocAName s -> B.spanWith (T.pack s,["anchor"],[]) mempty
DocProperty _ -> mempty
DocExamples _ -> mempty
DocTable _ -> mempty
-- | Create an 'Example', stripping superfluous characters as appropriate
-makeExample :: String -> String -> [String] -> Blocks
+makeExample :: T.Text -> String -> [String] -> Blocks
makeExample prompt expression result =
B.para $ B.codeWith ("",["prompt"],[]) prompt
<> B.space
- <> B.codeWith ([], ["haskell","expr"], []) (trim expression)
+ <> B.codeWith ("", ["haskell","expr"], []) (trim $ T.pack expression)
<> B.linebreak
<> mconcat (intersperse B.linebreak $ map coder result')
where
-- 1. drop trailing whitespace from the prompt, remember the prefix
- prefix = takeWhile (`elem` " \t") prompt
+ prefix = T.takeWhile (`elem` (" \t" :: String)) prompt
-- 2. drop, if possible, the exact same sequence of whitespace
-- characters from each result line
--
-- 3. interpret lines that only contain the string "<BLANKLINE>" as an
-- empty line
- result' = map (substituteBlankLine . tryStripPrefix prefix) result
+ result' = map (substituteBlankLine . tryStripPrefix prefix . T.pack) result
where
- tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
+ tryStripPrefix xs ys = fromMaybe ys $ T.stripPrefix xs ys
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine line = line
- coder = B.codeWith ([], ["result"], [])
+ coder = B.codeWith ("", ["result"], [])
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index dbca5a59f..8efc230cc 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Ipynb
Copyright : Copyright (C) 2019 John MacFarlane
@@ -19,7 +20,6 @@ module Text.Pandoc.Readers.Ipynb ( readIpynb )
where
import Prelude
import Data.Char (isDigit)
-import Data.List (isPrefixOf)
import Data.Maybe (fromMaybe)
import Data.Digest.Pure.SHA (sha1, showDigest)
import Text.Pandoc.Options
@@ -30,6 +30,7 @@ import Text.Pandoc.Definition
import Data.Ipynb as Ipynb
import Text.Pandoc.Class
import Text.Pandoc.MIME (extensionFromMimeType)
+import Text.Pandoc.Shared (tshow)
import Text.Pandoc.UTF8
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Error
@@ -51,15 +52,15 @@ readIpynb opts t = do
Left _ ->
case eitherDecode src of
Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3
- Left err -> throwError $ PandocIpynbDecodingError err
+ Left err -> throwError $ PandocIpynbDecodingError $ T.pack err
notebookToPandoc :: PandocMonad m
=> ReaderOptions -> Notebook a -> m Pandoc
notebookToPandoc opts notebook = do
let cells = notebookCells notebook
let (fmt,fmtminor) = notebookFormat notebook
- let m = M.insert "nbformat" (MetaString $ show fmt) $
- M.insert "nbformat_minor" (MetaString $ show fmtminor) $
+ let m = M.insert "nbformat" (MetaString $ tshow fmt) $
+ M.insert "nbformat_minor" (MetaString $ tshow fmtminor) $
jsonMetaToMeta (notebookMetadata notebook)
let lang = case M.lookup "kernelspec" m of
Just (MetaMap ks) ->
@@ -72,7 +73,7 @@ notebookToPandoc opts notebook = do
return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks
cellToBlocks :: PandocMonad m
- => ReaderOptions -> String -> Cell a -> m B.Blocks
+ => ReaderOptions -> Text -> Cell a -> m B.Blocks
cellToBlocks opts lang c = do
let Source ts = cellSource c
let source = mconcat ts
@@ -100,19 +101,18 @@ cellToBlocks opts lang c = do
"text/markdown" -> "markdown"
"text/x-rsrt" -> "rst"
_ -> format
- return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format'
- $ T.unpack source
+ return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
outputBlocks <- mconcat <$> mapM outputToBlock outputs
- let kvs' = maybe kvs (\x -> ("execution_count", show x):kvs) ec
+ let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
return $ B.divWith ("",["cell","code"],kvs') $
- B.codeBlockWith ("",[lang],[]) (T.unpack source)
+ B.codeBlockWith ("",[lang],[]) source
<> outputBlocks
-- Remove attachment: prefix from images...
fixImage :: Inline -> Inline
fixImage (Image attr lab (src,tit))
- | "attachment:" `isPrefixOf` src = Image attr lab (drop 11 src, tit)
+ | "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit)
fixImage x = x
addAttachment :: PandocMonad m => (Text, MimeBundle) -> m ()
@@ -120,19 +120,19 @@ addAttachment (fname, mimeBundle) = do
let fp = T.unpack fname
case M.toList (unMimeBundle mimeBundle) of
(mimeType, BinaryData bs):_ ->
- insertMedia fp (Just $ T.unpack mimeType) (BL.fromStrict bs)
+ insertMedia fp (Just mimeType) (BL.fromStrict bs)
(mimeType, TextualData t):_ ->
- insertMedia fp (Just $ T.unpack mimeType)
+ insertMedia fp (Just mimeType)
(BL.fromStrict $ TE.encodeUtf8 t)
(mimeType, JsonData v):_ ->
- insertMedia fp (Just $ T.unpack mimeType) (encode v)
- [] -> report $ CouldNotFetchResource fp "no attachment"
+ insertMedia fp (Just mimeType) (encode v)
+ [] -> report $ CouldNotFetchResource fname "no attachment"
outputToBlock :: PandocMonad m => Output a -> m B.Blocks
outputToBlock Stream{ streamName = sName,
streamText = Source text } = do
- return $ B.divWith ("",["output","stream",T.unpack sName],[])
- $ B.codeBlock $ T.unpack . mconcat $ text
+ return $ B.divWith ("",["output","stream",sName],[])
+ $ B.codeBlock $ T.concat $ text
outputToBlock DisplayData{ displayData = data',
displayMetadata = metadata' } =
B.divWith ("",["output", "display_data"],[]) <$>
@@ -140,15 +140,15 @@ outputToBlock DisplayData{ displayData = data',
outputToBlock ExecuteResult{ executeCount = ec,
executeData = data',
executeMetadata = metadata' } =
- B.divWith ("",["output", "execute_result"],[("execution_count",show ec)])
+ B.divWith ("",["output", "execute_result"],[("execution_count",tshow ec)])
<$> handleData metadata' data'
outputToBlock Err{ errName = ename,
errValue = evalue,
errTraceback = traceback } = do
return $ B.divWith ("",["output","error"],
- [("ename",T.unpack ename),
- ("evalue",T.unpack evalue)])
- $ B.codeBlock $ T.unpack . T.unlines $ traceback
+ [("ename",ename),
+ ("evalue",evalue)])
+ $ B.codeBlock $ T.unlines $ traceback
-- We want to display the richest output possible given
-- the output format.
@@ -174,54 +174,53 @@ handleData metadata (MimeBundle mb) =
let metaPairs = jsonMetaToPairs meta
let bl = BL.fromStrict bs
-- SHA1 hash for filename
- let mt' = T.unpack mt
- let fname = showDigest (sha1 bl) ++
- case extensionFromMimeType mt' of
+ let fname = T.pack (showDigest (sha1 bl)) <>
+ case extensionFromMimeType mt of
Nothing -> ""
- Just ext -> '.':ext
- insertMedia fname (Just mt') bl
+ Just ext -> "." <> ext
+ insertMedia (T.unpack fname) (Just mt) bl
return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty
| otherwise = return mempty
dataBlock ("text/html", TextualData t)
- = return $ B.rawBlock "html" $ T.unpack t
+ = return $ B.rawBlock "html" $ t
dataBlock ("text/latex", TextualData t)
- = return $ B.rawBlock "latex" $ T.unpack t
+ = return $ B.rawBlock "latex" $ t
dataBlock ("text/plain", TextualData t) =
- return $ B.codeBlock $ T.unpack t
+ return $ B.codeBlock $ t
dataBlock (_, JsonData v) =
- return $ B.codeBlockWith ("",["json"],[]) $ toStringLazy $ encode v
+ return $ B.codeBlockWith ("",["json"],[]) $ T.pack $ toStringLazy $ encode v
dataBlock _ = return mempty
-jsonMetaToMeta :: JSONMeta -> M.Map String MetaValue
-jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue
+jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
+jsonMetaToMeta = M.map valueToMetaValue
where
valueToMetaValue :: Value -> MetaValue
valueToMetaValue x@(Object{}) =
case fromJSON x of
- Error s -> MetaString s
+ Error s -> MetaString $ T.pack s
Success jm' -> MetaMap $ jsonMetaToMeta jm'
valueToMetaValue x@(Array{}) =
case fromJSON x of
- Error s -> MetaString s
+ Error s -> MetaString $ T.pack s
Success xs -> MetaList $ map valueToMetaValue xs
valueToMetaValue (Bool b) = MetaBool b
- valueToMetaValue (String t) = MetaString (T.unpack t)
+ valueToMetaValue (String t) = MetaString t
valueToMetaValue (Number n)
- | Scientific.isInteger n = MetaString (show (floor n :: Integer))
- | otherwise = MetaString (show n)
+ | Scientific.isInteger n = MetaString (tshow (floor n :: Integer))
+ | otherwise = MetaString (tshow n)
valueToMetaValue Aeson.Null = MetaString ""
-jsonMetaToPairs :: JSONMeta -> [(String, String)]
-jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map
+jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
+jsonMetaToPairs = M.toList . M.map
(\case
String t
| not (T.all isDigit t)
, t /= "true"
, t /= "false"
- -> T.unpack t
- x -> UTF8.toStringLazy $ Aeson.encode x)
+ -> t
+ x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index e074599eb..320b9c1dd 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.JATS
Copyright : Copyright (C) 2017-2019 Hamish Mackenzie
@@ -76,13 +77,13 @@ convertEntity :: String -> String
convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
+attrValue :: String -> Element -> Text
attrValue attr =
fromMaybe "" . maybeAttrValue attr
-maybeAttrValue :: String -> Element -> Maybe String
+maybeAttrValue :: String -> Element -> Maybe Text
maybeAttrValue attr elt =
- lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+ T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
-- convenience function
named :: String -> Element -> Bool
@@ -90,7 +91,7 @@ named s e = qName (elName e) == s
--
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m ()
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> JATS m ()
addMeta field val = modify (setMeta field val)
instance HasMeta JATSState where
@@ -126,15 +127,13 @@ isBlockElement (Elem e) = qName (elName e) `S.member` blocktags
isBlockElement _ = False
-- Trim leading and trailing newline characters
-trimNl :: String -> String
-trimNl = reverse . go . reverse . go
- where go ('\n':xs) = xs
- go xs = xs
+trimNl :: Text -> Text
+trimNl = T.dropAround (== '\n')
-- function that is used by both graphic (in parseBlock)
-- and inline-graphic (in parseInline)
getGraphic :: PandocMonad m
- => Maybe (Inlines, String) -> Element -> JATS m Inlines
+ => Maybe (Inlines, Text) -> Element -> JATS m Inlines
getGraphic mbfigdata e = do
let atVal a = attrValue a e
(ident, title, caption) =
@@ -142,7 +141,7 @@ getGraphic mbfigdata e = do
Just (capt, i) -> (i, "fig:" <> atVal "title", capt)
Nothing -> (atVal "id", atVal "title",
text (atVal "alt-text"))
- attr = (ident, words $ atVal "role", [])
+ attr = (ident, T.words $ atVal "role", [])
imageUrl = atVal "href"
return $ imageWith attr imageUrl title caption
@@ -155,8 +154,8 @@ parseBlock :: PandocMonad m => Content -> JATS m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text s
-parseBlock (CRef x) = return $ plain $ str $ map toUpper x
+ else return $ plain $ trimInlines $ text $ T.pack s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
parseBlock (Elem e) =
case qName (elName e) of
"p" -> parseMixed para (elContent e)
@@ -167,7 +166,7 @@ parseBlock (Elem e) =
"bullet" -> bulletList <$> listitems
listType -> do
let start = fromMaybe 1 $
- (strContent <$> (filterElement (named "list-item") e
+ (textContent <$> (filterElement (named "list-item") e
>>= filterElement (named "label")))
>>= safeRead
orderedListWith (start, parseListStyleType listType, DefaultDelim)
@@ -204,7 +203,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ strContentRecursive e
+ $ trimNl $ textContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -267,9 +266,9 @@ parseBlock (Elem e) =
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
- let toWidth c = case findAttr (unqual "colwidth") c of
+ let toWidth c = case findAttrText (unqual "colwidth") c of
Just w -> fromMaybe 0
- $ safeRead $ '0': filter (\x ->
+ $ safeRead $ "0" <> T.filter (\x ->
isDigit x || x == '.') w
Nothing -> 0 :: Double
let numrows = foldl' max 0 $ map length bodyrows
@@ -363,7 +362,7 @@ parseRefList e = do
return mempty
parseRef :: PandocMonad m
- => Element -> JATS m (Map.Map String MetaValue)
+ => Element -> JATS m (Map.Map Text MetaValue)
parseRef e = do
let refId = text $ attrValue "id" e
let getInlineText n = maybe (return mempty) getInlines . filterChild (named n)
@@ -396,7 +395,7 @@ parseRef e = do
family <- maybe (return mempty) getInlines
$ filterChild (named "surname") nm
return $ toMetaValue $ Map.fromList [
- ("given", given)
+ ("given" :: Text, given)
, ("family", family)
]
personGroups <- mapM (\pg ->
@@ -406,7 +405,7 @@ parseRef e = do
toMetaValue names))
personGroups'
return $ Map.fromList $
- [ ("id", toMetaValue refId)
+ [ ("id" :: Text, toMetaValue refId)
, ("type", toMetaValue refType)
, ("title", toMetaValue refTitle)
, ("container-title", toMetaValue refContainerTitle)
@@ -415,7 +414,7 @@ parseRef e = do
, ("title", toMetaValue refTitle)
, ("issued", toMetaValue
$ Map.fromList [
- ("year", refYear)
+ ("year" :: Text, refYear)
])
, ("volume", toMetaValue refVolume)
, ("page", toMetaValue refPages)
@@ -424,6 +423,15 @@ parseRef e = do
Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
-- TODO handle mixed-citation
+findAttrText :: QName -> Element -> Maybe Text
+findAttrText x = fmap T.pack . findAttr x
+
+textContent :: Element -> Text
+textContent = T.pack . strContent
+
+textContentRecursive :: Element -> Text
+textContentRecursive = T.pack . strContentRecursive
+
strContentRecursive :: Element -> String
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -433,9 +441,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text s
+parseInline (Text (CData _ s _)) = return $ text $ T.pack s
parseInline (CRef ref) =
- return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
+ return $ maybe (text $ T.toUpper $ T.pack ref) text $ T.pack <$> lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
"italic" -> emph <$> innerInlines
@@ -464,7 +472,7 @@ parseInline (Elem e) =
"xref" -> do
ils <- innerInlines
let rid = attrValue "rid" e
- let rids = words rid
+ let rids = T.words rid
let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e
let attr = (attrValue "id" e, [], maybeToList refType)
return $ if refType == Just ("ref-type","bibr")
@@ -477,13 +485,13 @@ parseInline (Elem e) =
, citationNoteNum = 0
, citationHash = 0}) rids)
ils
- else linkWith attr ('#' : rid) "" ils
+ else linkWith attr ("#" <> rid) "" ils
"ext-link" -> do
ils <- innerInlines
- let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> h
- _ -> '#' : attrValue "rid" e
+ Just h -> T.pack h
+ _ -> "#" <> attrValue "rid" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, [], [])
return $ linkWith attr href title ils'
@@ -491,23 +499,23 @@ parseInline (Elem e) =
"disp-formula" -> formula displayMath
"inline-formula" -> formula math
"math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
- "tex-math" -> return . math $ strContent e
+ "tex-math" -> return . math $ textContent e
- "email" -> return $ link ("mailto:" ++ strContent e) ""
- $ str $ strContent e
- "uri" -> return $ link (strContent e) "" $ str $ strContent e
+ "email" -> return $ link ("mailto:" <> textContent e) ""
+ $ str $ textContent e
+ "uri" -> return $ link (textContent e) "" $ str $ textContent e
"fn" -> (note . mconcat) <$>
mapM parseBlock (elContent e)
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
mapM parseInline (elContent e)
mathML x =
- case readMathML . showElement $ everywhere (mkT removePrefix) x of
+ case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
Left _ -> mempty
Right m -> writeTeX m
formula constructor = do
let whereToLook = fromMaybe e $ filterElement (named "alternatives") e
- texMaths = map strContent $
+ texMaths = map textContent $
filterChildren (named "tex-math") whereToLook
mathMLs = map mathML $
filterChildren isMathML whereToLook
@@ -520,4 +528,4 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index be19964a4..5c9a3e69c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,9 +1,10 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.LaTeX
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -29,9 +30,9 @@ import Prelude
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isLetter, toLower, toUpper, chr)
+import Data.Char (isDigit, isLetter, toUpper, chr)
import Data.Default
-import Data.List (intercalate, isPrefixOf)
+import Data.List (intercalate)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, maybeToList)
import qualified Data.Set as Set
@@ -44,7 +45,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv,
readFileFromDirs, report, setResourcePath,
setTranslations, translateTerm, trace, fileExists)
-import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError))
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
@@ -77,7 +78,7 @@ readLaTeX opts ltx = do
(tokenize "source" (crFilter ltx))
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError (T.unpack ltx) e
+ Left e -> throwError $ PandocParsecError ltx e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
@@ -99,13 +100,13 @@ parseLaTeX = do
walk (resolveRefs (sLabels st)) doc'
return $ Pandoc meta bs'
-resolveRefs :: M.Map String [Inline] -> Inline -> Inline
+resolveRefs :: M.Map Text [Inline] -> Inline -> Inline
resolveRefs labels x@(Link (ident,classes,kvs) _ _) =
case (lookup "reference-type" kvs,
lookup "reference" kvs) of
(Just "ref", Just lab) ->
case M.lookup lab labels of
- Just txt -> Link (ident,classes,kvs) txt ('#':lab, "")
+ Just txt -> Link (ident,classes,kvs) txt ("#" <> lab, "")
Nothing -> x
_ -> x
resolveRefs _ x = x
@@ -123,11 +124,11 @@ resolveRefs _ x = x
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT String s m String
+ => ParserT Text s m Text
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" $ T.pack inp
+ let toks = tokenize "source" inp
snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
<|> (rawLaTeXParser toks True
(do choice (map controlSeq
@@ -151,14 +152,14 @@ beginOrEndCommand = try $ do
(inlineEnvironments :: M.Map Text (LP PandocPure Inlines))
then mzero
else return $ rawBlock "latex"
- (T.unpack (txt <> untokenize rawargs))
+ (txt <> untokenize rawargs)
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT String s m String
+ => ParserT Text s m Text
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" $ T.pack inp
+ let toks = tokenize "source" inp
raw <- snd <$>
( rawLaTeXParser toks True
(mempty <$ (controlSeq "input" >> skipMany opt >> braced))
@@ -167,23 +168,23 @@ rawLaTeXInline = do
inlines
)
finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
- return $ raw <> finalbraces
+ return $ raw <> T.pack finalbraces
-inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" $ T.pack inp
+ let toks = tokenize "source" inp
fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
inlines
-- inline elements:
word :: PandocMonad m => LP m Inlines
-word = (str . T.unpack . untoken) <$> satisfyTok isWordTok
+word = (str . untoken) <$> satisfyTok isWordTok
regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
+regularSymbol = (str . untoken) <$> satisfyTok isRegularSymbol
where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
isRegularSymbol _ = False
isSpecial c = c `Set.member` specialChars
@@ -199,14 +200,14 @@ inlineGroup = do
doLHSverb :: PandocMonad m => LP m Inlines
doLHSverb =
- (codeWith ("",["haskell"],[]) . T.unpack . untokenize)
+ (codeWith ("",["haskell"],[]) . untokenize)
<$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|')
-mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
-mkImage options src = do
+mkImage :: PandocMonad m => [(Text, Text)] -> Text -> LP m Inlines
+mkImage options (T.unpack -> src) = do
let replaceTextwidth (k,v) =
case numUnit v of
- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) <> "%")
_ -> (k, v)
let kvs = map replaceTextwidth
$ filter (\(k,_) -> k `elem` ["width", "height"]) options
@@ -223,10 +224,10 @@ mkImage options src = do
then return s'
else findFile s es
src' <- case takeExtension src of
- "" | not (null defaultExt) -> return $ addExtension src defaultExt
+ "" | not (T.null defaultExt) -> return $ addExtension src $ T.unpack defaultExt
| otherwise -> findFile src exts
_ -> return src
- return $ imageWith attr src' "" alt
+ return $ imageWith attr (T.pack src') "" alt
doxspace :: PandocMonad m => LP m Inlines
doxspace =
@@ -435,7 +436,7 @@ siUnitMap = M.fromList
, ("zetta", str "Z")
]
-lit :: String -> LP m Inlines
+lit :: Text -> LP m Inlines
lit = pure . str
removeDoubleQuotes :: Text -> Text
@@ -471,7 +472,7 @@ quoted' :: PandocMonad m
-> LP m ()
-> LP m Inlines
quoted' f starter ender = do
- startchs <- (T.unpack . untokenize) <$> starter
+ startchs <- untokenize <$> starter
smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
if smart
then do
@@ -487,7 +488,7 @@ quoted' f starter ender = do
enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
enquote starred mblang = do
skipopts
- let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let lang = mblang >>= babelLangToBCP47
let langspan = case lang of
Nothing -> id
Just l -> spanWith ("",[],[("lang", renderLang l)])
@@ -503,27 +504,27 @@ blockquote citations mblang = do
cs <- cites NormalCitation False
return $ para (cite cs mempty)
else return mempty
- let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let lang = mblang >>= babelLangToBCP47
let langdiv = case lang of
Nothing -> id
Just l -> divWith ("",[],[("lang", renderLang l)])
bs <- grouped block
return $ blockQuote . langdiv $ (bs <> citePar)
-doAcronym :: PandocMonad m => String -> LP m Inlines
+doAcronym :: PandocMonad m => Text -> LP m Inlines
doAcronym form = do
acro <- braced
- return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro),
- ("acronym-form", "singular+" ++ form)])
- $ str $ toksToString acro]
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "singular+" <> form)])
+ $ str $ untokenize acro]
-doAcronymPlural :: PandocMonad m => String -> LP m Inlines
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
doAcronymPlural form = do
acro <- braced
plural <- lit "s"
- return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro),
- ("acronym-form", "plural+" ++ form)]) $
- mconcat [str $ toksToString acro, plural]]
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "plural+" <> form)]) $
+ mconcat [str $ untokenize acro, plural]]
doverb :: PandocMonad m => LP m Inlines
doverb = do
@@ -532,7 +533,7 @@ doverb = do
Just (c, ts) | T.null ts -> return c
_ -> mzero
withVerbatimMode $
- (code . T.unpack . untokenize) <$>
+ (code . untokenize) <$>
manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
verbTok :: PandocMonad m => Char -> LP m Tok
@@ -547,7 +548,7 @@ verbTok stopchar = do
: totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
return $ Tok pos toktype t1
-listingsLanguage :: [(String, String)] -> Maybe String
+listingsLanguage :: [(Text, Text)] -> Maybe Text
listingsLanguage opts =
case lookup "language" opts of
Nothing -> Nothing
@@ -562,10 +563,10 @@ dolstinline = do
domintinline :: PandocMonad m => LP m Inlines
domintinline = do
skipopts
- cls <- toksToString <$> braced
+ cls <- untokenize <$> braced
doinlinecode [cls]
-doinlinecode :: PandocMonad m => [String] -> LP m Inlines
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
doinlinecode classes = do
Tok _ Symbol t <- anySymbol
marker <- case T.uncons t of
@@ -573,14 +574,14 @@ doinlinecode classes = do
_ -> mzero
let stopchar = if marker == '{' then '}' else marker
withVerbatimMode $
- (codeWith ("",classes,[]) . map nlToSpace . T.unpack . untokenize) <$>
+ (codeWith ("",classes,[]) . T.map nlToSpace . untokenize) <$>
manyTill (verbTok stopchar) (symbol stopchar)
nlToSpace :: Char -> Char
nlToSpace '\n' = ' '
nlToSpace x = x
-keyval :: PandocMonad m => LP m (String, String)
+keyval :: PandocMonad m => LP m (Text, Text)
keyval = try $ do
Tok _ Word key <- satisfyTok isWordTok
optional sp
@@ -601,35 +602,34 @@ keyval = try $ do
_ -> True))))))
optional (symbol ',')
optional sp
- return (T.unpack key, T.unpack $ T.strip val)
+ return (key, T.strip val)
-keyvals :: PandocMonad m => LP m [(String, String)]
+keyvals :: PandocMonad m => LP m [(Text, Text)]
keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
accent combiningAccent fallBack = try $ do
ils <- tok
case toList ils of
- (Str (x:xs) : ys) -> return $ fromList $
+ (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-- try to normalize to the combined character:
- Str (T.unpack
- (Normalize.normalize Normalize.NFC
- (T.pack [x, combiningAccent])) ++ xs) : ys
- [Space] -> return $ str [fromMaybe combiningAccent fallBack]
- [] -> return $ str [fromMaybe combiningAccent fallBack]
+ Str (Normalize.normalize Normalize.NFC
+ (T.pack [x, combiningAccent]) <> xs) : ys
+ [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+ [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
_ -> return ils
-mathDisplay :: String -> Inlines
+mathDisplay :: Text -> Inlines
mathDisplay = displayMath . trimMath
-mathInline :: String -> Inlines
+mathInline :: Text -> Inlines
mathInline = math . trimMath
dollarsMath :: PandocMonad m => LP m Inlines
dollarsMath = do
symbol '$'
display <- option False (True <$ symbol '$')
- (do contents <- try $ T.unpack . untokenize <$> pDollarsMath 0
+ (do contents <- try $ untokenize <$> pDollarsMath 0
if display
then (mathDisplay contents <$ symbol '$')
else return $ mathInline contents)
@@ -682,10 +682,10 @@ simpleCiteArgs = try $ do
}
return $ addPrefix pre $ addSuffix suf $ map conv keys
-citationLabel :: PandocMonad m => LP m String
+citationLabel :: PandocMonad m => LP m Text
citationLabel = do
optional spaces
- toksToString <$>
+ untokenize <$>
(many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
<* optional spaces
<* optional (symbol ',')
@@ -729,10 +729,10 @@ cites mode multi = try $ do
addMprenote _ _ = []
addMpostnote = addSuffix . mpostnote
-citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
+citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
citation name mode multi = do
(c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw)
+ return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
handleCitationPart :: Inlines -> [Citation]
handleCitationPart ils =
@@ -756,7 +756,7 @@ complexNatbibCitation mode = try $ do
case cs of
[] -> mzero
(c:cits) -> return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" ++ toksToString raw)
+ (rawInline "latex" $ "\\citetext" <> untokenize raw)
inNote :: Inlines -> Inlines
inNote ils =
@@ -780,10 +780,10 @@ tok :: PandocMonad m => LP m Inlines
tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
where singleChar' = do
Tok _ _ t <- singleChar
- return (str (T.unpack t))
+ return $ str t
opt :: PandocMonad m => LP m Inlines
-opt = bracketed inline <|> (str . T.unpack <$> rawopt)
+opt = bracketed inline <|> (str <$> rawopt)
paropt :: PandocMonad m => LP m Inlines
paropt = parenWrapped inline
@@ -822,26 +822,31 @@ overlayTok =
inBrackets :: Inlines -> Inlines
inBrackets x = str "[" <> x <> str "]"
-unescapeURL :: String -> String
-unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
-unescapeURL (x:xs) = x:unescapeURL xs
-unescapeURL [] = ""
+unescapeURL :: Text -> Text
+unescapeURL = T.concat . go . T.splitOn "\\"
+ where
+ isEscapable c = c `elemText` "#$%&~_^\\{}"
+ go (x:xs) = x : map unescapeInterior xs
+ go [] = []
+ unescapeInterior t
+ | Just (c, _) <- T.uncons t
+ , isEscapable c = t
+ | otherwise = "\\" <> t
mathEnvWith :: PandocMonad m
=> (Inlines -> a) -> Maybe Text -> Text -> LP m a
mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
where inner x = case innerEnv of
Nothing -> x
- Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++
- "\\end{" ++ T.unpack y ++ "}"
+ Just y -> "\\begin{" <> y <> "}\n" <> x <>
+ "\\end{" <> y <> "}"
-mathEnv :: PandocMonad m => Text -> LP m String
+mathEnv :: PandocMonad m => Text -> LP m Text
mathEnv name = do
skipopts
optional blankline
res <- manyTill anyTok (end_ name)
- return $ stripTrailingNewlines $ T.unpack $ untokenize res
+ return $ stripTrailingNewlines $ untokenize res
inlineEnvironment :: PandocMonad m => LP m Inlines
inlineEnvironment = try $ do
@@ -914,9 +919,9 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
, ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
, ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok)
- , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
- , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
- , ("ensuremath", mathInline . toksToString <$> braced)
+ , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+ , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+ , ("ensuremath", mathInline . untokenize <$> braced)
, ("texorpdfstring", const <$> tok <*> tok)
, ("P", lit "¶")
, ("S", lit "§")
@@ -1008,16 +1013,15 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("lstinline", dolstinline)
, ("mintinline", domintinline)
, ("Verb", doverb)
- , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
+ , ("url", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url ->
pure (link url "" (str url)))
- , ("nolinkurl", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
+ , ("nolinkurl", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url ->
pure (code url))
- , ("href", (unescapeURL . toksToString <$>
+ , ("href", (unescapeURL . untokenize <$>
bracedUrl <* optional sp) >>= \url ->
tok >>= \lab -> pure (link url "" lab))
, ("includegraphics", do options <- option [] keyvals
- src <- unescapeURL . T.unpack .
- removeDoubleQuotes . untokenize <$> braced
+ src <- unescapeURL . removeDoubleQuotes . untokenize <$> braced
mkImage options src)
, ("enquote*", enquote True Nothing)
, ("enquote", enquote False Nothing)
@@ -1172,22 +1176,21 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
ifdim :: PandocMonad m => LP m Inlines
ifdim = do
contents <- manyTill anyTok (controlSeq "fi")
- return $ rawInline "latex" $ T.unpack $
- "\\ifdim" <> untokenize contents <> "\\fi"
+ return $ rawInline "latex" $ "\\ifdim" <> untokenize contents <> "\\fi"
makeUppercase :: Inlines -> Inlines
-makeUppercase = fromList . walk (alterStr (map toUpper)) . toList
+makeUppercase = fromList . walk (alterStr T.toUpper) . toList
makeLowercase :: Inlines -> Inlines
-makeLowercase = fromList . walk (alterStr (map toLower)) . toList
+makeLowercase = fromList . walk (alterStr T.toLower) . toList
-alterStr :: (String -> String) -> Inline -> Inline
+alterStr :: (Text -> Text) -> Inline -> Inline
alterStr f (Str xs) = Str (f xs)
alterStr _ x = x
foreignlanguage :: PandocMonad m => LP m Inlines
foreignlanguage = do
- babelLang <- T.unpack . untokenize <$> braced
+ babelLang <- untokenize <$> braced
case babelLangToBCP47 babelLang of
Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok
_ -> tok
@@ -1196,24 +1199,24 @@ inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
where
mk (polyglossia, bcp47Func) =
- ("text" <> T.pack polyglossia, inlineLanguage bcp47Func)
+ ("text" <> polyglossia, inlineLanguage bcp47Func)
-inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines
+inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
inlineLanguage bcp47Func = do
- o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']'))
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
<$> rawopt
let lang = renderLang $ bcp47Func o
extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
hyperlink :: PandocMonad m => LP m Inlines
hyperlink = try $ do
- src <- toksToString <$> braced
+ src <- untokenize <$> braced
lab <- tok
- return $ link ('#':src) "" lab
+ return $ link ("#" <> src) "" lab
hypertargetBlock :: PandocMonad m => LP m Blocks
hypertargetBlock = try $ do
- ref <- toksToString <$> braced
+ ref <- untokenize <$> braced
bs <- grouped block
case toList bs of
[Header 1 (ident,_,_) _] | ident == ref -> return bs
@@ -1221,7 +1224,7 @@ hypertargetBlock = try $ do
hypertargetInline :: PandocMonad m => LP m Inlines
hypertargetInline = try $ do
- ref <- toksToString <$> braced
+ ref <- untokenize <$> braced
ils <- grouped inline
return $ spanWith (ref, [], []) ils
@@ -1231,7 +1234,7 @@ romanNumeralUpper =
romanNumeralLower :: (PandocMonad m) => LP m Inlines
romanNumeralLower =
- str . map toLower . toRomanNumeral <$> romanNumeralArg
+ str . T.toLower . toRomanNumeral <$> romanNumeralArg
romanNumeralArg :: (PandocMonad m) => LP m Int
romanNumeralArg = spaces *> (parser <|> inBraces)
@@ -1248,18 +1251,18 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
let (digits, rest) = T.span isDigit s
unless (T.null rest) $
Prelude.fail "Non-digits in argument to \\Rn or \\RN"
- safeRead $ T.unpack digits
+ safeRead digits
newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
newToggle name = do
updateState $ \st ->
- st{ sToggles = M.insert (toksToString name) False (sToggles st) }
+ st{ sToggles = M.insert (untokenize name) False (sToggles st) }
return mempty
setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a
setToggle on name = do
updateState $ \st ->
- st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) }
+ st{ sToggles = M.adjust (const on) (untokenize name) (sToggles st) }
return mempty
ifToggle :: PandocMonad m => LP m ()
@@ -1271,7 +1274,7 @@ ifToggle = do
no <- braced
toggles <- sToggles <$> getState
inp <- getInput
- let name' = toksToString name
+ let name' = untokenize name
case M.lookup name' toggles of
Just True -> setInput (yes ++ inp)
Just False -> setInput (no ++ inp)
@@ -1294,11 +1297,11 @@ ifstrequal = do
else getInput >>= setInput . (ifnotequal ++)
return mempty
-coloredInline :: PandocMonad m => String -> LP m Inlines
+coloredInline :: PandocMonad m => Text -> LP m Inlines
coloredInline stylename = do
skipopts
color <- braced
- spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok
+ spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok
ttfamily :: PandocMonad m => LP m Inlines
ttfamily = (code . stringify . toList) <$> tok
@@ -1313,12 +1316,12 @@ rawInlineOr name' fallback = do
processHBox :: Inlines -> Inlines
processHBox = walk convert
where
- convert Space = Str [chr 160] -- non-breakable space
- convert SoftBreak = Str [chr 160] -- non-breakable space
+ convert Space = Str $ T.singleton $ chr 160 -- non-breakable space
+ convert SoftBreak = Str $ T.singleton $ chr 160 -- non-breakable space
convert LineBreak = Str ""
convert x = x
-getRawCommand :: PandocMonad m => Text -> Text -> LP m String
+getRawCommand :: PandocMonad m => Text -> Text -> LP m Text
getRawCommand name txt = do
(_, rawargs) <- withRaw $
case name of
@@ -1336,7 +1339,7 @@ getRawCommand name txt = do
skipopts
option "" (try dimenarg)
void $ many braced
- return $ T.unpack (txt <> untokenize rawargs)
+ return $ txt <> untokenize rawargs
isFontSizeCommand :: Text -> Bool
isFontSizeCommand "tiny" = True
@@ -1396,17 +1399,17 @@ treatAsInline = Set.fromList
dolabel :: PandocMonad m => LP m Inlines
dolabel = do
v <- braced
- let refstr = toksToString v
+ let refstr = untokenize v
return $ spanWith (refstr,[],[("label", refstr)])
- $ inBrackets $ str $ toksToString v
+ $ inBrackets $ str $ untokenize v
-doref :: PandocMonad m => String -> LP m Inlines
+doref :: PandocMonad m => Text -> LP m Inlines
doref cls = do
v <- braced
- let refstr = toksToString v
+ let refstr = untokenize v
return $ linkWith ("",[],[ ("reference-type", cls)
, ("reference", refstr)])
- ('#':refstr)
+ ("#" <> refstr)
""
(inBrackets $ str refstr)
@@ -1435,11 +1438,11 @@ inline = (mempty <$ comment)
<|> (str "\160" <$ symbol '~')
<|> dollarsMath
<|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
- <|> (str . (:[]) <$> primEscape)
+ <|> (str . T.singleton <$> primEscape)
<|> regularSymbol
<|> (do res <- symbolIn "#^'`\"[]&"
pos <- getPosition
- let s = T.unpack (untoken res)
+ let s = untoken res
report $ ParsingUnescaped s pos
return $ str s)
@@ -1498,7 +1501,7 @@ include name = do
-- note, we can have cc_by_4.0 for example...
_ | name == "usepackage" -> addExtension f ".sty"
| otherwise -> addExtension f ".tex"
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
mapM_ (insertIncluded dirs) (map addExt fs)
return mempty
@@ -1509,19 +1512,19 @@ insertIncluded :: PandocMonad m
insertIncluded dirs f = do
pos <- getPosition
containers <- getIncludeFiles <$> getState
- when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " ++ show pos
- updateState $ addIncludeFile f
+ when (T.pack f `elem` containers) $
+ throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+ updateState $ addIncludeFile $ T.pack f
mbcontents <- readFileFromDirs dirs f
contents <- case mbcontents of
Just s -> return s
Nothing -> do
- report $ CouldNotLoadIncludeFile f pos
+ report $ CouldNotLoadIncludeFile (T.pack f) pos
return ""
- getInput >>= setInput . (tokenize f (T.pack contents) ++)
+ getInput >>= setInput . (tokenize f contents ++)
updateState dropLatestIncludeFile
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
addMeta field val = updateState $ \st ->
st{ sMeta = addMetaField field val $ sMeta st }
@@ -1536,10 +1539,10 @@ authors = try $ do
egroup
addMeta "author" (map trimInlines auths)
-macroDef :: (PandocMonad m, Monoid a) => (String -> a) -> LP m a
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
macroDef constructor = do
(_, s) <- withRaw (commandDef <|> environmentDef)
- (constructor (T.unpack $ untokenize s) <$
+ (constructor (untokenize s) <$
guardDisabled Ext_latex_macros)
<|> return mempty
where commandDef = do
@@ -1632,7 +1635,7 @@ newcommand = do
case M.lookup name macros of
Just macro
| mtype == "newcommand" -> do
- report $ MacroAlreadyDefined (T.unpack txt) pos
+ report $ MacroAlreadyDefined txt pos
return (name, macro)
| mtype == "providecommand" -> return (name, macro)
_ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
@@ -1658,7 +1661,7 @@ newenvironment = do
case M.lookup name macros of
Just _
| mtype == "newenvironment" -> do
- report $ MacroAlreadyDefined (T.unpack name) pos
+ report $ MacroAlreadyDefined name pos
return Nothing
| mtype == "provideenvironment" -> do
return Nothing
@@ -1669,7 +1672,7 @@ newenvironment = do
bracketedNum :: PandocMonad m => LP m Int
bracketedNum = do
ds <- untokenize <$> bracketedToks
- case safeRead (T.unpack ds) of
+ case safeRead ds of
Just i -> return i
_ -> return 0
@@ -1709,7 +1712,7 @@ section (ident, classes, kvs) lvl = do
contents <- grouped inline
lab <- option ident $
try (spaces >> controlSeq "label"
- >> spaces >> toksToString <$> braced)
+ >> spaces >> untokenize <$> braced)
when (lvl == 0) $
updateState $ \st -> st{ sHasChapters = True }
unless ("unnumbered" `elem` classes) $ do
@@ -1836,9 +1839,9 @@ blockCommands = M.fromList
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
, ("caption", skipopts *> setCaption)
, ("bibliography", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs . toksToString))
+ addMeta "bibliography" . splitBibs . untokenize))
, ("addbibresource", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs . toksToString))
+ addMeta "bibliography" . splitBibs . untokenize))
, ("endinput", mempty <$ skipMany tok)
-- includes
, ("lstinputlisting", inputListing)
@@ -1941,18 +1944,18 @@ rawEnv name = do
(bs, raw) <- withRaw $ env name blocks
if parseRaw
then return $ rawBlock "latex"
- $ T.unpack $ beginCommand <> untokenize raw
+ $ beginCommand <> untokenize raw
else do
- report $ SkippedContent (T.unpack beginCommand) pos1
+ report $ SkippedContent beginCommand pos1
pos2 <- getPosition
- report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2
+ report $ SkippedContent ("\\end{" <> name <> "}") pos2
return bs
rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
pos <- getPosition
(_, raw) <- withRaw $ verbEnv name
- let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw
+ let raw' = "\\begin{" <> name <> "}" <> untokenize raw
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
if parseRaw
@@ -1961,12 +1964,11 @@ rawVerbEnv name = do
report $ SkippedContent raw' pos
return mempty
-verbEnv :: PandocMonad m => Text -> LP m String
+verbEnv :: PandocMonad m => Text -> LP m Text
verbEnv name = withVerbatimMode $ do
optional blankline
res <- manyTill anyTok (end_ name)
- return $ T.unpack
- $ stripTrailingNewline
+ return $ stripTrailingNewline
$ untokenize
$ res
@@ -2010,11 +2012,11 @@ minted = do
mintedAttr :: PandocMonad m => LP m Attr
mintedAttr = do
options <- option [] keyvals
- lang <- toksToString <$> braced
+ lang <- untokenize <$> braced
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
- let classes = [ lang | not (null lang) ] ++
+ let classes = [ lang | not (T.null lang) ] ++
[ "numberLines" |
lookup "linenos" options == Just "true" ]
return ("",classes,kvs)
@@ -2023,14 +2025,14 @@ inputMinted :: PandocMonad m => LP m Blocks
inputMinted = do
pos <- getPosition
attr <- mintedAttr
- f <- filter (/='"') . toksToString <$> braced
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mbCode <- readFileFromDirs dirs f
+ f <- T.filter (/='"') . untokenize <$> braced
+ dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mbCode <- readFileFromDirs dirs (T.unpack f)
rawcode <- case mbCode of
Just s -> return s
Nothing -> do
report $ CouldNotLoadIncludeFile f pos
- return []
+ return ""
return $ B.codeBlockWith attr rawcode
letterContents :: PandocMonad m => LP m Blocks
@@ -2052,10 +2054,10 @@ figure = try $ do
addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
where go (Image attr@(_, cls, kvs) alt (src,tit))
- | not ("fig:" `isPrefixOf` tit) = do
+ | not ("fig:" `T.isPrefixOf` tit) = do
(mbcapt, mblab) <- sCaption <$> getState
let (alt', tit') = case mbcapt of
- Just ils -> (toList ils, "fig:" ++ tit)
+ Just ils -> (toList ils, "fig:" <> tit)
Nothing -> (alt, tit)
attr' = case mblab of
Just lab -> (lab, cls, kvs)
@@ -2090,23 +2092,23 @@ addImageCaption = walkM go
return $ Image attr' alt' (src, tit')
go x = return x
-coloredBlock :: PandocMonad m => String -> LP m Blocks
+coloredBlock :: PandocMonad m => Text -> LP m Blocks
coloredBlock stylename = try $ do
skipopts
color <- braced
notFollowedBy (grouped inline)
- let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)])
+ let constructor = divWith ("",[],[("style",stylename <> ": " <> untokenize color)])
constructor <$> grouped block
graphicsPath :: PandocMonad m => LP m Blocks
graphicsPath = do
- ps <- map toksToString <$>
+ ps <- map (T.unpack . untokenize) <$>
(bgroup *> spaces *> manyTill (braced <* spaces) egroup)
- getResourcePath >>= setResourcePath . (++ ps)
+ getResourcePath >>= setResourcePath . (<> ps)
return mempty
-splitBibs :: String -> [Inlines]
-splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
+splitBibs :: Text -> [Inlines]
+splitBibs = map (str . T.pack . flip replaceExtension "bib" . T.unpack . trim) . splitTextBy (==',')
alltt :: Blocks -> Blocks
alltt = walk strToCode
@@ -2115,7 +2117,7 @@ alltt = walk strToCode
strToCode SoftBreak = LineBreak
strToCode x = x
-parseListingsOptions :: [(String, String)] -> Attr
+parseListingsOptions :: [(Text, Text)] -> Attr
parseListingsOptions options =
let kvs = [ (if k == "firstnumber"
then "startFrom"
@@ -2129,23 +2131,23 @@ inputListing :: PandocMonad m => LP m Blocks
inputListing = do
pos <- getPosition
options <- option [] keyvals
- f <- filter (/='"') . toksToString <$> braced
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mbCode <- readFileFromDirs dirs f
+ f <- T.filter (/='"') . untokenize <$> braced
+ dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mbCode <- readFileFromDirs dirs (T.unpack f)
codeLines <- case mbCode of
- Just s -> return $ lines s
+ Just s -> return $ T.lines s
Nothing -> do
report $ CouldNotLoadIncludeFile f pos
return []
let (ident,classes,kvs) = parseListingsOptions options
let classes' =
(case listingsLanguage options of
- Nothing -> (take 1 (languagesByExtension (takeExtension f)) ++)
+ Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>)
Just _ -> id) classes
let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
let lastline = fromMaybe (length codeLines) $
lookup "lastline" options >>= safeRead
- let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $
+ let codeContents = T.intercalate "\n" $ take (1 + lastline - firstline) $
drop (firstline - 1) codeLines
return $ codeBlockWith (ident,classes',kvs) codeContents
@@ -2176,12 +2178,12 @@ orderedList' = try $ do
spaces
let markerSpec = do
symbol '['
- ts <- toksToString <$> manyTill anyTok (symbol ']')
+ ts <- untokenize <$> manyTill anyTok (symbol ']')
case runParser anyOrderedListMarker def "option" ts of
Right r -> return r
Left _ -> do
pos <- getPosition
- report $ SkippedContent ("[" ++ ts ++ "]") pos
+ report $ SkippedContent ("[" <> ts <> "]") pos
return (1, DefaultStyle, DefaultDelim)
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec
spaces
@@ -2191,17 +2193,17 @@ orderedList' = try $ do
spaces
start <- option 1 $ try $ do pos <- getPosition
controlSeq "setcounter"
- ctr <- toksToString <$> braced
- guard $ "enum" `isPrefixOf` ctr
- guard $ all (`elem` ['i','v']) (drop 4 ctr)
+ ctr <- untokenize <$> braced
+ guard $ "enum" `T.isPrefixOf` ctr
+ guard $ T.all (`elem` ['i','v']) (T.drop 4 ctr)
optional sp
- num <- toksToString <$> braced
+ num <- untokenize <$> braced
case safeRead num of
Just i -> return (i + 1 :: Int)
Nothing -> do
report $ SkippedContent
- ("\\setcounter{" ++ ctr ++
- "}{" ++ num ++ "}") pos
+ ("\\setcounter{" <> ctr <>
+ "}{" <> num <> "}") pos
return 1
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
@@ -2235,7 +2237,7 @@ splitWordTok = do
inp <- getInput
case inp of
(Tok spos Word t : rest) ->
- setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
_ -> return ()
parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))]
@@ -2256,7 +2258,7 @@ parseAligns = try $ do
let alignSuffix = symbol '<' >> braced
let colWidth = try $ do
symbol '{'
- ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth")
+ ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
spaces
symbol '}'
case safeRead ds of
@@ -2266,7 +2268,7 @@ parseAligns = try $ do
pref <- option [] alignPrefix
spaces
al <- alignChar
- width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced
+ width <- colWidth <|> option 0.0 (do s <- untokenize <$> braced
pos <- getPosition
report $ SkippedContent s pos
return 0.0)
@@ -2276,13 +2278,13 @@ parseAligns = try $ do
let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
symbol '*'
spaces
- ds <- trim . toksToString <$> braced
+ ds <- trim . untokenize <$> braced
spaces
spec <- braced
case safeRead ds of
Just n ->
getInput >>= setInput . (mconcat (replicate n spec) ++)
- Nothing -> Prelude.fail $ "Could not parse " ++ ds ++ " as number"
+ Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
bgroup
spaces
maybeBar
@@ -2379,7 +2381,7 @@ block = do
<|> blockCommand
<|> paragraph
<|> grouped block
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
blocks :: PandocMonad m => LP m Blocks
@@ -2387,9 +2389,9 @@ blocks = mconcat <$> many block
setDefaultLanguage :: PandocMonad m => LP m Blocks
setDefaultLanguage = do
- o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']'))
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
<$> rawopt
- polylang <- toksToString <$> braced
+ polylang <- untokenize <$> braced
case M.lookup polylang polyglossiaLangToBCP47 of
Nothing -> return mempty -- TODO mzero? warning?
Just langFunc -> do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index b21398f93..7ec432a4a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Lang
Copyright : Copyright (C) 2018-2019 John MacFarlane
@@ -18,11 +19,12 @@ module Text.Pandoc.Readers.LaTeX.Lang
where
import Prelude
import qualified Data.Map as M
+import qualified Data.Text as T
import Text.Pandoc.BCP47 (Lang(..))
-polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
+polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
polyglossiaLangToBCP47 = M.fromList
- [ ("arabic", \o -> case filter (/=' ') o of
+ [ ("arabic", \o -> case T.filter (/=' ') o of
"locale=algeria" -> Lang "ar" "" "DZ" []
"locale=mashriq" -> Lang "ar" "" "SY" []
"locale=libya" -> Lang "ar" "" "LY" []
@@ -30,7 +32,7 @@ polyglossiaLangToBCP47 = M.fromList
"locale=mauritania" -> Lang "ar" "" "MR" []
"locale=tunisia" -> Lang "ar" "" "TN" []
_ -> Lang "ar" "" "" [])
- , ("german", \o -> case filter (/=' ') o of
+ , ("german", \o -> case T.filter (/=' ') o of
"spelling=old" -> Lang "de" "" "DE" ["1901"]
"variant=austrian,spelling=old"
-> Lang "de" "" "AT" ["1901"]
@@ -40,11 +42,11 @@ polyglossiaLangToBCP47 = M.fromList
"variant=swiss" -> Lang "de" "" "CH" []
_ -> Lang "de" "" "" [])
, ("lsorbian", \_ -> Lang "dsb" "" "" [])
- , ("greek", \o -> case filter (/=' ') o of
+ , ("greek", \o -> case T.filter (/=' ') o of
"variant=poly" -> Lang "el" "" "polyton" []
"variant=ancient" -> Lang "grc" "" "" []
_ -> Lang "el" "" "" [])
- , ("english", \o -> case filter (/=' ') o of
+ , ("english", \o -> case T.filter (/=' ') o of
"variant=australian" -> Lang "en" "" "AU" []
"variant=canadian" -> Lang "en" "" "CA" []
"variant=british" -> Lang "en" "" "GB" []
@@ -52,7 +54,7 @@ polyglossiaLangToBCP47 = M.fromList
"variant=american" -> Lang "en" "" "US" []
_ -> Lang "en" "" "" [])
, ("usorbian", \_ -> Lang "hsb" "" "" [])
- , ("latin", \o -> case filter (/=' ') o of
+ , ("latin", \o -> case T.filter (/=' ') o of
"variant=classic" -> Lang "la" "" "" ["x-classic"]
_ -> Lang "la" "" "" [])
, ("slovenian", \_ -> Lang "sl" "" "" [])
@@ -133,7 +135,7 @@ polyglossiaLangToBCP47 = M.fromList
, ("vietnamese", \_ -> Lang "vi" "" "" [])
]
-babelLangToBCP47 :: String -> Maybe Lang
+babelLangToBCP47 :: T.Text -> Maybe Lang
babelLangToBCP47 s =
case s of
"austrian" -> Just $ Lang "de" "" "AT" ["1901"]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 14cb408b0..a01abda46 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -97,8 +97,8 @@ import Text.Parsec.Pos
newtype DottedNum = DottedNum [Int]
deriving (Show)
-renderDottedNum :: DottedNum -> String
-renderDottedNum (DottedNum xs) =
+renderDottedNum :: DottedNum -> T.Text
+renderDottedNum (DottedNum xs) = T.pack $
intercalate "." (map show xs)
incrementDottedNum :: Int -> DottedNum -> DottedNum
@@ -111,18 +111,18 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sMeta :: Meta
, sQuoteContext :: QuoteContext
, sMacros :: M.Map Text Macro
- , sContainers :: [String]
+ , sContainers :: [Text]
, sLogMessages :: [LogMessage]
- , sIdentifiers :: Set.Set String
+ , sIdentifiers :: Set.Set Text
, sVerbatimMode :: Bool
- , sCaption :: (Maybe Inlines, Maybe String)
+ , sCaption :: (Maybe Inlines, Maybe Text)
, sInListItem :: Bool
, sInTableCell :: Bool
, sLastHeaderNum :: DottedNum
, sLastFigureNum :: DottedNum
- , sLabels :: M.Map String [Inline]
+ , sLabels :: M.Map Text [Inline]
, sHasChapters :: Bool
- , sToggles :: M.Map String Bool
+ , sToggles :: M.Map Text Bool
, sExpanded :: Bool
}
deriving Show
@@ -202,7 +202,7 @@ withVerbatimMode parser = do
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> [Tok] -> Bool -> LP m a -> LP m a
- -> ParserT String s m (a, String)
+ -> ParserT Text s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -233,16 +233,16 @@ rawLaTeXParser toks retokenize parser valParser = do
, not (" " `T.isSuffixOf` result)
-> result <> " "
_ -> result
- return (val, T.unpack result')
+ return (val, result')
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => String -> ParserT String s m String
+ => Text -> ParserT Text s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
- do let retokenize = toksToString <$> many (satisfyTok (const True))
+ do let retokenize = untokenize <$> many (satisfyTok (const True))
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
, sMacros = extractMacros pstate }
- res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
+ res <- runParserT retokenize lstate "math" (tokenize "math" s)
case res of
Left e -> Prelude.fail (show e)
Right s' -> return s'
@@ -307,7 +307,7 @@ totoks pos t =
: totoks (incSourceColumn pos 2) rest'
| c == '#' ->
let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
- in case safeRead (T.unpack t1) of
+ in case safeRead t1 of
Just i ->
Tok pos (Arg i) ("#" <> t1)
: totoks (incSourceColumn pos (1 + T.length t1)) t2
@@ -447,7 +447,7 @@ doMacros' n inp = do
handleMacros n' spos name ts = do
when (n' > 20) -- detect macro expansion loops
- $ throwError $ PandocMacroLoop (T.unpack name)
+ $ throwError $ PandocMacroLoop name
macros <- sMacros <$> getState
case M.lookup name macros of
Nothing -> mzero
@@ -588,7 +588,7 @@ primEscape = do
| c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
| otherwise -> return (chr (ord c + 64))
Nothing -> Prelude.fail "Empty content of Esc1"
- Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Esc2 -> case safeRead ("0x" <> T.drop 2 t) of
Just x -> return (chr x)
Nothing -> Prelude.fail $ "Could not read: " ++ T.unpack t
_ -> Prelude.fail "Expected an Esc1 or Esc2 token" -- should not happen
@@ -677,7 +677,7 @@ dimenarg = try $ do
guard $ rest `elem` ["", "pt","pc","in","bp","cm","mm","dd","cc","sp"]
return $ T.pack ['=' | ch] <> minus <> s
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore :: (Monoid a, PandocMonad m) => Text -> ParserT s u m a
ignore raw = do
pos <- getPosition
report $ SkippedContent raw pos
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index ddf469222..feacb8450 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -1,4 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Man
Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane
@@ -63,7 +65,7 @@ readWithMTokens :: PandocMonad m
-> [RoffToken] -- ^ input
-> m (Either PandocError a)
readWithMTokens parser state input =
- let leftF = PandocParsecError . intercalate "\n" $ show <$> input
+ let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
in mapLeft leftF `liftM` runParserT parser state "source" input
parseMan :: PandocMonad m => ManParser m Pandoc
@@ -141,7 +143,7 @@ parseTable = do
isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
isHrule (_, [RoffTokens ss]) =
case Foldable.toList ss of
- [TextLine [RoffStr [c]]] -> c `elem` ['_','-','=']
+ [TextLine [RoffStr (T.unpack -> [c])]] -> c `elem` ['_','-','=']
_ -> False
isHrule _ = False
@@ -191,7 +193,7 @@ memptyLine = msatisfy isEmptyLine where
isEmptyLine EmptyLine = True
isEmptyLine _ = False
-mmacro :: PandocMonad m => String -> ManParser m RoffToken
+mmacro :: PandocMonad m => T.Text -> ManParser m RoffToken
mmacro mk = msatisfy isControlLine where
isControlLine (ControlLine mk' _ _) | mk == mk' = True
| otherwise = False
@@ -284,7 +286,7 @@ parseInline = try $ do
_ -> mzero
handleInlineMacro :: PandocMonad m
- => String -> [Arg] -> SourcePos -> ManParser m Inlines
+ => T.Text -> [Arg] -> SourcePos -> ManParser m Inlines
handleInlineMacro mname args _pos = do
case mname of
"UR" -> parseLink args
@@ -339,7 +341,7 @@ bareIP = msatisfy isBareIP where
isBareIP (ControlLine "IP" [] _) = True
isBareIP _ = False
-endmacro :: PandocMonad m => String -> ManParser m ()
+endmacro :: PandocMonad m => T.Text -> ManParser m ()
endmacro name = void (mmacro name)
<|> lookAhead (void newBlockMacro)
<|> lookAhead eof
@@ -356,7 +358,7 @@ parseCodeBlock = try $ do
toks <- (mmacro "nf" *> manyTill codeline (endmacro "fi"))
<|> (mmacro "EX" *> manyTill codeline (endmacro "EE"))
optional (mmacro "in")
- return $ codeBlock (intercalate "\n" $ catMaybes toks)
+ return $ codeBlock (T.intercalate "\n" $ catMaybes toks)
where
@@ -366,7 +368,7 @@ parseCodeBlock = try $ do
ControlLine "PP" _ _ -> return $ Just "" -- .PP sometimes used for blank line
ControlLine mname args pos -> do
(Just . query getText <$> handleInlineMacro mname args pos) <|>
- do report $ SkippedContent ('.':mname) pos
+ do report $ SkippedContent ("." <> mname) pos
return Nothing
Tbl _ _ pos -> do
report $ SkippedContent "TABLE" pos
@@ -375,12 +377,12 @@ parseCodeBlock = try $ do
TextLine ss
| not (null ss)
, all isFontToken ss -> return Nothing
- | otherwise -> return $ Just $ linePartsToString ss
+ | otherwise -> return $ Just $ linePartsToText ss
isFontToken Font{} = True
isFontToken _ = False
- getText :: Inline -> String
+ getText :: Inline -> T.Text
getText (Str s) = s
getText Space = " "
getText (Code _ s) = s
@@ -416,8 +418,8 @@ listItem mbListType = try $ do
(ControlLine _ args _) <- mmacro "IP"
case args of
(arg1 : _) -> do
- let cs = linePartsToString arg1
- let cs' = if not ('.' `elem` cs || ')' `elem` cs) then cs ++ "." else cs
+ let cs = linePartsToText arg1
+ let cs' = if not (T.any (== '.') cs || T.any (== ')') cs) then cs <> "." else cs
let lt = case Parsec.runParser anyOrderedListMarker defaultParserState
"list marker" cs' of
Right (start, listtype, listdelim)
@@ -467,7 +469,7 @@ parseLink args = do
ControlLine _ endargs _ <- mmacro "UE"
let url = case args of
[] -> ""
- (x:_) -> linePartsToString x
+ (x:_) -> linePartsToText x
return $ link url "" contents <>
case endargs of
[] -> mempty
@@ -479,7 +481,7 @@ parseEmailLink args = do
ControlLine _ endargs _ <- mmacro "ME"
let url = case args of
[] -> ""
- (x:_) -> "mailto:" ++ linePartsToString x
+ (x:_) -> "mailto:" <> linePartsToText x
return $ link url "" contents <>
case endargs of
[] -> mempty
@@ -490,6 +492,6 @@ skipUnknownMacro = do
tok <- mmacroAny
case tok of
ControlLine mkind _ pos -> do
- report $ SkippedContent ('.':mkind) pos
+ report $ SkippedContent ("." <> mkind) pos
return mempty
_ -> Prelude.fail "the impossible happened"
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4807baada..f8349ea99 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -19,14 +21,15 @@ import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BS
-import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
-import Data.List (intercalate, sortBy, transpose, elemIndex)
+import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Data.List (sortBy, transpose, elemIndex)
import qualified Data.Map as M
import Data.Maybe
import Data.Ord (comparing)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
import qualified Data.YAML as YAML
import qualified Data.YAML.Event as YE
import System.FilePath (addExtension, takeExtension)
@@ -47,7 +50,7 @@ import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
-type MarkdownParser m = ParserT [Char] ParserState m
+type MarkdownParser m = ParserT Text ParserState m
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: PandocMonad m
@@ -56,7 +59,7 @@ readMarkdown :: PandocMonad m
-> m Pandoc
readMarkdown opts s = do
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -77,7 +80,7 @@ isHruleChar '-' = True
isHruleChar '_' = True
isHruleChar _ = False
-setextHChars :: String
+setextHChars :: [Char]
setextHChars = "=-"
isBlank :: Char -> Bool
@@ -96,30 +99,30 @@ inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-spnl :: PandocMonad m => ParserT [Char] st m ()
+spnl :: PandocMonad m => ParserT Text st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
-spnl' :: PandocMonad m => ParserT [Char] st m String
+spnl' :: PandocMonad m => ParserT Text st m Text
spnl' = try $ do
xs <- many spaceChar
ys <- option "" $ try $ (:) <$> newline
<*> (many spaceChar <* notFollowedBy (char '\n'))
- return (xs ++ ys)
+ return $ T.pack $ xs ++ ys
-indentSpaces :: PandocMonad m => MarkdownParser m String
+indentSpaces :: PandocMonad m => MarkdownParser m Text
indentSpaces = try $ do
tabStop <- getOption readerTabStop
- count tabStop (char ' ') <|>
- string "\t" <?> "indentation"
+ countChar tabStop (char ' ') <|>
+ textStr "\t" <?> "indentation"
-nonindentSpaces :: PandocMonad m => MarkdownParser m String
+nonindentSpaces :: PandocMonad m => MarkdownParser m Text
nonindentSpaces = do
n <- skipNonindentSpaces
- return $ replicate n ' '
+ return $ T.replicate n " "
-- returns number of spaces parsed
skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
@@ -139,8 +142,9 @@ inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
inlinesInBalancedBrackets =
try $ char '[' >> withRaw (go 1) >>=
parseFromString inlines . stripBracket . snd
- where stripBracket [] = []
- stripBracket xs = if last xs == ']' then init xs else xs
+ where stripBracket t = case T.unsnoc t of
+ Just (t', ']') -> t'
+ _ -> t
go :: PandocMonad m => Int -> MarkdownParser m ()
go 0 = return ()
go openBrackets =
@@ -160,7 +164,7 @@ inlinesInBalancedBrackets =
-- document structure
--
-rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
+rawTitleBlockLine :: PandocMonad m => MarkdownParser m Text
rawTitleBlockLine = do
char '%'
skipSpaces
@@ -169,7 +173,7 @@ rawTitleBlockLine = do
notFollowedBy blankline
skipSpaces
anyLine
- return $ trim $ unlines (first:rest)
+ return $ trim $ T.unlines (first:rest)
titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
titleLine = try $ do
@@ -222,9 +226,9 @@ yamlMetaBlock = try $ do
notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
rawYamlLines <- manyTill anyLine stopLine
-- by including --- and ..., we allow yaml blocks with just comments:
- let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
+ let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml
+ newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-- Since `<>` is left-biased, existing values are not touched:
updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
return mempty
@@ -255,7 +259,7 @@ yamlBsToMeta bstr = do
return . return $ mempty
Left (_pos, err') -> do
logMessage $ CouldNotParseYamlMetadata
- err' pos
+ (T.pack err') pos
return . return $ mempty
nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text
@@ -270,11 +274,11 @@ toMetaValue x =
-- Note: a standard quoted or unquoted YAML value will
-- not end in a newline, but a "block" set off with
-- `|` or `>` will.
- if (T.pack "\n") `T.isSuffixOf` x
- then parseFromString' (asBlocks <$> parseBlocks) (xstring <> "\n")
+ if "\n" `T.isSuffixOf` x
+ then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n")
else parseFromString'
((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks))
- xstring
+ x
where pInlines = trimInlinesF . mconcat <$> manyTill inline eof
asBlocks p = do
p' <- p
@@ -282,7 +286,6 @@ toMetaValue x =
asInlines p = do
p' <- p
return $ MetaInlines (B.toList p')
- xstring = T.unpack x
checkBoolean :: Text -> Maybe Bool
checkBoolean t =
@@ -298,8 +301,8 @@ yamlToMetaValue (YAML.Scalar _ x) =
case x of
YAML.SStr t -> toMetaValue t
YAML.SBool b -> return $ return $ MetaBool b
- YAML.SFloat d -> return $ return $ MetaString (show d)
- YAML.SInt i -> return $ return $ MetaString (show i)
+ YAML.SFloat d -> return $ return $ MetaString $ tshow d
+ YAML.SInt i -> return $ return $ MetaString $ tshow i
YAML.SUnknown _ t ->
case checkBoolean t of
Just b -> return $ return $ MetaBool b
@@ -315,7 +318,7 @@ yamlToMetaValue _ = return $ return $ MetaString ""
yamlMap :: PandocMonad m
=> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
- -> MarkdownParser m (F (M.Map String MetaValue))
+ -> MarkdownParser m (F (M.Map Text MetaValue))
yamlMap o = do
kvs <- forM (M.toList o) $ \(key, v) -> do
k <- nodeToKey key
@@ -323,12 +326,12 @@ yamlMap o = do
let kvs' = filter (not . ignorable . fst) kvs
(fmap M.fromList . sequence) <$> mapM toMeta kvs'
where
- ignorable t = (T.pack "_") `T.isSuffixOf` t
+ ignorable t = "_" `T.isSuffixOf` t
toMeta (k, v) = do
fv <- yamlToMetaValue v
return $ do
v' <- fv
- return (T.unpack k, v')
+ return (k, v')
stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -343,14 +346,14 @@ mmdTitleBlock = try $ do
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
-kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
+kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
kvPair allowEmpty = try $ do
- key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
- val <- trim <$> manyTill anyChar
+ key <- many1TillChar (alphaNum <|> oneOf "_- ") (char ':')
+ val <- trim <$> manyTillChar anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
- guard $ allowEmpty || not (null val)
- let key' = concat $ words $ map toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $B.text val
+ guard $ allowEmpty || not (T.null val)
+ let key' = T.concat $ T.words $ T.toLower key
+ let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
return (key',val')
parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
@@ -380,13 +383,13 @@ referenceKey = try $ do
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL = fmap unwords $ many $ try $ do
+ let sourceURL = fmap T.unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
notFollowedBy' (() <$ reference)
- many1 $ notFollowedBy space >> litChar
- let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
+ many1Char $ notFollowedBy space >> litChar
+ let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
@@ -411,20 +414,20 @@ referenceKey = try $ do
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
-referenceTitle :: PandocMonad m => MarkdownParser m String
+referenceTitle :: PandocMonad m => MarkdownParser m Text
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
-- A link title in quotes
-quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
+quotedTitle :: PandocMonad m => Char -> MarkdownParser m Text
quotedTitle c = try $ do
char c
notFollowedBy spaces
let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
- let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar
- let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
- unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
+ let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar
+ let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c
+ T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
@@ -440,21 +443,21 @@ abbrevKey = do
blanklines
return $ return mempty
-noteMarker :: PandocMonad m => MarkdownParser m String
-noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
+noteMarker :: PandocMonad m => MarkdownParser m Text
+noteMarker = string "[^" >> many1TillChar (satisfy $ not . isBlank) (char ']')
-rawLine :: PandocMonad m => MarkdownParser m String
+rawLine :: PandocMonad m => MarkdownParser m Text
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
-rawLines :: PandocMonad m => MarkdownParser m String
+rawLines :: PandocMonad m => MarkdownParser m Text
rawLines = do
first <- anyLine
rest <- many rawLine
- return $ unlines (first:rest)
+ return $ T.unlines (first:rest)
noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
noteBlock = try $ do
@@ -466,7 +469,7 @@ noteBlock = try $ do
optional indentSpaces
first <- rawLines
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
- let raw = unlines (first:rest) ++ "\n"
+ let raw = T.unlines (first:rest) <> "\n"
optional blanklines
parsed <- parseFromString' parseBlocks raw
oldnotes <- stateNotes' <$> getState
@@ -510,7 +513,7 @@ block = do
, para
, plain
] <?> "block"
- trace (take 60 $ show $ B.toList $ runF res defaultParserState)
+ trace (T.take 60 $ tshow $ B.toList $ runF res defaultParserState)
return res
--
@@ -570,7 +573,7 @@ mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
mmdHeaderIdentifier = do
(_, raw) <- reference
let raw' = trim $ stripFirstAndLast raw
- let ident = concat $ words $ map toLower raw'
+ let ident = T.concat $ T.words $ T.toLower raw'
let attr = (ident, [], [])
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw' attr
@@ -600,20 +603,20 @@ setextHeader = try $ do
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
+registerImplicitHeader :: PandocMonad m => Text -> Attr -> MarkdownParser m ()
registerImplicitHeader raw attr@(ident, _, _)
- | null raw = return ()
+ | T.null raw = return ()
| otherwise = do
- let key = toKey $ "[" ++ raw ++ "]"
+ let key = toKey $ "[" <> raw <> "]"
updateState $ \s ->
- s { stateHeaderKeys = M.insert key (('#':ident,""), attr)
+ s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr)
(stateHeaderKeys s) }
--
-- hrule block
--
-hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
+hrule :: PandocMonad m => ParserT Text st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -627,13 +630,13 @@ hrule = try $ do
-- code blocks
--
-indentedLine :: PandocMonad m => MarkdownParser m String
+indentedLine :: PandocMonad m => MarkdownParser m Text
indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
- -> ParserT [Char] ParserState m Int
+ -> ParserT Text ParserState m Int
blockDelimiter f len = try $ do
skipNonindentSpaces
c <- lookAhead (satisfy f)
@@ -652,11 +655,11 @@ attributes = try $ do
attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
-identifier :: PandocMonad m => MarkdownParser m String
+identifier :: PandocMonad m => MarkdownParser m Text
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
- return (first:rest)
+ return $ T.pack (first:rest)
identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
identifierAttr = try $ do
@@ -674,15 +677,15 @@ keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
keyValAttr = try $ do
key <- identifier
char '='
- val <- enclosed (char '"') (char '"') litChar
- <|> enclosed (char '\'') (char '\'') litChar
+ val <- T.pack <$> enclosed (char '"') (char '"') litChar
+ <|> T.pack <$> enclosed (char '\'') (char '\'') litChar
<|> ("" <$ try (string "\"\""))
<|> ("" <$ try (string "''"))
- <|> many (escapedChar' <|> noneOf " \t\n\r}")
+ <|> manyChar (escapedChar' <|> noneOf " \t\n\r}")
return $ \(id',cs,kvs) ->
case key of
"id" -> (val,cs,kvs)
- "class" -> (id',cs ++ words val,kvs)
+ "class" -> (id',cs ++ T.words val,kvs)
_ -> (id',cs,kvs ++ [(key,val)])
specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
@@ -690,12 +693,12 @@ specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
-rawAttribute :: PandocMonad m => MarkdownParser m String
+rawAttribute :: PandocMonad m => MarkdownParser m Text
rawAttribute = do
char '{'
skipMany spaceChar
char '='
- format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_")
+ format <- many1Char $ satisfy (\c -> isAlphaNum c || c `elem` ['-', '_'])
skipMany spaceChar
char '}'
return format
@@ -703,7 +706,7 @@ rawAttribute = do
codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
indentchars <- nonindentSpaces
- let indentLevel = length indentchars
+ let indentLevel = T.length indentchars
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
@@ -713,9 +716,9 @@ codeBlockFenced = try $ do
<|>
(Right <$> option ("",[],[])
(try (guardEnabled Ext_fenced_code_attributes >> attributes)
- <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)))
+ <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
blankline
- contents <- intercalate "\n" <$>
+ contents <- T.intercalate "\n" <$>
manyTill (gobbleAtMostSpaces indentLevel >> anyLine)
(try $ do
blockDelimiter (== c) (Just size)
@@ -726,8 +729,8 @@ codeBlockFenced = try $ do
Right attr -> B.codeBlockWith attr contents
-- correctly handle github language identifiers
-toLanguageId :: String -> String
-toLanguageId = map toLower . go
+toLanguageId :: Text -> Text
+toLanguageId = T.toLower . go
where go "c++" = "cpp"
go "objective-c" = "objectivec"
go x = x
@@ -737,11 +740,11 @@ codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
l <- indentedLine
- return $ b ++ l))
+ return $ b <> l))
optional blanklines
classes <- getOption readerIndentedCodeClasses
return $ return $ B.codeBlockWith ("", classes, []) $
- stripTrailingNewlines $ concat contents
+ stripTrailingNewlines $ T.concat contents
lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lhsCodeBlock = do
@@ -751,33 +754,33 @@ lhsCodeBlock = do
<|> (return . B.codeBlockWith ("",["haskell"],[]) <$>
lhsCodeBlockInverseBird)
-lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
- contents <- many1Till anyChar (try $ string "\\end{code}")
+ contents <- many1TillChar anyChar (try $ string "\\end{code}")
blanklines
return $ stripTrailingNewlines contents
-lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockBird :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
-lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
-lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
+lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m Text
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ Prelude.fail "Not in first column"
lns <- many1 $ birdTrackLine c
-- if (as is normal) there is always a space after >, drop it
- let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
+ let lns' = if all (\ln -> T.null ln || T.take 1 ln == " ") lns
+ then map (T.drop 1) lns
else lns
blanklines
- return $ intercalate "\n" lns'
+ return $ T.intercalate "\n" lns'
-birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
+birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -791,12 +794,12 @@ birdTrackLine c = try $ do
emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
-emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
+emailBlockQuote :: PandocMonad m => MarkdownParser m [Text]
emailBlockQuote = try $ do
emailBlockQuoteStart
- let emailLine = many $ nonEndline <|> try
- (endline >> notFollowedBy emailBlockQuoteStart >>
- return '\n')
+ let emailLine = manyChar $ nonEndline <|> try
+ (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n')
let emailSep = try (newline >> emailBlockQuoteStart)
first <- emailLine
rest <- many $ try $ emailSep >> emailLine
@@ -809,7 +812,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ T.intercalate "\n" raw <> "\n\n"
return $ B.blockQuote <$> contents
--
@@ -833,7 +836,7 @@ orderedListStart mbstydelim = try $ do
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
(do guardDisabled Ext_fancy_lists
- start <- many1 digit >>= safeRead
+ start <- many1Char digit >>= safeRead
char '.'
gobbleSpaces 1 <|> () <$ lookAhead newline
optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
@@ -857,7 +860,7 @@ orderedListStart mbstydelim = try $ do
listStart :: PandocMonad m => MarkdownParser m ()
listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing)
-listLine :: PandocMonad m => Int -> MarkdownParser m String
+listLine :: PandocMonad m => Int -> MarkdownParser m Text
listLine continuationIndent = try $ do
notFollowedBy' (do gobbleSpaces continuationIndent
skipMany spaceChar
@@ -867,19 +870,19 @@ listLine continuationIndent = try $ do
optional (() <$ gobbleSpaces continuationIndent)
listLineCommon
-listLineCommon :: PandocMonad m => MarkdownParser m String
-listLineCommon = concat <$> manyTill
- ( many1 (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
+listLineCommon :: PandocMonad m => MarkdownParser m Text
+listLineCommon = T.concat <$> manyTill
+ ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
<|> fmap snd (withRaw code)
<|> fmap snd (htmlTag isCommentTag)
- <|> count 1 anyChar
+ <|> countChar 1 anyChar
) newline
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: PandocMonad m
=> Bool -- four space rule
-> MarkdownParser m a
- -> MarkdownParser m (String, Int)
+ -> MarkdownParser m (Text, Int)
rawListItem fourSpaceRule start = try $ do
pos1 <- getPosition
start
@@ -892,14 +895,14 @@ rawListItem fourSpaceRule start = try $ do
notFollowedBy (() <$ codeBlockFenced)
notFollowedBy blankline
listLine continuationIndent)
- blanks <- many blankline
- let result = unlines (first:rest) ++ blanks
+ blanks <- manyChar blankline
+ let result = T.unlines (first:rest) <> blanks
return (result, continuationIndent)
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation :: PandocMonad m => Int -> MarkdownParser m String
+listContinuation :: PandocMonad m => Int -> MarkdownParser m Text
listContinuation continuationIndent = try $ do
x <- try $ do
notFollowedBy blankline
@@ -913,12 +916,12 @@ listContinuation continuationIndent = try $ do
notFollowedByDivCloser
gobbleSpaces continuationIndent <|> notFollowedBy' listStart
anyLineNewline
- blanks <- many blankline
- return $ concat (x:xs) ++ blanks
+ blanks <- manyChar blankline
+ return $ T.concat (x:xs) <> blanks
-- Variant of blanklines that doesn't require blank lines
-- before a fence or eof.
-blanklines' :: PandocMonad m => MarkdownParser m [Char]
+blanklines' :: PandocMonad m => MarkdownParser m Text
blanklines' = blanklines <|> try checkDivCloser
where checkDivCloser = do
guardEnabled Ext_fenced_divs
@@ -954,7 +957,7 @@ listItem fourSpaceRule start = try $ do
(first, continuationIndent) <- rawListItem fourSpaceRule start
continuations <- many (listContinuation continuationIndent)
-- parse the extracted block, which may contain various block elements:
- let raw = concat (first:continuations)
+ let raw = T.concat (first:continuations)
contents <- parseFromString' parseBlocks raw
updateState (\st -> st {stateParserContext = oldContext})
exts <- getOption readerExtensions
@@ -990,7 +993,7 @@ defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
tabStop <- getOption readerTabStop
- let remaining = tabStop - (length sps + 1)
+ let remaining = tabStop - (T.length sps + 1)
if remaining > 0
then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
else mzero
@@ -1001,11 +1004,11 @@ definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
term <- parseFromString' (trimInlinesF <$> inlines) rawLine'
- contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw
+ contents <- mapM (parseFromString' parseBlocks . (<> "\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
-defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
+defRawBlock :: PandocMonad m => Bool -> MarkdownParser m Text
defRawBlock compact = try $ do
hasBlank <- option False $ blankline >> return True
defListMarker
@@ -1020,13 +1023,13 @@ defRawBlock compact = try $ do
<|> notFollowedBy defListMarker
anyLine )
rawlines <- many dline
- cont <- fmap concat $ many $ try $ do
+ cont <- fmap T.concat $ many $ try $ do
trailing <- option "" blanklines
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
lns <- many dline
- return $ trailing ++ unlines (ln:lns)
- return $ trimr (firstline ++ unlines rawlines ++ cont) ++
- if hasBlank || not (null cont) then "\n\n" else ""
+ return $ trailing <> T.unlines (ln:lns)
+ return $ trimr (firstline <> T.unlines rawlines <> cont) <>
+ if hasBlank || not (T.null cont) then "\n\n" else ""
definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
definitionList = try $ do
@@ -1063,7 +1066,7 @@ para = try $ do
| not (null alt) ->
-- the fig: at beginning of title indicates a figure
return $ B.singleton
- $ Image attr alt (src,'f':'i':'g':':':tit)
+ $ Image attr alt (src, "fig:" <> tit)
_ -> return x'
| otherwise = x
result <- implicitFigures . trimInlinesF <$> inlines1
@@ -1082,7 +1085,7 @@ para = try $ do
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just "div" -> () <$
- lookAhead (htmlTag (~== TagClose "div"))
+ lookAhead (htmlTag (~== TagClose ("div" :: Text)))
_ -> mzero
<|> do guardEnabled Ext_fenced_divs
divLevel <- stateFencedDivLevel <$> getState
@@ -1098,7 +1101,7 @@ plain = fmap B.plain . trimInlinesF <$> inlines1
-- raw html
--
-htmlElement :: PandocMonad m => MarkdownParser m String
+htmlElement :: PandocMonad m => MarkdownParser m Text
htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
<|> fmap snd (htmlTag isBlockTag)
@@ -1132,14 +1135,14 @@ htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
- return $ if null first
+ return $ if T.null first
then mempty
else return $ B.rawBlock "html" first
-strictHtmlBlock :: PandocMonad m => MarkdownParser m String
+strictHtmlBlock :: PandocMonad m => MarkdownParser m Text
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
-rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
+rawVerbatimBlock :: PandocMonad m => MarkdownParser m Text
rawVerbatimBlock = htmlInBalanced isVerbTag
where isVerbTag (TagOpen "pre" _) = True
isVerbTag (TagOpen "style" _) = True
@@ -1150,13 +1153,13 @@ rawVerbatimBlock = htmlInBalanced isVerbTag
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "tex" . trim . concat <$>
- many1 ((++) <$> rawConTeXtEnvironment <*> spnl'))
- <|> (B.rawBlock "tex" . trim . concat <$>
- many1 ((++) <$> rawLaTeXBlock <*> spnl'))
+ result <- (B.rawBlock "tex" . trim . T.concat <$>
+ many1 ((<>) <$> rawConTeXtEnvironment <*> spnl'))
+ <|> (B.rawBlock "tex" . trim . T.concat <$>
+ many1 ((<>) <$> rawLaTeXBlock <*> spnl'))
return $ case B.toList result of
[RawBlock _ cs]
- | all (`elem` [' ','\t','\n']) cs -> return mempty
+ | T.all (`elem` [' ','\t','\n']) cs -> return mempty
-- don't create a raw block for suppressed macro defs
_ -> return result
@@ -1186,7 +1189,7 @@ rawHtmlBlocks = do
return result
-- remove markdown="1" attribute
-stripMarkdownAttribute :: String -> String
+stripMarkdownAttribute :: Text -> Text
stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
where filterAttrib (TagOpen t as) = TagOpen t
[(k,v) | (k,v) <- as, k /= "markdown"]
@@ -1211,7 +1214,7 @@ lineBlock = try $ do
-- and the length including trailing space.
dashedLine :: PandocMonad m
=> Char
- -> ParserT [Char] st m (Int, Int)
+ -> ParserT Text st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@@ -1232,9 +1235,9 @@ simpleTableHeader headless = try $ do
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
+ let indices = scanl (+) (T.length initSp) lines'
-- If no header, calculate alignment on basis of first row of text
- rawHeads <- fmap (tail . splitStringByIndices (init indices)) $
+ rawHeads <- fmap (tail . splitTextByIndices (init indices)) $
if headless
then lookAhead anyLine
else return rawContent
@@ -1250,15 +1253,15 @@ simpleTableHeader headless = try $ do
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
-- dashed line under the rows.
-alignType :: [String]
+alignType :: [Text]
-> Int
-> Alignment
alignType [] _ = AlignDefault
alignType strLst len =
- let nonempties = filter (not . null) $ map trimr strLst
+ let nonempties = filter (not . T.null) $ map trimr strLst
(leftSpace, rightSpace) =
- case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
+ case sortBy (comparing T.length) nonempties of
+ (x:_) -> (T.head x `elem` [' ', 't'], T.length x < len)
[] -> (False, False)
in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
@@ -1267,7 +1270,7 @@ alignType strLst len =
(False, False) -> AlignDefault
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: PandocMonad m => MarkdownParser m String
+tableFooter :: PandocMonad m => MarkdownParser m Text
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines'
-- Parse a table separator - dashed line.
@@ -1277,12 +1280,12 @@ tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-- Parse a raw line and split it into chunks by indices.
rawTableLine :: PandocMonad m
=> [Int]
- -> MarkdownParser m [String]
+ -> MarkdownParser m [Text]
rawTableLine indices = do
notFollowedBy' (blanklines' <|> tableFooter)
- line <- many1Till anyChar newline
+ line <- many1TillChar anyChar newline
return $ map trim $ tail $
- splitStringByIndices (init indices) line
+ splitTextByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: PandocMonad m
@@ -1297,7 +1300,7 @@ multilineRow :: PandocMonad m
-> MarkdownParser m (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
- let cols = map unlines $ transpose colLines
+ let cols = map T.unlines $ transpose colLines
fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
@@ -1344,7 +1347,7 @@ multilineTableHeader headless = try $ do
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
+ let indices = scanl (+) (T.length initSp) lines'
-- compensate for the fact that intercolumn spaces are
-- not included in the last index:
let indices' = case reverse indices of
@@ -1352,14 +1355,14 @@ multilineTableHeader headless = try $ do
(x:xs) -> reverse (x+1:xs)
rawHeadsList <- if headless
then fmap (map (:[]) . tail .
- splitStringByIndices (init indices')) $ lookAhead anyLine
+ splitTextByIndices (init indices')) $ lookAhead anyLine
else return $ transpose $ map
- (tail . splitStringByIndices (init indices'))
+ (tail . splitTextByIndices (init indices'))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
- else map (unlines . map trim) rawHeadsList
+ else map (T.unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads
return (heads, aligns, indices')
@@ -1393,7 +1396,7 @@ pipeTable = try $ do
lines' <- many pipeTableRow
let lines'' = map (take (length aligns) <$>) lines'
let maxlength = maximum $
- map (\x -> length . stringify $ runF x def) (heads' : lines'')
+ map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
numColumns <- getOption readerColumns
let widths = if maxlength > numColumns
then map (\len ->
@@ -1430,7 +1433,7 @@ pipeTableCell =
return $ B.plain <$> result)
<|> return mempty
-pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1446,12 +1449,12 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT [Char] st m ()
+scanForPipe :: PandocMonad m => ParserT Text st m ()
scanForPipe = do
inp <- getInput
- case break (\c -> c == '\n' || c == '|') inp of
- (_,'|':_) -> return ()
- _ -> mzero
+ case T.break (\c -> c == '\n' || c == '|') inp of
+ (_, T.uncons -> Just ('|', _)) -> return ()
+ _ -> mzero
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
@@ -1561,7 +1564,7 @@ escapedChar = do
result <- escapedChar'
case result of
' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
- _ -> return $ return $ B.str [result]
+ _ -> return $ return $ B.str $ T.singleton result
ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
ltSign = do
@@ -1574,12 +1577,12 @@ exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
- lab <- many1 (alphaNum <|> oneOf "-_")
+ lab <- many1Char (alphaNum <|> oneOf "-_")
return $ do
st <- askF
return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str (show n)
- Nothing -> B.str ('@':lab)
+ Just n -> B.str $ tshow n
+ Nothing -> B.str $ "@" <> lab
symbol :: PandocMonad m => MarkdownParser m (F Inlines)
symbol = do
@@ -1587,16 +1590,16 @@ symbol = do
<|> try (do lookAhead $ char '\\'
notFollowedBy' (() <$ rawTeXBlock)
char '\\')
- return $ return $ B.str [result]
+ return $ return $ B.str $ T.singleton result
-- parses inline code, between n `s and n `s
code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
- result <- (trim . concat) <$>
+ result <- (trim . T.concat) <$>
manyTill (notFollowedBy (inList >> listStart) >>
- (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ (many1Char (noneOf "`\n") <|> many1Char (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " ")))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
@@ -1627,10 +1630,10 @@ enclosure c = do
guardDisabled Ext_intraword_underscores
<|> guard (c == '*')
<|> (guard =<< notAfterString)
- cs <- many1 (char c)
+ cs <- many1Char (char c)
(return (B.str cs) <>) <$> whitespace
<|>
- case length cs of
+ case T.length cs of
3 -> three c
2 -> two c mempty
1 -> one c mempty
@@ -1653,7 +1656,7 @@ three c = do
(ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents))
<|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents))
<|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents))
- <|> return (return (B.str [c,c,c]) <> contents)
+ <|> return (return (B.str $ T.pack [c,c,c]) <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
@@ -1662,7 +1665,7 @@ two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
(ender c 2 >> updateLastStrPos >>
return (B.strong <$> (prefix' <> contents)))
- <|> return (return (B.str [c,c]) <> (prefix' <> contents))
+ <|> return (return (B.str $ T.pack [c,c]) <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
@@ -1673,7 +1676,7 @@ one c prefix' = do
notFollowedBy (ender c 1) >>
two c mempty) )
(ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents)))
- <|> return (return (B.str [c]) <> (prefix' <> contents))
+ <|> return (return (B.str $ T.singleton c) <> (prefix' <> contents))
strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
strongOrEmph = enclosure '*' <|> enclosure '_'
@@ -1717,16 +1720,16 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: PandocMonad m => ParserT [Char] st m Char
+nonEndline :: PandocMonad m => ParserT Text st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
- result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
+ result <- many1Char (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
updateLastStrPos
(do guardEnabled Ext_smart
abbrevs <- getOption readerAbbreviations
- if not (null result) && last result == '.' && result `Set.member` abbrevs
+ if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
then try (do ils <- whitespace
-- ?? lookAhead alphaNum
-- replace space after with nonbreaking space
@@ -1766,36 +1769,36 @@ endline = try $ do
--
-- a reference label for a link
-reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
+reference :: PandocMonad m => MarkdownParser m (F Inlines, Text)
reference = do
guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
+parenthesizedChars :: PandocMonad m => MarkdownParser m Text
parenthesizedChars = do
result <- charsInBalanced '(' ')' litChar
- return $ '(' : result ++ ")"
+ return $ "(" <> result <> ")"
-- source for a link, with optional title
-source :: PandocMonad m => MarkdownParser m (String, String)
+source :: PandocMonad m => MarkdownParser m (Text, Text)
source = do
char '('
skipSpaces
let urlChunk =
try parenthesizedChars
- <|> (notFollowedBy (oneOf " )") >> count 1 litChar)
- <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
- let sourceURL = (unwords . words . concat) <$> many urlChunk
+ <|> (notFollowedBy (oneOf " )") >> countChar 1 litChar)
+ <|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')"))
+ let sourceURL = (T.unwords . T.words . T.concat) <$> many urlChunk
let betweenAngles = try $
- char '<' >> manyTill litChar (char '>')
+ char '<' >> manyTillChar litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" $ try $ spnl >> linkTitle
skipSpaces
char ')'
return (escapeURI $ trimr src, tit)
-linkTitle :: PandocMonad m => MarkdownParser m String
+linkTitle :: PandocMonad m => MarkdownParser m Text
linkTitle = quotedTitle '"' <|> quotedTitle '\''
link :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1823,13 +1826,13 @@ isSmallCaps :: Attr -> Bool
isSmallCaps ("",["smallcaps"],[]) = True
isSmallCaps ("",[],kvs) =
case lookup "style" kvs of
- Just s -> map toLower (filter (`notElem` " \t;") s) ==
+ Just s -> T.toLower (T.filter (`notElem` [' ', '\t', ';']) s) ==
"font-variant:small-caps"
Nothing -> False
isSmallCaps _ = False
regLink :: PandocMonad m
- => (Attr -> String -> String -> Inlines -> Inlines)
+ => (Attr -> Text -> Text -> Inlines -> Inlines)
-> F Inlines
-> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
@@ -1840,8 +1843,8 @@ regLink constructor lab = try $ do
-- a link like [this][ref] or [this][] or [this]
referenceLink :: PandocMonad m
- => (Attr -> String -> String -> Inlines -> Inlines)
- -> (F Inlines, String)
+ => (Attr -> Text -> Text -> Inlines -> Inlines)
+ -> (F Inlines, Text)
-> MarkdownParser m (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
@@ -1863,7 +1866,7 @@ referenceLink constructor (lab, raw) = do
parsedRaw' <- parsedRaw
fallback' <- fallback
return $ B.str "[" <> fallback' <> B.str "]" <>
- (if sp && not (null raw) then B.space else mempty) <>
+ (if sp && not (T.null raw) then B.space else mempty) <>
parsedRaw'
return $ do
keys <- asksF stateKeys
@@ -1878,19 +1881,19 @@ referenceLink constructor (lab, raw) = do
else makeFallback
Just ((src,tit), attr) -> constructor attr src tit <$> lab
-dropBrackets :: String -> String
-dropBrackets = reverse . dropRB . reverse . dropLB
- where dropRB (']':xs) = xs
- dropRB xs = xs
- dropLB ('[':xs) = xs
- dropLB xs = xs
+dropBrackets :: Text -> Text
+dropBrackets = dropRB . dropLB
+ where dropRB (T.unsnoc -> Just (xs,']')) = xs
+ dropRB xs = xs
+ dropLB (T.uncons -> Just ('[',xs)) = xs
+ dropLB xs = xs
bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
(cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
- notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
+ notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1902,19 +1905,20 @@ autoLink = try $ do
-- is finished, because the uri parser tries to avoid parsing
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
- extra <- fromEntities <$> manyTill nonspaceChar (char '>')
+ extra <- fromEntities <$> manyTillChar nonspaceChar (char '>')
attr <- option ("", [cls], []) $ try $
guardEnabled Ext_link_attributes >> attributes
- return $ return $ B.linkWith attr (src ++ escapeURI extra) ""
- (B.str $ orig ++ extra)
+ return $ return $ B.linkWith attr (src <> escapeURI extra) ""
+ (B.str $ orig <> extra)
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
- let constructor attr' src = case takeExtension src of
- "" -> B.imageWith attr' (addExtension src defaultExt)
+ let constructor attr' src = case takeExtension (T.unpack src) of
+ "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+ $ T.unpack defaultExt)
_ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
@@ -1926,7 +1930,7 @@ note = try $ do
return $ do
notes <- asksF stateNotes'
case M.lookup ref notes of
- Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
+ Nothing -> return $ B.str $ "[^" <> ref <> "]"
Just (_pos, contents) -> do
st <- askF
-- process the note in a context that doesn't resolve
@@ -1949,29 +1953,29 @@ rawLaTeXInline' = try $ do
s <- rawLaTeXInline
return $ return $ B.rawInline "tex" s -- "tex" because it might be context
-rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
+rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
- <|> many1 letter
- contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar)
- (try $ string "\\stop" >> string completion)
- return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
+ <|> many1Char letter
+ contents <- manyTill (rawConTeXtEnvironment <|> countChar 1 anyChar)
+ (try $ string "\\stop" >> textStr completion)
+ return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
-inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String
+inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
inBrackets parser = do
char '['
- contents <- many parser
+ contents <- manyChar parser
char ']'
- return $ "[" ++ contents ++ "]"
+ return $ "[" <> contents <> "]"
spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
spanHtml = try $ do
guardEnabled Ext_native_spans
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
- contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
+ (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
+ contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
+ let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ if isSmallCaps (ident, classes, keyvals)
then B.smallcaps <$> contents
@@ -1980,20 +1984,20 @@ spanHtml = try $ do
divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = try $ do
guardEnabled Ext_native_divs
- (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+ (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
bls <- option "" (blankline >> option "" blanklines)
contents <- mconcat <$>
- many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
- closed <- option False (True <$ htmlTag (~== TagClose "div"))
+ many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+ closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
if closed
then do
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
+ let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.divWith (ident, classes, keyvals) <$> contents
else -- avoid backtracing
@@ -2005,7 +2009,7 @@ divFenced = try $ do
string ":::"
skipMany (char ':')
skipMany spaceChar
- attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar)
+ attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
skipMany spaceChar
skipMany (char ':')
blankline
@@ -2047,7 +2051,7 @@ emoji :: PandocMonad m => MarkdownParser m (F Inlines)
emoji = try $ do
guardEnabled Ext_emoji
char ':'
- emojikey <- many1 (oneOf emojiChars)
+ emojikey <- many1Char (oneOf emojiChars)
char ':'
case emojiToInline emojikey of
Just i -> return (return $ B.singleton i)
@@ -2077,14 +2081,14 @@ textualCite = try $ do
mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
case mbrest of
Just (rest, raw) ->
- return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
+ return $ (flip B.cite (B.text $ "@" <> key <> " " <> raw) . (first:))
<$> rest
Nothing ->
(do
(cs, raw) <- withRaw $ bareloc first
- let (spaces',raw') = span isSpace raw
- spc | null spaces' = mempty
- | otherwise = B.space
+ let (spaces',raw') = T.span isSpace raw
+ spc | T.null spaces' = mempty
+ | otherwise = B.space
lab <- parseFromString' inlines $ dropBrackets raw'
fallback <- referenceLink B.linkWith (lab,raw')
return $ do
@@ -2092,12 +2096,12 @@ textualCite = try $ do
cs' <- cs
return $
case B.toList fallback' of
- Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
- _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
+ Link{}:_ -> B.cite [first] (B.str $ "@" <> key) <> spc <> fallback'
+ _ -> B.cite cs' (B.text $ "@" <> key <> " " <> raw))
<|> return (do st <- askF
return $ case M.lookup key (stateExamples st) of
- Just n -> B.str (show n)
- _ -> B.cite [first] $ B.str $ '@':key)
+ Just n -> B.str $ tshow n
+ _ -> B.cite [first] $ B.str $ "@" <> key)
bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
bareloc c = try $ do
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 35bb8e3eb..07240e951 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE RelaxedPolyRec #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RelaxedPolyRec #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{- |
Module : Text.Pandoc.Readers.MediaWiki
@@ -24,11 +25,12 @@ import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isDigit, isSpace)
import qualified Data.Foldable as F
-import Data.List (intercalate, intersperse, isPrefixOf)
+import Data.List (intersperse)
import Data.Maybe (fromMaybe, maybeToList)
import Data.Sequence (ViewL (..), viewl, (<|))
import qualified Data.Set as Set
-import Data.Text (Text, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
@@ -39,7 +41,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
- trim)
+ trim, splitTextBy, tshow)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
@@ -57,7 +59,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (unpack (crFilter s) ++ "\n")
+ (crFilter s <> "\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -66,12 +68,12 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
, mwNextLinkNumber :: Int
, mwCategoryLinks :: [Inlines]
- , mwIdentifierList :: Set.Set String
+ , mwIdentifierList :: Set.Set Text
, mwLogMessages :: [LogMessage]
, mwInTT :: Bool
}
-type MWParser m = ParserT [Char] MWState m
+type MWParser m = ParserT Text MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
@@ -105,58 +107,58 @@ specialChars = "'[]<=&*{}|\":\\"
spaceChars :: [Char]
spaceChars = " \n\t"
-sym :: PandocMonad m => String -> MWParser m ()
-sym s = () <$ try (string s)
+sym :: PandocMonad m => Text -> MWParser m ()
+sym s = () <$ try (string $ T.unpack s)
-newBlockTags :: [String]
+newBlockTags :: [Text]
newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
-isBlockTag' :: Tag String -> Bool
+isBlockTag' :: Tag Text -> Bool
isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
isBlockTag' tag = isBlockTag tag
-isInlineTag' :: Tag String -> Bool
+isInlineTag' :: Tag Text -> Bool
isInlineTag' (TagComment _) = True
isInlineTag' t = not (isBlockTag' t)
-eitherBlockOrInline :: [String]
+eitherBlockOrInline :: [Text]
eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
"map", "area", "object"]
htmlComment :: PandocMonad m => MWParser m ()
htmlComment = () <$ htmlTag isCommentTag
-inlinesInTags :: PandocMonad m => String -> MWParser m Inlines
+inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines
inlinesInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
- if '/' `elem` raw -- self-closing tag
+ if T.any (== '/') raw -- self-closing tag
then return mempty
else trimInlines . mconcat <$>
manyTill inline (htmlTag (~== TagClose tag))
-blocksInTags :: PandocMonad m => String -> MWParser m Blocks
+blocksInTags :: PandocMonad m => Text -> MWParser m Blocks
blocksInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
let closer = if tag == "li"
- then htmlTag (~== TagClose "li")
+ then htmlTag (~== TagClose ("li" :: Text))
<|> lookAhead (
- htmlTag (~== TagOpen "li" [])
- <|> htmlTag (~== TagClose "ol")
- <|> htmlTag (~== TagClose "ul"))
+ htmlTag (~== TagOpen ("li" :: Text) [])
+ <|> htmlTag (~== TagClose ("ol" :: Text))
+ <|> htmlTag (~== TagClose ("ul" :: Text)))
else htmlTag (~== TagClose tag)
- if '/' `elem` raw -- self-closing tag
+ if T.any (== '/') raw -- self-closing tag
then return mempty
else mconcat <$> manyTill block closer
-charsInTags :: PandocMonad m => String -> MWParser m [Char]
-charsInTags tag = try $ do
+textInTags :: PandocMonad m => Text -> MWParser m Text
+textInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
- if '/' `elem` raw -- self-closing tag
+ if T.any (== '/') raw -- self-closing tag
then return ""
- else manyTill anyChar (htmlTag (~== TagClose tag))
+ else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag))
--
-- main parser
@@ -192,7 +194,7 @@ block = do
<|> blockTag
<|> (B.rawBlock "mediawiki" <$> template)
<|> para
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
para :: PandocMonad m => MWParser m Blocks
@@ -234,16 +236,16 @@ table = do
else (replicate cols mempty, hdr:rows')
return $ B.table caption cellspecs headers rows
-parseAttrs :: PandocMonad m => MWParser m [(String,String)]
+parseAttrs :: PandocMonad m => MWParser m [(Text,Text)]
parseAttrs = many1 parseAttr
-parseAttr :: PandocMonad m => MWParser m (String, String)
+parseAttr :: PandocMonad m => MWParser m (Text, Text)
parseAttr = try $ do
skipMany spaceChar
- k <- many1 letter
+ k <- many1Char letter
char '='
- v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
- <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
+ v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"'))
+ <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|')
return (k,v)
tableStart :: PandocMonad m => MWParser m ()
@@ -293,8 +295,8 @@ tableCell = try $ do
notFollowedBy (char '|')
skipMany spaceChar
pos' <- getPosition
- ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
- ((snd <$> withRaw table) <|> count 1 anyChar))
+ ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
+ ((snd <$> withRaw table) <|> countChar 1 anyChar))
bs <- parseFromString (do setPosition pos'
mconcat <$> many block) ls
let align = case lookup "align" attrs of
@@ -307,48 +309,49 @@ tableCell = try $ do
Nothing -> 0.0
return ((align, width), bs)
-parseWidth :: String -> Maybe Double
+parseWidth :: Text -> Maybe Double
parseWidth s =
- case reverse s of
- ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
- _ -> Nothing
+ case T.unsnoc s of
+ Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds
+ _ -> Nothing
-template :: PandocMonad m => MWParser m String
+template :: PandocMonad m => MWParser m Text
template = try $ do
string "{{"
notFollowedBy (char '{')
lookAhead $ letter <|> digit <|> char ':'
- let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar
+ let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar
contents <- manyTill chunk (try $ string "}}")
- return $ "{{" ++ concat contents ++ "}}"
+ return $ "{{" <> T.concat contents <> "}}"
blockTag :: PandocMonad m => MWParser m Blocks
blockTag = do
(tag, _) <- lookAhead $ htmlTag isBlockTag'
case tag of
TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote"
- TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre"
+ TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre"
TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs
TagOpen "source" attrs -> syntaxhighlight "source" attrs
TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
- charsInTags "haskell"
+ textInTags "haskell"
TagOpen "gallery" _ -> blocksInTags "gallery"
TagOpen "p" _ -> mempty <$ htmlTag (~== tag)
TagClose "p" -> mempty <$ htmlTag (~== tag)
_ -> B.rawBlock "html" . snd <$> htmlTag (~== tag)
-trimCode :: String -> String
-trimCode ('\n':xs) = stripTrailingNewlines xs
-trimCode xs = stripTrailingNewlines xs
+trimCode :: Text -> Text
+trimCode t = case T.uncons t of
+ Just ('\n', xs) -> stripTrailingNewlines xs
+ _ -> stripTrailingNewlines t
-syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks
+syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks
syntaxhighlight tag attrs = try $ do
let mblang = lookup "lang" attrs
let mbstart = lookup "start" attrs
let mbline = lookup "line" attrs
let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline
let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart
- contents <- charsInTags tag
+ contents <- textInTags tag
return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
hrule :: PandocMonad m => MWParser m Blocks
@@ -362,17 +365,17 @@ preformatted = try $ do
guardColumnOne
char ' '
let endline' = B.linebreak <$ try (newline <* char ' ')
- let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)
+ let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar)
let spToNbsp ' ' = '\160'
spToNbsp x = x
let nowiki' = mconcat . intersperse B.linebreak . map B.str .
- lines . fromEntities . map spToNbsp <$> try
- (htmlTag (~== TagOpen "nowiki" []) *>
- manyTill anyChar (htmlTag (~== TagClose "nowiki")))
+ T.lines . fromEntities . T.map spToNbsp <$> try
+ (htmlTag (~== TagOpen ("nowiki" :: Text) []) *>
+ manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))))
let inline' = whitespace' <|> endline' <|> nowiki'
<|> try (notFollowedBy newline *> inline)
contents <- mconcat <$> many1 inline'
- let spacesStr (Str xs) = all isSpace xs
+ let spacesStr (Str xs) = T.all isSpace xs
spacesStr _ = False
if F.all spacesStr contents
then return mempty
@@ -385,7 +388,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode
strToCode x = x
normalizeCode [] = []
normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
- normalizeCode $ Code a1 (x ++ y) : zs
+ normalizeCode $ Code a1 (x <> y) : zs
normalizeCode (x:xs) = x : normalizeCode xs
header :: PandocMonad m => MWParser m Blocks
@@ -400,22 +403,22 @@ header = try $ do
-- See #4731:
modifyIdentifier :: Attr -> Attr
modifyIdentifier (ident,cl,kv) = (ident',cl,kv)
- where ident' = map (\c -> if c == '-' then '_' else c) ident
+ where ident' = T.map (\c -> if c == '-' then '_' else c) ident
bulletList :: PandocMonad m => MWParser m Blocks
bulletList = B.bulletList <$>
( many1 (listItem '*')
- <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
- optional (htmlTag (~== TagClose "ul"))) )
+ <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <*
+ optional (htmlTag (~== TagClose ("ul" :: Text)))) )
orderedList :: PandocMonad m => MWParser m Blocks
orderedList =
(B.orderedList <$> many1 (listItem '#'))
<|> try
- (do (tag,_) <- htmlTag (~== TagOpen "ol" [])
+ (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) [])
spaces
items <- many (listItem '#' <|> li)
- optional (htmlTag (~== TagClose "ol"))
+ optional (htmlTag (~== TagClose ("ol" :: Text)))
let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
@@ -452,7 +455,7 @@ anyListStart :: PandocMonad m => MWParser m Char
anyListStart = guardColumnOne >> oneOf "*#:;"
li :: PandocMonad m => MWParser m Blocks
-li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
+li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *>
(firstParaToPlain <$> blocksInTags "li") <* spaces
listItem :: PandocMonad m => Char -> MWParser m Blocks
@@ -464,13 +467,13 @@ listItem c = try $ do
else do
skipMany spaceChar
pos' <- getPosition
- first <- concat <$> manyTill listChunk newline
+ first <- T.concat <$> manyTill listChunk newline
rest <- many
(try $ string extras *> lookAhead listStartChar *>
- (concat <$> manyTill listChunk newline))
+ (T.concat <$> manyTill listChunk newline))
contents <- parseFromString (do setPosition pos'
many1 $ listItem' c)
- (unlines (first : rest))
+ (T.unlines (first : rest))
case c of
'*' -> return $ B.bulletList contents
'#' -> return $ B.orderedList contents
@@ -484,20 +487,20 @@ listItem c = try $ do
-- }}
-- * next list item
-- which seems to be valid mediawiki.
-listChunk :: PandocMonad m => MWParser m String
-listChunk = template <|> count 1 anyChar
+listChunk :: PandocMonad m => MWParser m Text
+listChunk = template <|> countChar 1 anyChar
listItem' :: PandocMonad m => Char -> MWParser m Blocks
listItem' c = try $ do
listStart c
skipMany spaceChar
pos' <- getPosition
- first <- concat <$> manyTill listChunk newline
+ first <- T.concat <$> manyTill listChunk newline
rest <- many (try $ char c *> lookAhead listStartChar *>
- (concat <$> manyTill listChunk newline))
+ (T.concat <$> manyTill listChunk newline))
parseFromString (do setPosition pos'
firstParaToPlain . mconcat <$> many1 block)
- $ unlines $ first : rest
+ $ T.unlines $ first : rest
firstParaToPlain :: Blocks -> Blocks
firstParaToPlain contents =
@@ -528,23 +531,23 @@ inline = whitespace
<|> special
str :: PandocMonad m => MWParser m Inlines
-str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
+str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars)
math :: PandocMonad m => MWParser m Inlines
-math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
- <|> (B.math . trim <$> charsInTags "math")
- <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
- <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
+math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math"))
+ <|> (B.math . trim <$> textInTags "math")
+ <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd))
+ <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd))
where dmStart = string "\\["
dmEnd = try (string "\\]")
mStart = string "\\("
mEnd = try (string "\\)")
-variable :: PandocMonad m => MWParser m String
+variable :: PandocMonad m => MWParser m Text
variable = try $ do
string "{{{"
- contents <- manyTill anyChar (try $ string "}}}")
- return $ "{{{" ++ contents ++ "}}}"
+ contents <- manyTillChar anyChar (try $ string "}}}")
+ return $ "{{{" <> contents <> "}}}"
inlineTag :: PandocMonad m => MWParser m Inlines
inlineTag = do
@@ -553,11 +556,11 @@ inlineTag = do
TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
TagOpen "nowiki" _ -> try $ do
(_,raw) <- htmlTag (~== tag)
- if '/' `elem` raw
+ if T.any (== '/') raw
then return mempty
else B.text . fromEntities <$>
- manyTill anyChar (htmlTag (~== TagClose "nowiki"))
- TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too
+ manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))
+ TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too
*> optional blankline)
TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike"
TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
@@ -570,12 +573,12 @@ inlineTag = do
result <- encode <$> inlinesInTags "tt"
updateState $ \st -> st{ mwInTT = inTT }
return result
- TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
+ TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask"
_ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
special :: PandocMonad m => MWParser m Inlines
-special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
- oneOf specialChars)
+special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *>
+ oneOf specialChars)
inlineHtml :: PandocMonad m => MWParser m Inlines
inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
@@ -594,7 +597,7 @@ endline = () <$ try (newline <*
notFollowedBy anyListStart)
imageIdentifiers :: PandocMonad m => [MWParser m ()]
-imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
+imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers]
where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
"Bild"]
@@ -602,9 +605,9 @@ image :: PandocMonad m => MWParser m Inlines
image = try $ do
sym "[["
choice imageIdentifiers
- fname <- addUnderscores <$> many1 (noneOf "|]")
+ fname <- addUnderscores <$> many1Char (noneOf "|]")
_ <- many imageOption
- dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px")
+ dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px")
<|> return []
_ <- many imageOption
let kvs = case dims of
@@ -614,9 +617,9 @@ image = try $ do
let attr = ("", [], kvs)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
- return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
+ return $ B.imageWith attr fname ("fig:" <> stringify caption) caption
-imageOption :: PandocMonad m => MWParser m String
+imageOption :: PandocMonad m => MWParser m Text
imageOption = try $ char '|' *> opt
where
opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
@@ -624,30 +627,27 @@ imageOption = try $ char '|' *> opt
, "center", "none", "baseline", "sub"
, "super", "top", "text-top", "middle"
, "bottom", "text-bottom" ])
- <|> try (string "frame")
+ <|> try (textStr "frame")
<|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
-collapseUnderscores :: String -> String
-collapseUnderscores [] = []
-collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
-collapseUnderscores (x:xs) = x : collapseUnderscores xs
-
-addUnderscores :: String -> String
-addUnderscores = collapseUnderscores . intercalate "_" . words
+addUnderscores :: Text -> Text
+addUnderscores = T.intercalate "_" . splitTextBy sep
+ where
+ sep c = isSpace c || c == '_'
internalLink :: PandocMonad m => MWParser m Inlines
internalLink = try $ do
sym "[["
- pagename <- unwords . words <$> many (noneOf "|]")
+ pagename <- T.unwords . T.words <$> manyChar (noneOf "|]")
label <- option (B.text pagename) $ char '|' *>
( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
-- the "pipe trick"
-- [[Help:Contents|] -> "Contents"
- <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) )
+ <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) )
sym "]]"
- linktrail <- B.text <$> many letter
+ linktrail <- B.text <$> manyChar letter
let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
- if "Category:" `isPrefixOf` pagename
+ if "Category:" `T.isPrefixOf` pagename
then do
updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st }
return mempty
@@ -662,7 +662,7 @@ externalLink = try $ do
<|> do char ']'
num <- mwNextLinkNumber <$> getState
updateState $ \st -> st{ mwNextLinkNumber = num + 1 }
- return $ B.str $ show num
+ return $ B.str $ tshow num
return $ B.link src "" lab
url :: PandocMonad m => MWParser m Inlines
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b8cbe2f26..4ade61294 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Muse
Copyright : Copyright (C) 2017-2019 Alexander Krotov
@@ -24,12 +25,12 @@ import Control.Monad.Reader
import Control.Monad.Except (throwError)
import Data.Bifunctor
import Data.Default
-import Data.List (intercalate, transpose, uncons)
-import Data.List.Split (splitOn)
+import Data.List (transpose, uncons)
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isNothing, maybeToList)
-import Data.Text (Text, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
@@ -38,7 +39,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Shared (crFilter, trimr, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trimr, underlineSpan, tshow)
-- | Read Muse from an input string and return a Pandoc document.
readMuse :: PandocMonad m
@@ -49,18 +50,18 @@ readMuse opts s = do
let input = crFilter s
res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
case res of
- Left e -> throwError $ PandocParsecError (unpack input) e
+ Left e -> throwError $ PandocParsecError input e
Right d -> return d
type F = Future MuseState
data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
, museOptions :: ReaderOptions
- , museIdentifierList :: Set.Set String
+ , museIdentifierList :: Set.Set Text
, museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed
, museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
, museLogMessages :: [LogMessage]
- , museNotes :: M.Map String (SourcePos, F Blocks)
+ , museNotes :: M.Map Text (SourcePos, F Blocks)
}
instance Default MuseState where
@@ -116,22 +117,27 @@ parseMuse = do
-- * Utility functions
-- | Trim up to one newline from the beginning of the string.
-lchop :: String -> String
-lchop ('\n':xs) = xs
-lchop s = s
+lchop :: Text -> Text
+lchop s = case T.uncons s of
+ Just ('\n', xs) -> xs
+ _ -> s
-- | Trim up to one newline from the end of the string.
-rchop :: String -> String
-rchop = reverse . lchop . reverse
+rchop :: Text -> Text
+rchop s = case T.unsnoc s of
+ Just (xs, '\n') -> xs
+ _ -> s
-unindent :: String -> String
-unindent = rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop
+unindent :: Text -> Text
+unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop
-dropSpacePrefix :: [String] -> [String]
-dropSpacePrefix lns = drop maxIndent <$> lns
+dropSpacePrefix :: [Text] -> [Text]
+dropSpacePrefix lns = T.drop maxIndent <$> lns
where isSpaceChar c = c == ' ' || c == '\t'
- maxIndent = length $ takeWhile (isSpaceChar . head) $ takeWhile same $ transpose lns
- same = and . (zipWith (==) <*> drop 1)
+ maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns
+ same t = case T.uncons t of
+ Just (c, cs) -> T.all (== c) cs
+ Nothing -> True
atStart :: PandocMonad m => MuseParser m ()
atStart = do
@@ -160,29 +166,29 @@ getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition
-- ** HTML parsers
-openTag :: PandocMonad m => String -> MuseParser m [(String, String)]
+openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)]
openTag tag = try $
- char '<' *> string tag *> manyTill attr (char '>')
+ char '<' *> textStr tag *> manyTill attr (char '>')
where
attr = try $ (,)
<$ many1 spaceChar
- <*> many1 (noneOf "=\n")
+ <*> many1Char (noneOf "=\n")
<* string "=\""
- <*> manyTill (noneOf "\"") (char '"')
+ <*> manyTillChar (noneOf "\"") (char '"')
-closeTag :: PandocMonad m => String -> MuseParser m ()
-closeTag tag = try $ string "</" *> string tag *> void (char '>')
+closeTag :: PandocMonad m => Text -> MuseParser m ()
+closeTag tag = try $ string "</" *> textStr tag *> void (char '>')
-- | Convert HTML attributes to Pandoc 'Attr'
-htmlAttrToPandoc :: [(String, String)] -> Attr
+htmlAttrToPandoc :: [(Text, Text)] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
- classes = maybe [] words $ lookup "class" attrs
+ classes = maybe [] T.words $ lookup "class" attrs
keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"]
parseHtmlContent :: PandocMonad m
- => String -- ^ Tag name
+ => Text -- ^ Tag name
-> MuseParser m (Attr, F Blocks)
parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
<$> fmap htmlAttrToPandoc (openTag tag)
@@ -193,16 +199,16 @@ parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
-- ** Directive parsers
-- While not documented, Emacs Muse allows "-" in directive name
-parseDirectiveKey :: PandocMonad m => MuseParser m String
-parseDirectiveKey = char '#' *> many (letter <|> char '-')
+parseDirectiveKey :: PandocMonad m => MuseParser m Text
+parseDirectiveKey = char '#' *> manyChar (letter <|> char '-')
-parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
parseEmacsDirective = (,)
<$> parseDirectiveKey
<* spaceChar
<*> (trimInlinesF . mconcat <$> manyTill inline' eol)
-parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
parseAmuseDirective = (,)
<$> parseDirectiveKey
<* many1 spaceChar
@@ -289,7 +295,7 @@ listItemContentsUntil col pre end = p
parseBlock :: PandocMonad m => MuseParser m (F Blocks)
parseBlock = do
res <- blockElements <|> para
- trace (take 60 $ show $ B.toList $ runF res def)
+ trace (T.take 60 $ tshow $ B.toList $ runF res def)
return res
where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements)))
@@ -337,7 +343,7 @@ pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always
<* string "* * * * *"
<* manyTill spaceChar eol
-headingStart :: PandocMonad m => MuseParser m (String, Int)
+headingStart :: PandocMonad m => MuseParser m (Text, Int)
headingStart = try $ (,)
<$> option "" (try (parseAnchor <* manyTill spaceChar eol))
<* firstColumn
@@ -371,14 +377,14 @@ example :: PandocMonad m => MuseParser m (F Blocks)
example = try $ pure . B.codeBlock
<$ string "{{{"
<* many spaceChar
- <*> (unindent <$> manyTill anyChar (string "}}}"))
+ <*> (unindent <$> manyTillChar anyChar (string "}}}"))
-- | Parse an @\<example>@ tag.
exampleTag :: PandocMonad m => MuseParser m (F Blocks)
exampleTag = try $ fmap pure $ B.codeBlockWith
<$ many spaceChar
<*> (htmlAttrToPandoc <$> openTag "example")
- <*> (unindent <$> manyTill anyChar (closeTag "example"))
+ <*> (unindent <$> manyTillChar anyChar (closeTag "example"))
<* manyTill spaceChar eol
-- | Parse a @\<literal>@ tag as a raw block.
@@ -388,7 +394,7 @@ literalTag = try $ fmap pure $ B.rawBlock
<$ many spaceChar
<*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
<* manyTill spaceChar eol
- <*> (unindent <$> manyTill anyChar (closeTag "literal"))
+ <*> (unindent <$> manyTillChar anyChar (closeTag "literal"))
<* manyTill spaceChar eol
-- | Parse @\<center>@ tag.
@@ -428,7 +434,7 @@ playTag = do
verseLine :: PandocMonad m => MuseParser m (F Inlines)
verseLine = (<>)
- <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' ')))
+ <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' ')))
<*> fmap (trimInlinesF . mconcat) (manyTill inline' eol)
-- | Parse @\<verse>@ tag.
@@ -466,17 +472,17 @@ paraUntil end = do
noteMarker' :: PandocMonad m
=> Char
-> Char
- -> MuseParser m String
-noteMarker' l r = try $ (\x y -> l:x:y ++ [r])
+ -> MuseParser m Text
+noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r])
<$ char l
<*> oneOf "123456789"
<*> manyTill digit (char r)
-noteMarker :: PandocMonad m => MuseParser m String
+noteMarker :: PandocMonad m => MuseParser m Text
noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}'
addNote :: PandocMonad m
- => String
+ => Text
-> SourcePos
-> F Blocks
-> MuseParser m ()
@@ -674,15 +680,15 @@ museGridTableRow :: PandocMonad m
-> MuseParser m (F [Blocks])
museGridTableRow indent indices = try $ do
lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices)
- let cols = map (unlines . map trimr) $ transpose lns
+ let cols = map (T.unlines . map trimr) $ transpose lns
indentWith indent *> museGridTableHeader
sequence <$> mapM (parseFromString' parseBlocks) cols
museGridTableRawLine :: PandocMonad m
=> [Int]
- -> MuseParser m [String]
+ -> MuseParser m [Text]
museGridTableRawLine indices =
- char '|' *> forM indices (\n -> count n anyChar <* char '|') <* manyTill spaceChar eol
+ char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol
museGridTable :: PandocMonad m => MuseParser m (F Blocks)
museGridTable = try $ do
@@ -767,12 +773,12 @@ inline = endline <|> inline'
endline :: PandocMonad m => MuseParser m (F Inlines)
endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos
-parseAnchor :: PandocMonad m => MuseParser m String
-parseAnchor = try $ (:)
+parseAnchor :: PandocMonad m => MuseParser m Text
+parseAnchor = try $ T.cons
<$ firstColumn
<* char '#'
<*> letter
- <*> many (letter <|> digit <|> char '-')
+ <*> manyChar (letter <|> digit <|> char '-')
anchor :: PandocMonad m => MuseParser m (F Inlines)
anchor = try $ do
@@ -813,7 +819,7 @@ emphasisBetween p = try $ trimInlinesF . mconcat
-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
inlineTag :: PandocMonad m
- => String -- ^ Tag name
+ => Text -- ^ Tag name
-> MuseParser m (F Inlines)
inlineTag tag = try $ mconcat
<$ openTag tag
@@ -862,12 +868,12 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del"
verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
verbatimTag = return . B.text
<$ openTag "verbatim"
- <*> manyTill anyChar (closeTag "verbatim")
+ <*> manyTillChar anyChar (closeTag "verbatim")
-- | Parse @\<class>@ tag.
classTag :: PandocMonad m => MuseParser m (F Inlines)
classTag = do
- classes <- maybe [] words . lookup "name" <$> openTag "class"
+ classes <- maybe [] T.words . lookup "name" <$> openTag "class"
fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class")
-- | Parse @\<\<\<RTL>>>@ text.
@@ -886,43 +892,43 @@ nbsp = try $ pure (B.str "\160") <$ string "~~"
-- | Parse code markup, indicated by @\'=\'@ characters.
code :: PandocMonad m => MuseParser m (F Inlines)
-code = try $ fmap pure $ B.code . uncurry (++)
+code = try $ fmap pure $ B.code . uncurry (<>)
<$ atStart
<* char '='
<* notFollowedBy (spaceChar <|> newline)
- <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=')
+ <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=')
<* notFollowedBy alphaNum
-- | Parse @\<code>@ tag.
codeTag :: PandocMonad m => MuseParser m (F Inlines)
codeTag = fmap pure $ B.codeWith
<$> (htmlAttrToPandoc <$> openTag "code")
- <*> manyTill anyChar (closeTag "code")
+ <*> manyTillChar anyChar (closeTag "code")
-- | Parse @\<math>@ tag.
-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
mathTag :: PandocMonad m => MuseParser m (F Inlines)
mathTag = return . B.math
<$ openTag "math"
- <*> manyTill anyChar (closeTag "math")
+ <*> manyTillChar anyChar (closeTag "math")
-- | Parse inline @\<literal>@ tag as a raw inline.
inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
inlineLiteralTag = try $ fmap pure $ B.rawInline
<$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
- <*> manyTill anyChar (closeTag "literal")
+ <*> manyTillChar anyChar (closeTag "literal")
str :: PandocMonad m => MuseParser m (F Inlines)
-str = return . B.str <$> many1 alphaNum <* updateLastStrPos
+str = return . B.str <$> many1Char alphaNum <* updateLastStrPos
-- | Consume asterisks that were not used as emphasis opening.
-- This prevents series of asterisks from being split into
-- literal asterisk and emphasis opening.
asterisks :: PandocMonad m => MuseParser m (F Inlines)
-asterisks = pure . B.str <$> many1 (char '*')
+asterisks = pure . B.str <$> many1Char (char '*')
symbol :: PandocMonad m => MuseParser m (F Inlines)
-symbol = pure . B.str . pure <$> nonspaceChar
+symbol = pure . B.str . T.singleton <$> nonspaceChar
-- | Parse a link or image.
linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
@@ -934,12 +940,12 @@ linkContent = trimInlinesF . mconcat
<*> manyTill inline (char ']')
-- | Parse a link starting with (possibly null) prefix
-link :: PandocMonad m => String -> MuseParser m (F Inlines)
+link :: PandocMonad m => Text -> MuseParser m (F Inlines)
link prefix = try $ do
inLink <- asks museInLink
guard $ not inLink
- string $ "[[" ++ prefix
- url <- manyTill anyChar $ char ']'
+ textStr $ "[[" <> prefix
+ url <- manyTillChar anyChar $ char ']'
content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent)
char ']'
return $ B.link url "" <$> content
@@ -947,27 +953,27 @@ link prefix = try $ do
image :: PandocMonad m => MuseParser m (F Inlines)
image = try $ do
string "[["
- (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']')
+ (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']')
content <- option mempty linkContent
char ']'
let widthAttr = case align of
- Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")]
- _ -> maybeToList (("width",) . (++ "%") <$> width)
+ Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")]
+ _ -> maybeToList (("width",) . (<> "%") <$> width)
let alignClass = case align of
Just 'r' -> ["align-right"]
Just 'l' -> ["align-left"]
Just 'f' -> []
_ -> []
- return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content
+ return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content
where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
- imageExtension = choice (try . string <$> imageExtensions)
+ imageExtension = choice (try . textStr <$> imageExtensions)
imageExtensionAndOptions = do
ext <- imageExtension
(width, align) <- option (Nothing, Nothing) imageAttrs
return (ext, width, align)
imageAttrs = (,)
<$ many1 spaceChar
- <*> optionMaybe (many1 digit)
+ <*> optionMaybe (many1Char digit)
<* many spaceChar
<*> optionMaybe (oneOf "rlf")
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9e3c118d8..34d3c5e8f 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Native
Copyright : Copyright (C) 2011-2019 John MacFarlane
@@ -19,7 +20,7 @@ import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Shared (safeRead)
import Control.Monad.Except (throwError)
-import Data.Text (Text, unpack)
+import Data.Text (Text)
import Text.Pandoc.Class
import Text.Pandoc.Error
@@ -38,18 +39,18 @@ readNative :: PandocMonad m
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readNative _ s =
- case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of
+ case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "couldn't read native"
readBlocks :: Text -> Either PandocError [Block]
-readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s))
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
readBlock :: Text -> Either PandocError Block
-readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s))
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
readInlines :: Text -> Either PandocError [Inline]
-readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s))
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
readInline :: Text -> Either PandocError Inline
-readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s))
+readInline s = maybe (Left . PandocParseError $ "Could not read: " <> s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 2c3b0367f..5330b0238 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.OPML
Copyright : Copyright (C) 2013-2019 John MacFarlane
@@ -18,7 +19,8 @@ import Data.Char (toUpper)
import Data.Default
import Data.Generics
import Data.Maybe (fromMaybe)
-import Data.Text (Text, pack, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad)
@@ -50,7 +52,7 @@ readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML opts inp = do
(bs, st') <- runStateT
(mapM parseBlock $ normalizeTree $
- parseXML (unpack (crFilter inp))) def{ opmlOptions = opts }
+ parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
@@ -76,23 +78,26 @@ convertEntity :: String -> String
convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
+attrValue :: String -> Element -> Text
attrValue attr elt =
- fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+ maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+
+textContent :: Element -> Text
+textContent = T.pack . strContent
-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
-- exceptT = either throwError return
-asHtml :: PandocMonad m => String -> OPML m Inlines
+asHtml :: PandocMonad m => Text -> OPML m Inlines
asHtml s = do
opts <- gets opmlOptions
- Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s)
+ Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } s
return $ blocksToInlines' bs
-asMarkdown :: PandocMonad m => String -> OPML m Blocks
+asMarkdown :: PandocMonad m => Text -> OPML m Blocks
asMarkdown s = do
opts <- gets opmlOptions
- Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s)
+ Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } s
return $ fromList bs
getBlocks :: PandocMonad m => Element -> OPML m Blocks
@@ -102,11 +107,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock (Elem e) =
case qName (elName e) of
"ownerName" -> mempty <$ modify (\st ->
- st{opmlDocAuthors = [text $ strContent e]})
+ st{opmlDocAuthors = [text $ textContent e]})
"dateModified" -> mempty <$ modify (\st ->
- st{opmlDocDate = text $ strContent e})
+ st{opmlDocDate = text $ textContent e})
"title" -> mempty <$ modify (\st ->
- st{opmlDocTitle = text $ strContent e})
+ st{opmlDocTitle = text $ textContent e})
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
@@ -115,7 +120,7 @@ parseBlock (Elem e) =
modify $ \st -> st{ opmlSectionLevel = n }
bs <- getBlocks e
modify $ \st -> st{ opmlSectionLevel = n - 1 }
- let headerText' = case map toUpper (attrValue "type" e) of
+ let headerText' = case T.toUpper (attrValue "type" e) of
"LINK" -> link
(attrValue "url" e) "" headerText
_ -> headerText
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index dfa019932..f9b78d5bf 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Reader.Odt
Copyright : Copyright (C) 2015 Martin Linnemann
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index d8e5ba272..ff8cdc5fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,11 +1,12 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE Arrows #-}
-{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Odt.ContentReader
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -29,8 +30,9 @@ import Control.Arrow
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold)
-import Data.List (find, intercalate, stripPrefix)
+import Data.List (find, stripPrefix)
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import Data.Semigroup (First(..), Option(..))
@@ -59,7 +61,7 @@ import qualified Data.Set as Set
-- State
--------------------------------------------------------------------------------
-type Anchor = String
+type Anchor = T.Text
type Media = [(FilePath, B.ByteString)]
data ReaderState
@@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue (
)
>>^ fst
-lookupResource :: OdtReaderSafe String (FilePath, B.ByteString)
+lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString)
lookupResource = proc target -> do
state <- getExtraState -< ()
case lookup target (getMediaEnv state) of
Just bs -> returnV (target, bs) -<< ()
Nothing -> returnV ("", B.empty) -< ()
-type AnchorPrefix = String
+type AnchorPrefix = T.Text
-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a
-- unique identifier but without assuming that the id should be for a header.
-- Second argument is a list of already used identifiers.
uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor
uniqueIdentFrom baseIdent usedIdents =
- let numIdent n = baseIdent ++ "-" ++ show n
+ let numIdent n = baseIdent <> "-" <> T.pack (show n)
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
@@ -305,7 +307,7 @@ withNewStyle a = proc x -> do
isCodeStyle _ = False
inlineCode :: Inlines -> Inlines
- inlineCode = code . intercalate "" . map stringify . toList
+ inlineCode = code . T.concat . map stringify . toList
type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily)
type InlineModifier = Inlines -> Inlines
@@ -535,7 +537,6 @@ matchChildContent :: (Monoid result)
-> OdtReaderSafe _x result
matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
-
--------------------------------------------
-- Matchers
--------------------------------------------
@@ -556,8 +557,8 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover
)
>>?% mappend
--
- extractText :: XML.Content -> Fallible String
- extractText (XML.Text cData) = succeedWith (XML.cdData cData)
+ extractText :: XML.Content -> Fallible T.Text
+ extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
extractText _ = failEmpty
read_text_seq :: InlineMatcher
@@ -675,8 +676,8 @@ read_list_item = matchingElement NsText "list-item"
read_link :: InlineMatcher
read_link = matchingElement NsText "a"
$ liftA3 link
- ( findAttrWithDefault NsXLink "href" "" )
- ( findAttrWithDefault NsOffice "title" "" )
+ ( findAttrTextWithDefault NsXLink "href" "" )
+ ( findAttrTextWithDefault NsOffice "title" "" )
( matchChildContent [ read_span
, read_note
, read_citation
@@ -709,12 +710,12 @@ read_citation :: InlineMatcher
read_citation = matchingElement NsText "bibliography-mark"
$ liftA2 cite
( liftA2 makeCitation
- ( findAttrWithDefault NsText "identifier" "" )
+ ( findAttrTextWithDefault NsText "identifier" "" )
( readAttrWithDefault NsText "number" 0 )
)
( matchChildContent [] read_plain_text )
where
- makeCitation :: String -> Int -> [Citation]
+ makeCitation :: T.Text -> Int -> [Citation]
makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0]
@@ -779,17 +780,17 @@ read_frame_img =
let exts = extensionsFromList [Ext_auto_identifiers]
resource <- lookupResource -< src'
_ <- updateMediaWithResource -< resource
- w <- findAttr' NsSVG "width" -< ()
- h <- findAttr' NsSVG "height" -< ()
+ w <- findAttrText' NsSVG "width" -< ()
+ h <- findAttrText' NsSVG "height" -< ()
titleNodes <- matchChildContent' [ read_frame_title ] -< ()
alt <- matchChildContent [] read_plain_text -< ()
arr (firstMatch . uncurry4 imageWith) -<
- (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
+ (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
read_frame_title :: InlineMatcher
read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
-image_attributes :: Maybe String -> Maybe String -> Attr
+image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr
image_attributes x y =
( "", [], (dim "width" x) ++ (dim "height" y))
where
@@ -806,7 +807,7 @@ read_frame_mathml =
src' -> do
let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
(_, mathml) <- lookupResource -< path
- case readMathML (UTF8.toString $ B.toStrict mathml) of
+ case readMathML (UTF8.toText $ B.toStrict mathml) of
Left _ -> returnV mempty -< ()
Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps
@@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do
read_img_with_caption :: [Block] -> FirstMatch Inlines
read_img_with_caption (Para [Image attr alt (src,title)] : _) =
- firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption
+ firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption
read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) =
- firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows
+ firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows
read_img_with_caption ( Para (_ : xs) : ys) =
read_img_with_caption (Para xs : ys)
read_img_with_caption _ =
@@ -829,12 +830,12 @@ read_img_with_caption _ =
-- Internal links
----------------------
-_ANCHOR_PREFIX_ :: String
+_ANCHOR_PREFIX_ :: T.Text
_ANCHOR_PREFIX_ = "anchor"
--
readAnchorAttr :: OdtReader _x Anchor
-readAnchorAttr = findAttr NsText "name"
+readAnchorAttr = findAttrText NsText "name"
-- | Beware: may fail
findAnchorName :: OdtReader AnchorPrefix Anchor
@@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start"
-- | Beware: may fail
findAnchorRef :: OdtReader _x Anchor
-findAnchorRef = ( findAttr NsText "ref-name"
+findAnchorRef = ( findAttrText NsText "ref-name"
>>?^ (_ANCHOR_PREFIX_,)
) >>?! getPrettyAnchor
@@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do
Left _ -> returnA -< inlines
where
toAnchorRef :: Anchor -> Inlines -> Inlines
- toAnchorRef anchor = link ('#':anchor) "" -- no title
+ toAnchorRef anchor = link ("#" <> anchor) "" -- no title
--
read_bookmark_ref :: InlineMatcher
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index ccbaf6fc4..59d1b8abd 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter
, lookupAttr'
, lookupDefaultingAttr
, findAttr'
+, findAttrText'
, findAttr
+, findAttrText
, findAttrWithDefault
+, findAttrTextWithDefault
, readAttr
, readAttr'
, readAttrWithDefault
@@ -59,6 +62,7 @@ import Control.Arrow
import Data.Either ( rights )
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Default
import Data.Maybe
@@ -79,6 +83,7 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible
type ElementName = String
type AttributeName = String
type AttributeValue = String
+type TextAttributeValue = T.Text
--
type NameSpacePrefix = String
@@ -466,6 +471,16 @@ findAttr' nsID attrName = qualifyName nsID attrName
&&& getCurrentElement
>>% XML.findAttr
+-- | Return value as a (Maybe Text)
+findAttrText' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe TextAttributeValue)
+findAttrText' nsID attrName
+ = qualifyName nsID attrName
+ &&& getCurrentElement
+ >>% XML.findAttr
+ >>^ fmap T.pack
+
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
=> nsID -> AttributeName
@@ -473,6 +488,15 @@ findAttr :: (NameSpaceID nsID)
findAttr nsID attrName = findAttr' nsID attrName
>>> maybeToChoice
+-- | Return value as text or fail
+findAttrText :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x TextAttributeValue
+findAttrText nsID attrName
+ = findAttr' nsID attrName
+ >>^ fmap T.pack
+ >>> maybeToChoice
+
-- | Return value as string or return provided default value
findAttrWithDefault :: (NameSpaceID nsID)
=> nsID -> AttributeName
@@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt
= findAttr' nsID attrName
>>^ fromMaybe deflt
+-- | Return value as string or return provided default value
+findAttrTextWithDefault :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> TextAttributeValue
+ -> XMLConverter nsID extraState x TextAttributeValue
+findAttrTextWithDefault nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ maybe deflt T.pack
+
-- | Read and return value or fail
readAttr :: (NameSpaceID nsID, Read attrValue)
=> nsID -> AttributeName
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 79e8d7aea..99fa05880 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
readListLevelStyle levelType = readAttr NsText "level"
>>?! keepingTheValue
( liftA5 toListLevelStyle
- ( returnV levelType )
- ( findAttr' NsStyle "num-prefix" )
- ( findAttr' NsStyle "num-suffix" )
- ( getAttr NsStyle "num-format" )
- ( findAttr' NsText "start-value" )
+ ( returnV levelType )
+ ( findAttr' NsStyle "num-prefix" )
+ ( findAttr' NsStyle "num-suffix" )
+ ( getAttr NsStyle "num-format" )
+ ( findAttrText' NsText "start-value" )
)
where
toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b)
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2c88c7776..99ece152c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -26,7 +27,6 @@ import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
import Data.Text (Text)
-import qualified Data.Text as T
-- | Parse org-mode string and return a Pandoc document.
readOrg :: PandocMonad m
@@ -36,7 +36,7 @@ readOrg :: PandocMonad m
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing org"
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 58db4f46c..b4f3cc0d8 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.BlockStarts
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts
import Prelude
import Control.Monad (void)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Readers.Org.Parsing
-- | Horizontal Line (five -- dashes or more)
@@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m ()
gridTableStart = try $ skipSpaces <* char '+' <* char '-'
-latexEnvStart :: Monad m => OrgParser m String
+latexEnvStart :: Monad m => OrgParser m Text
latexEnvStart = try $
skipSpaces *> string "\\begin{"
*> latexEnvName
<* string "}"
<* blankline
where
- latexEnvName :: Monad m => OrgParser m String
- latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
+ latexEnvName :: Monad m => OrgParser m Text
+ latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*")
bulletListStart :: Monad m => OrgParser m Int
bulletListStart = try $ do
@@ -68,7 +71,7 @@ bulletListStart = try $ do
return (ind + 1)
genericListStart :: Monad m
- => OrgParser m String
+ => OrgParser m Text
-> OrgParser m Int
genericListStart listMarker = try $ do
ind <- length <$> many spaceChar
@@ -82,11 +85,11 @@ eol = void (char '\n')
orderedListStart :: Monad m => OrgParser m Int
orderedListStart = genericListStart orderedListMarker
-- Ordered list markers allowed in org-mode
- where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+ where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)"
-drawerStart :: Monad m => OrgParser m String
+drawerStart :: Monad m => OrgParser m Text
drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
+ where drawerName = char ':' *> manyTillChar nonspaceChar (char ':')
metaLineStart :: Monad m => OrgParser m ()
metaLineStart = try $ skipSpaces <* string "#+"
@@ -99,12 +102,12 @@ commentLineStart = try $
exampleLineStart :: Monad m => OrgParser m ()
exampleLineStart = () <$ try (skipSpaces *> string ": ")
-noteMarker :: Monad m => OrgParser m String
+noteMarker :: Monad m => OrgParser m Text
noteMarker = try $ do
char '['
- choice [ many1Till digit (char ']')
- , (++) <$> string "fn:"
- <*> many1Till (noneOf "\n\r\t ") (char ']')
+ choice [ many1TillChar digit (char ']')
+ , (<>) <$> textStr "fn:"
+ <*> many1TillChar (noneOf "\n\r\t ") (char ']')
]
-- | Succeeds if the parser is at the end of a block.
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index cba876f06..de51dec3d 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Blocks
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Blocks, Inlines)
@@ -33,11 +34,13 @@ import Text.Pandoc.Options
import Text.Pandoc.Shared (compactify, compactifyDL, safeRead)
import Control.Monad (foldM, guard, mzero, void)
-import Data.Char (isSpace, toLower, toUpper)
+import Data.Char (isSpace)
import Data.Default (Default)
-import Data.List (foldl', isPrefixOf)
+import Data.List (foldl')
import Data.Maybe (fromMaybe, isJust, isNothing)
+import Data.Text (Text)
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.Walk as Walk
@@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline
-- | Attributes that may be added to figures (like a name or caption).
data BlockAttributes = BlockAttributes
- { blockAttrName :: Maybe String
- , blockAttrLabel :: Maybe String
+ { blockAttrName :: Maybe Text
+ , blockAttrLabel :: Maybe Text
, blockAttrCaption :: Maybe (F Inlines)
- , blockAttrKeyValues :: [(String, String)]
+ , blockAttrKeyValues :: [(Text, Text)]
}
-- | Convert BlockAttributes into pandoc Attr
@@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} =
ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
classes = case lookup "class" blockAttrKeyValues of
Nothing -> []
- Just clsStr -> words clsStr
+ Just clsStr -> T.words clsStr
kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
in (ident, classes, kv)
-stringyMetaAttribute :: Monad m => OrgParser m (String, String)
+stringyMetaAttribute :: Monad m => OrgParser m (Text, Text)
stringyMetaAttribute = try $ do
metaLineStart
- attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
+ attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':')
skipSpaces
attrValue <- anyLine <|> ("" <$ newline)
return (attrName, attrValue)
@@ -129,8 +132,8 @@ blockAttributes = try $ do
let label = lookup "LABEL" kv
caption' <- case caption of
Nothing -> return Nothing
- Just s -> Just <$> parseFromString inlines (s ++ "\n")
- kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
+ Just s -> Just <$> parseFromString inlines (s <> "\n")
+ kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs
return BlockAttributes
{ blockAttrName = name
, blockAttrLabel = label
@@ -138,31 +141,31 @@ blockAttributes = try $ do
, blockAttrKeyValues = kvAttrs'
}
where
- isBlockAttr :: String -> Bool
+ isBlockAttr :: Text -> Bool
isBlockAttr = flip elem
[ "NAME", "LABEL", "CAPTION"
, "ATTR_HTML", "ATTR_LATEX"
, "RESULTS"
]
- appendValues :: String -> Maybe String -> (String, String) -> Maybe String
+ appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text
appendValues attrName accValue (key, value) =
if key /= attrName
then accValue
else case accValue of
- Just acc -> Just $ acc ++ ' ':value
+ Just acc -> Just $ acc <> " " <> value
Nothing -> Just value
-- | Parse key-value pairs for HTML attributes
-keyValues :: Monad m => OrgParser m [(String, String)]
+keyValues :: Monad m => OrgParser m [(Text, Text)]
keyValues = try $
manyTill ((,) <$> key <*> value) newline
where
- key :: Monad m => OrgParser m String
- key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
+ key :: Monad m => OrgParser m Text
+ key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar
- value :: Monad m => OrgParser m String
- value = skipSpaces *> manyTill anyChar endOfValue
+ value :: Monad m => OrgParser m Text
+ value = skipSpaces *> manyTillChar anyChar endOfValue
endOfValue :: Monad m => OrgParser m ()
endOfValue =
@@ -180,7 +183,7 @@ orgBlock = try $ do
blockAttrs <- blockAttributes
blkType <- blockHeaderStart
($ blkType) $
- case map toLower blkType of
+ case T.toLower blkType of
"export" -> exportBlock
"comment" -> rawBlockLines (const mempty)
"html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
@@ -194,13 +197,13 @@ orgBlock = try $ do
let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
in fmap $ B.divWith (ident, classes ++ [blkType], kv)
where
- blockHeaderStart :: Monad m => OrgParser m String
+ blockHeaderStart :: Monad m => OrgParser m Text
blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
- lowercase :: String -> String
- lowercase = map toLower
+ lowercase :: Text -> Text
+ lowercase = T.toLower
-exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
exampleBlock blockAttrs _label = do
skipSpaces
(classes, kv) <- switchesAsAttributes
@@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do
let codeBlck = B.codeBlockWith (id', "example":classes, kv) content
return . return $ codeBlck
-rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
+rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks)
rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType)
-parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
+parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks)
parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent)
where
parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
parsedBlockContent = try $ do
raw <- rawBlockContent blockType
- parseFromString blocks (raw ++ "\n")
+ parseFromString blocks (raw <> "\n")
-- | Read the raw string content of a block
-rawBlockContent :: Monad m => String -> OrgParser m String
+rawBlockContent :: Monad m => Text -> OrgParser m Text
rawBlockContent blockType = try $ do
blkLines <- manyTill rawLine blockEnder
tabLen <- getOption readerTabStop
trimP <- orgStateTrimLeadBlkIndent <$> getState
- let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs
- (unlines
+ let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs
+ (T.unlines
. stripIndent
. map (tabsToSpaces tabLen . commaEscaped)
$ blkLines)
<$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True })
where
- rawLine :: Monad m => OrgParser m String
+ rawLine :: Monad m => OrgParser m Text
rawLine = try $ ("" <$ blankline) <|> anyLine
blockEnder :: Monad m => OrgParser m ()
blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
- shortestIndent :: [String] -> Int
- shortestIndent = foldr (min . length . takeWhile isSpace) maxBound
- . filter (not . null)
-
- tabsToSpaces :: Int -> String -> String
- tabsToSpaces _ [] = []
- tabsToSpaces tabLen cs'@(c:cs) =
- case c of
- ' ' -> ' ':tabsToSpaces tabLen cs
- '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs
- _ -> cs'
-
- commaEscaped :: String -> String
- commaEscaped (',':cs@('*':_)) = cs
- commaEscaped (',':cs@('#':'+':_)) = cs
- commaEscaped (' ':cs) = ' ':commaEscaped cs
- commaEscaped ('\t':cs) = '\t':commaEscaped cs
- commaEscaped cs = cs
+ shortestIndent :: [Text] -> Int
+ shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound
+ . filter (not . T.null)
+
+ tabsToSpaces :: Int -> Text -> Text
+ tabsToSpaces tabStop t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ tabNum = T.length $ T.filter (== '\n') ind
+ spaceNum = T.length ind - tabNum
+ in T.replicate (spaceNum + tabStop * tabNum) " " <> suff
+
+ commaEscaped t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ in case T.uncons suff of
+ Just (',', cs)
+ | "*" <- T.take 1 cs -> ind <> cs
+ | "#+" <- T.take 2 cs -> ind <> cs
+ _ -> t
-- | Read but ignore all remaining block headers.
ignHeaders :: Monad m => OrgParser m ()
@@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-- | Read a block containing code intended for export in specific backends
-- only.
-exportBlock :: Monad m => String -> OrgParser m (F Blocks)
+exportBlock :: Monad m => Text -> OrgParser m (F Blocks)
exportBlock blockType = try $ do
exportType <- skipSpaces *> orgArgWord <* ignHeaders
contents <- rawBlockContent blockType
- returnF (B.rawBlock (map toLower exportType) contents)
+ returnF (B.rawBlock (T.toLower exportType) contents)
-verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
+verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks)
verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
fmap B.lineBlock . sequence
- <$> mapM parseVerseLine (lines content)
+ <$> mapM parseVerseLine (T.lines content)
where
-- replace initial spaces with nonbreaking spaces to preserve
-- indentation, parse the rest as normal inline
- parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
+ parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines)
parseVerseLine cs = do
- let (initialSpaces, indentedLine) = span isSpace cs
- let nbspIndent = if null initialSpaces
+ let (initialSpaces, indentedLine) = T.span isSpace cs
+ let nbspIndent = if T.null initialSpaces
then mempty
- else B.str $ map (const '\160') initialSpaces
- line <- parseFromString inlines (indentedLine ++ "\n")
+ else B.str $ T.map (const '\160') initialSpaces
+ line <- parseFromString inlines (indentedLine <> "\n")
return (trimInlinesF $ pure nbspIndent <> line)
-- | Read a code block and the associated results block if present. Which of
-- boths blocks is included in the output is determined using the "exports"
-- argument in the block header.
-codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
@@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
- exportsResults :: [(String, String)] -> Bool
+ exportsResults :: [(Text, Text)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
-- | Parse the result of an evaluated babel code block.
@@ -329,7 +332,7 @@ babelResultsBlock = try $ do
resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline
-- | Parse code block arguments
-codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
+codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)])
codeHeaderArgs = try $ do
language <- skipSpaces *> orgArgWord
(switchClasses, switchKv) <- switchesAsAttributes
@@ -338,14 +341,14 @@ codeHeaderArgs = try $ do
, originalLang language <> switchKv <> parameters
)
-switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)])
+switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)])
switchesAsAttributes = try $ do
switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar)
return $ foldr addToAttr ([], []) switches
where
- addToAttr :: (Char, Maybe String, SwitchPolarity)
- -> ([String], [(String, String)])
- -> ([String], [(String, String)])
+ addToAttr :: (Char, Maybe Text, SwitchPolarity)
+ -> ([Text], [(Text, Text)])
+ -> ([Text], [(Text, Text)])
addToAttr ('n', lineNum, pol) (cls, kv) =
let kv' = case lineNum of
Just num -> ("startFrom", num):kv
@@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity
switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+')
-- | Parses a source block switch option.
-switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
switch = try $ lineNumberSwitch <|> labelSwitch
<|> whitespaceSwitch <|> simpleSwitch
where
simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter
labelSwitch = genericSwitch 'l' $
- char '"' *> many1Till nonspaceChar (char '"')
+ char '"' *> many1TillChar nonspaceChar (char '"')
-whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
whitespaceSwitch = do
string "-i"
updateState $ \s -> s { orgStateTrimLeadBlkIndent = False }
@@ -382,8 +385,8 @@ whitespaceSwitch = do
-- | Generic source block switch-option parser.
genericSwitch :: Monad m
=> Char
- -> OrgParser m String
- -> OrgParser m (Char, Maybe String, SwitchPolarity)
+ -> OrgParser m Text
+ -> OrgParser m (Char, Maybe Text, SwitchPolarity)
genericSwitch c p = try $ do
polarity <- switchPolarity <* char c <* skipSpaces
arg <- optionMaybe p
@@ -391,17 +394,17 @@ genericSwitch c p = try $ do
-- | Reads a line number switch option. The line number switch can be used with
-- example and source blocks.
-lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
-lineNumberSwitch = genericSwitch 'n' (many digit)
+lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
+lineNumberSwitch = genericSwitch 'n' (manyChar digit)
-blockOption :: Monad m => OrgParser m (String, String)
+blockOption :: Monad m => OrgParser m (Text, Text)
blockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgParamValue
return (argKey, paramValue)
-orgParamValue :: Monad m => OrgParser m String
-orgParamValue = try $
+orgParamValue :: Monad m => OrgParser m Text
+orgParamValue = try $ fmap T.pack $
skipSpaces
*> notFollowedBy orgArgKey
*> noneOf "\n\r" `many1Till` endOfValue
@@ -420,7 +423,7 @@ orgParamValue = try $
-- export setting.
genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
genericDrawer = try $ do
- name <- map toUpper <$> drawerStart
+ name <- T.toUpper <$> drawerStart
content <- manyTill drawerLine (try drawerEnd)
state <- getState
-- Include drawer if it is explicitly included in or not explicitly excluded
@@ -432,16 +435,16 @@ genericDrawer = try $ do
Right names | name `notElem` names -> return mempty
_ -> drawerDiv name <$> parseLines content
where
- parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
- parseLines = parseFromString blocks . (++ "\n") . unlines
+ parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks)
+ parseLines = parseFromString blocks . (<> "\n") . T.unlines
- drawerDiv :: String -> F Blocks -> F Blocks
+ drawerDiv :: Text -> F Blocks -> F Blocks
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-drawerLine :: Monad m => OrgParser m String
+drawerLine :: Monad m => OrgParser m Text
drawerLine = anyLine
-drawerEnd :: Monad m => OrgParser m String
+drawerEnd :: Monad m => OrgParser m Text
drawerEnd = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
@@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
- case cleanLinkString src of
+ case cleanLinkText src of
Nothing -> mzero
Just imgSrc -> do
guard (isImageFilename imgSrc)
let isFigure = isJust $ blockAttrCaption figAttrs
return $ imageBlock isFigure figAttrs imgSrc
where
- selfTarget :: PandocMonad m => OrgParser m String
+ selfTarget :: PandocMonad m => OrgParser m Text
selfTarget = try $ char '[' *> linkTarget <* char ']'
- imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
+ imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks
imageBlock isFigure figAttrs imgSrc =
let
figName = fromMaybe mempty $ blockAttrName figAttrs
@@ -478,11 +481,11 @@ figure = try $ do
in
B.para . B.imageWith attr imgSrc figTitle <$> figCaption
- withFigPrefix :: String -> String
+ withFigPrefix :: Text -> Text
withFigPrefix cs =
- if "fig:" `isPrefixOf` cs
+ if "fig:" `T.isPrefixOf` cs
then cs
- else "fig:" ++ cs
+ else "fig:" <> cs
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
@@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
-- | Example code marked up by a leading colon.
example :: Monad m => OrgParser m (F Blocks)
-example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine
+example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine
where
- exampleLine :: Monad m => OrgParser m String
+ exampleLine :: Monad m => OrgParser m Text
exampleLine = try $ exampleLineStart *> anyLine
-exampleCode :: String -> Blocks
+exampleCode :: Text -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
@@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks)
include = try $ do
metaLineStart <* stringAnyCase "include:" <* skipSpaces
filename <- includeTarget
- includeArgs <- many (try $ skipSpaces *> many1 alphaNum)
+ includeArgs <- many (try $ skipSpaces *> many1Char alphaNum)
params <- keyValues
blocksParser <- case includeArgs of
("example" : _) -> return $ pure . B.codeBlock <$> parseRaw
@@ -535,10 +538,10 @@ include = try $ do
char '"'
manyTill (noneOf "\n\r\t") (char '"')
- parseRaw :: PandocMonad m => OrgParser m String
- parseRaw = many anyChar
+ parseRaw :: PandocMonad m => OrgParser m Text
+ parseRaw = manyChar anyChar
- blockFilter :: [(String, String)] -> [Block] -> [Block]
+ blockFilter :: [(Text, Text)] -> [Block] -> [Block]
blockFilter params blks =
let minlvl = lookup "minlevel" params
in case (minlvl >>= safeRead :: Maybe Int) of
@@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
<$> (skipSpaces
*> char '<'
*> optionMaybe tableAlignFromChar)
- <*> (optionMaybe (many1 digit >>= safeRead)
+ <*> (optionMaybe (many1Char digit >>= safeRead)
<* char '>'
<* emptyCell)
@@ -739,10 +742,10 @@ latexFragment = try $ do
, "\\end{", e, "}\n"
]
-latexEnd :: Monad m => String -> OrgParser m ()
+latexEnd :: Monad m => Text -> OrgParser m ()
latexEnd envName = try $
() <$ skipSpaces
- <* string ("\\end{" ++ envName ++ "}")
+ <* textStr ("\\end{" <> envName <> "}")
<* blankline
@@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m
-> OrgParser m (F (Inlines, [Blocks]))
definitionListItem parseIndentedMarker = try $ do
markerLength <- parseIndentedMarker
- term <- manyTill (noneOf "\n\r") (try definitionMarker)
+ term <- manyTillChar (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- cont <- concat <$> many (listContinuation markerLength)
+ cont <- T.concat <$> many (listContinuation markerLength)
term' <- parseFromString inlines term
- contents' <- parseFromString blocks $ line1 ++ blank ++ cont
+ contents' <- parseFromString blocks $ line1 <> blank <> cont
return $ (,) <$> term' <*> fmap (:[]) contents'
where
definitionMarker =
@@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do
markerLength <- try parseIndentedMarker
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString blocks $ firstLine ++ blank ++ rest
+ rest <- T.concat <$> many (listContinuation markerLength)
+ parseFromString blocks $ firstLine <> blank <> rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: PandocMonad m => Int -> OrgParser m String
+listContinuation :: PandocMonad m => Int -> OrgParser m Text
listContinuation markerLength = try $ do
notFollowedBy' blankline
- mappend <$> (concat <$> many1 (listContinuation' markerLength))
- <*> many blankline
+ mappend <$> (T.concat <$> many1 (listContinuation' markerLength))
+ <*> manyChar blankline
where
listContinuation' indentation =
blockLines indentation <|> listLine indentation
@@ -853,6 +856,6 @@ listContinuation markerLength = try $ do
>> blockAttributes
>>= (\blockAttrs ->
case attrFromBlockAttributes blockAttrs of
- ("", [], []) -> count 1 anyChar
+ ("", [], []) -> countChar 1 anyChar
_ -> indentWith indentation))
>> (snd <$> withRaw orgBlock)
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index c96087be7..09a501b68 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -1,5 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree
import Prelude
import Control.Arrow ((***))
import Control.Monad (guard, void)
-import Data.Char (toLower, toUpper)
import Data.List (intersperse)
import Data.Maybe (mapMaybe)
+import Data.Text (Text)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
@@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import qualified Data.Set as Set
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
--
@@ -59,28 +61,28 @@ documentTree blocks inline = do
}
-- | Create a tag containing the given string.
-toTag :: String -> Tag
+toTag :: Text -> Tag
toTag = Tag
-- | The key (also called name or type) of a property.
-newtype PropertyKey = PropertyKey { fromKey :: String }
+newtype PropertyKey = PropertyKey { fromKey :: Text }
deriving (Show, Eq, Ord)
-- | Create a property key containing the given string. Org mode keys are
-- case insensitive and are hence converted to lower case.
-toPropertyKey :: String -> PropertyKey
-toPropertyKey = PropertyKey . map toLower
+toPropertyKey :: Text -> PropertyKey
+toPropertyKey = PropertyKey . T.toLower
-- | The value assigned to a property.
-newtype PropertyValue = PropertyValue { fromValue :: String }
+newtype PropertyValue = PropertyValue { fromValue :: Text }
-- | Create a property value containing the given string.
-toPropertyValue :: String -> PropertyValue
+toPropertyValue :: Text -> PropertyValue
toPropertyValue = PropertyValue
-- | Check whether the property value is non-nil (i.e. truish).
isNonNil :: PropertyValue -> Bool
-isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
+isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"]
-- | Key/value pairs from a PROPERTIES drawer
type Properties = [(PropertyKey, PropertyValue)]
@@ -273,7 +275,7 @@ headlineToHeader hdln = do
todoKeyword :: Monad m => OrgParser m TodoMarker
todoKeyword = try $ do
taskStates <- activeTodoMarkers <$> getState
- let kwParser tdm = try (tdm <$ string (todoMarkerName tdm)
+ let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm)
<* spaceChar
<* updateLastPreCharPos)
choice (map kwParser taskStates)
@@ -281,26 +283,26 @@ todoKeyword = try $ do
todoKeywordToInlines :: TodoMarker -> Inlines
todoKeywordToInlines tdm =
let todoText = todoMarkerName tdm
- todoState = map toLower . show $ todoMarkerState tdm
+ todoState = T.toLower . T.pack . show $ todoMarkerState tdm
classes = [todoState, todoText]
in B.spanWith (mempty, classes, mempty) (B.str todoText)
propertiesToAttr :: Properties -> Attr
propertiesToAttr properties =
let
- toStringPair = fromKey *** fromValue
+ toTextPair = fromKey *** fromValue
customIdKey = toPropertyKey "custom_id"
classKey = toPropertyKey "class"
unnumberedKey = toPropertyKey "unnumbered"
specialProperties = [customIdKey, classKey, unnumberedKey]
id' = maybe mempty fromValue . lookup customIdKey $ properties
cls = maybe mempty fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
+ kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst)
$ properties
isUnnumbered =
maybe False isNonNil . lookup unnumberedKey $ properties
in
- (id', words cls ++ ["unnumbered" | isUnnumbered], kvs')
+ (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs')
tagsToInlines :: [Tag] -> Inlines
tagsToInlines [] = mempty
@@ -336,15 +338,15 @@ planningToBlock planning = do
<> B.emph (B.str time)
-- | An Org timestamp, including repetition marks. TODO: improve
-type Timestamp = String
+type Timestamp = Text
timestamp :: Monad m => OrgParser m Timestamp
timestamp = try $ do
openChar <- oneOf "<["
let isActive = openChar == '<'
let closeChar = if isActive then '>' else ']'
- content <- many1Till anyChar (char closeChar)
- return (openChar : content ++ [closeChar])
+ content <- many1TillChar anyChar (char closeChar)
+ return $ T.cons openChar $ content `T.snoc` closeChar
-- | Planning information for a subtree/headline.
data PlanningInfo = PlanningInfo
@@ -374,7 +376,7 @@ planningInfo = try $ do
propertiesDrawer :: Monad m => OrgParser m Properties
propertiesDrawer = try $ do
drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
+ guard $ T.toUpper drawerType == "PROPERTIES"
manyTill property (try endOfDrawer)
where
property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
@@ -382,12 +384,12 @@ propertiesDrawer = try $ do
key :: Monad m => OrgParser m PropertyKey
key = fmap toPropertyKey . try $
- skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+ skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':')
value :: Monad m => OrgParser m PropertyValue
value = fmap toPropertyValue . try $
- skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+ skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline)
- endOfDrawer :: Monad m => OrgParser m String
+ endOfDrawer :: Monad m => OrgParser m Text
endOfDrawer = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index f783eaa0f..f1f089273 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ExportSettings
Copyright : © 2016–2019 Albert Krewinkel
@@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing
import Control.Monad (mzero, void)
import Data.Char (toLower)
import Data.Maybe (listToMaybe)
+import Data.Text (Text)
-- | Read and handle space separated org-mode export settings.
exportSettings :: PandocMonad m => OrgParser m ()
@@ -70,11 +72,11 @@ exportSetting = choice
genericExportSetting :: Monad m
=> OrgParser m a
- -> String
+ -> Text
-> ExportSettingSetter a
-> OrgParser m ()
genericExportSetting optionParser settingIdentifier setter = try $ do
- _ <- string settingIdentifier *> char ':'
+ _ <- textStr settingIdentifier *> char ':'
value <- optionParser
updateState $ modifyExportSettings value
where
@@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do
st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-- | A boolean option, either nil (False) or non-nil (True).
-booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
+booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m ()
booleanSetting = genericExportSetting elispBoolean
-- | An integer-valued option.
-integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
+integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m ()
integerSetting = genericExportSetting parseInt
where
parseInt = try $
@@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt
-- | Either the string "headline" or an elisp boolean and treated as an
-- @ArchivedTreesOption@.
archivedTreeSetting :: Monad m
- => String
+ => Text
-> ExportSettingSetter ArchivedTreesOption
-> OrgParser m ()
archivedTreeSetting =
@@ -115,42 +117,42 @@ archivedTreeSetting =
-- | A list or a complement list (i.e. a list starting with `not`).
complementableListSetting :: Monad m
- => String
- -> ExportSettingSetter (Either [String] [String])
+ => Text
+ -> ExportSettingSetter (Either [Text] [Text])
-> OrgParser m ()
complementableListSetting = genericExportSetting $ choice
- [ Left <$> complementStringList
+ [ Left <$> complementTextList
, Right <$> stringList
, (\b -> if b then Left [] else Right []) <$> elispBoolean
]
where
-- Read a plain list of strings.
- stringList :: Monad m => OrgParser m [String]
+ stringList :: Monad m => OrgParser m [Text]
stringList = try $
char '('
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
-- Read an emacs lisp list specifying a complement set.
- complementStringList :: Monad m => OrgParser m [String]
- complementStringList = try $
+ complementTextList :: Monad m => OrgParser m [Text]
+ complementTextList = try $
string "(not "
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
- elispString :: Monad m => OrgParser m String
- elispString = try $
+ elispText :: Monad m => OrgParser m Text
+ elispText = try $
char '"'
- *> manyTill alphaNum (char '"')
+ *> manyTillChar alphaNum (char '"')
-- | Read but ignore the export setting.
-ignoredSetting :: Monad m => String -> OrgParser m ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
+ignoredSetting :: Monad m => Text -> OrgParser m ()
+ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar)
-- | Read any setting string, but ignore it and emit a warning.
ignoreAndWarn :: PandocMonad m => OrgParser m ()
ignoreAndWarn = try $ do
- opt <- many1 nonspaceChar
+ opt <- many1Char nonspaceChar
report (UnknownOrgExportOption opt)
return ()
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index cae590c5f..da638f717 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -20,7 +20,7 @@ import Prelude
import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Inlines)
@@ -38,12 +38,14 @@ import Control.Monad.Trans (lift)
import Data.Char (isAlphaNum, isSpace)
import Data.List (intersperse)
import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.Maybe (fromMaybe)
--
-- Functions acting on the parser state
--
-recordAnchorId :: PandocMonad m => String -> OrgParser m ()
+recordAnchorId :: PandocMonad m => Text -> OrgParser m ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : orgStateAnchorIds s }
@@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines)
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
str :: PandocMonad m => OrgParser m (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
@@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do
-- | Read a citation key. The characters allowed in citation keys are taken
-- from the `org-ref-cite-re` variable in `org-ref.el`.
-orgRefCiteKey :: PandocMonad m => OrgParser m String
+orgRefCiteKey :: PandocMonad m => OrgParser m Text
orgRefCiteKey =
let citeKeySpecialChars = "-_:\\./," :: String
isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
@@ -329,7 +331,7 @@ orgRefCiteKey =
endOfCitation = try $ do
many $ satisfy isCiteKeySpecialChar
satisfy $ not . isCiteKeyChar
- in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation
+ in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation
-- | Supported citation types. Only a small subset of org-ref types is
@@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote
inlineNote :: PandocMonad m => OrgParser m (F Inlines)
inlineNote = try $ do
string "[fn:"
- ref <- many alphaNum
+ ref <- manyChar alphaNum
char ':'
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- unless (null ref) $
- addToNotesTable ("fn:" ++ ref, note)
+ unless (T.null ref) $
+ addToNotesTable ("fn:" <> ref, note)
return $ B.note <$> note
referencedNote :: PandocMonad m => OrgParser m (F Inlines)
@@ -397,7 +399,7 @@ referencedNote = try $ do
return $ do
notes <- asksF orgStateNotes'
case lookup ref notes of
- Nothing -> return . B.str $ "[" ++ ref ++ "]"
+ Nothing -> return . B.str $ "[" <> ref <> "]"
Just contents -> do
st <- askF
let contents' = runF contents st{ orgStateNotes' = [] }
@@ -420,7 +422,7 @@ explicitOrImageLink = try $ do
return $ do
src <- srcF
title <- titleF
- case cleanLinkString descr of
+ case cleanLinkText descr of
Just imgSrc | isImageFilename imgSrc ->
return . B.link src "" $ B.image imgSrc mempty mempty
_ ->
@@ -429,10 +431,10 @@ explicitOrImageLink = try $ do
selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
selflinkOrImage = try $ do
target <- char '[' *> linkTarget <* char ']'
- case cleanLinkString target of
- Nothing -> case target of
- '#':_ -> returnF $ B.link target "" (B.str target)
- _ -> return $ internalLink target (B.str target)
+ case cleanLinkText target of
+ Nothing -> case T.uncons target of
+ Just ('#', _) -> returnF $ B.link target "" (B.str target)
+ _ -> return $ internalLink target (B.str target)
Just nonDocTgt -> if isImageFilename nonDocTgt
then returnF $ B.image nonDocTgt "" ""
else returnF $ B.link nonDocTgt "" (B.str target)
@@ -449,35 +451,35 @@ angleLink = try $ do
char '>'
return link
-linkTarget :: PandocMonad m => OrgParser m String
-linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]")
+linkTarget :: PandocMonad m => OrgParser m Text
+linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]")
-possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
+possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-applyCustomLinkFormat :: String -> OrgParser m (F String)
+applyCustomLinkFormat :: Text -> OrgParser m (F Text)
applyCustomLinkFormat link = do
- let (linkType, rest) = break (== ':') link
+ let (linkType, rest) = T.break (== ':') link
return $ do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
+ return $ maybe link ($ T.drop 1 rest) formatter
-- | Take a link and return a function which produces new inlines when given
-- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF :: Text -> Inlines -> F Inlines
linkToInlinesF linkStr =
- case linkStr of
- "" -> pure . B.link mempty "" -- wiki link (empty by convention)
- ('#':_) -> pure . B.link linkStr "" -- document-local fraction
- _ -> case cleanLinkString linkStr of
- Just extTgt -> return . B.link extTgt ""
- Nothing -> internalLink linkStr -- other internal link
-
-internalLink :: String -> Inlines -> F Inlines
+ case T.uncons linkStr of
+ Nothing -> pure . B.link mempty "" -- wiki link (empty by convention)
+ Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction
+ _ -> case cleanLinkText linkStr of
+ Just extTgt -> return . B.link extTgt ""
+ Nothing -> internalLink linkStr -- other internal link
+
+internalLink :: Text -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
if anchorB
- then return $ B.link ('#':link) "" title
+ then return $ B.link ("#" <> link) "" title
else return $ B.emph title
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
@@ -493,15 +495,15 @@ anchor = try $ do
returnF $ B.spanWith (solidify anchorId, [], []) mempty
where
parseAnchor = string "<<"
- *> many1 (noneOf "\t\n\r<>\"' ")
+ *> many1Char (noneOf "\t\n\r<>\"' ")
<* string ">>"
<* skipSpaces
-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors
-- the org function @org-export-solidify-link-text@.
-solidify :: String -> String
-solidify = map replaceSpecialChar
+solidify :: Text -> Text
+solidify = T.map replaceSpecialChar
where replaceSpecialChar c
| isAlphaNum c = c
| c `elem` ("_.-:" :: String) = c
@@ -511,25 +513,25 @@ solidify = map replaceSpecialChar
inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
- lang <- many1 orgArgWordChar
+ lang <- many1Char orgArgWordChar
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
- inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r")
+ inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang]
let attrKeyVal = originalLang lang <> opts
let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode
returnF $ if exportsCode opts then codeInlineBlck else mempty
where
- inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
+ inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text)
inlineBlockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgInlineParamValue
return (argKey, paramValue)
- orgInlineParamValue :: PandocMonad m => OrgParser m String
+ orgInlineParamValue :: PandocMonad m => OrgParser m Text
orgInlineParamValue = try $
skipSpaces
*> notFollowedBy (char ':')
- *> many1 (noneOf "\t\n\r ]")
+ *> many1Char (noneOf "\t\n\r ]")
<* skipSpaces
@@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
math :: PandocMonad m => OrgParser m (F Inlines)
math = return . B.math <$> choice [ math1CharBetween '$'
- , mathStringBetween '$'
+ , mathTextBetween '$'
, rawMathBetween "\\(" "\\)"
]
@@ -604,7 +606,7 @@ updatePositions c = do
return c
symbol :: PandocMonad m => OrgParser m (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions)
emphasisBetween :: PandocMonad m
=> Char
@@ -619,7 +621,7 @@ emphasisBetween c = try $ do
verbatimBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
verbatimBetween c = try $
emphasisStart c *>
many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
@@ -627,33 +629,33 @@ verbatimBetween c = try $
verbatimChar = noneOf "\n\r" >>= updatePositions
-- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: PandocMonad m
+mathTextBetween :: PandocMonad m
=> Char
- -> OrgParser m String
-mathStringBetween c = try $ do
+ -> OrgParser m Text
+mathTextBetween c = try $ do
mathStart c
body <- many1TillNOrLessNewlines mathAllowedNewlines
(noneOf (c:"\n\r"))
(lookAhead $ mathEnd c)
final <- mathEnd c
- return $ body ++ [final]
+ return $ T.snoc body final
-- | Parse a single character between @c@ using math rules
math1CharBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
math1CharBetween c = try $ do
char c
res <- noneOf $ c:mathForbiddenBorderChars
char c
eof <|> () <$ lookAhead (oneOf mathPostChars)
- return [res]
+ return $ T.singleton res
rawMathBetween :: PandocMonad m
- => String
- -> String
- -> OrgParser m String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+ => Text
+ -> Text
+ -> OrgParser m Text
+rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e)
-- | Parses the start (opening character) of emphasis
emphasisStart :: PandocMonad m => Char -> OrgParser m Char
@@ -702,10 +704,10 @@ enclosedInlines start end = try $
enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a
-> OrgParser m b
- -> OrgParser m String
+ -> OrgParser m Text
enclosedRaw start end = try $
start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
+ where onSingleLine = try $ many1TillChar (noneOf "\n\r") end
spanningTwoLines = try $
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
@@ -714,7 +716,7 @@ enclosedRaw start end = try $
many1TillNOrLessNewlines :: PandocMonad m => Int
-> OrgParser m Char
-> OrgParser m a
- -> OrgParser m String
+ -> OrgParser m Text
many1TillNOrLessNewlines n p end = try $
nMoreLines (Just n) mempty >>= oneOrMore
where
@@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
finalLine = try $ manyTill p end
minus1 k = k - 1
- oneOrMore cs = cs <$ guard (not $ null cs)
+ oneOrMore cs = T.pack cs <$ guard (not $ null cs)
-- Org allows customization of the way it reads emphasis. We use the defaults
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
@@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
subOrSuperExpr = try $
choice [ charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
- , simpleSubOrSuperString
+ , simpleSubOrSuperText
] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
+ where enclosing (left, right) s = T.cons left $ T.snoc s right
-simpleSubOrSuperString :: PandocMonad m => OrgParser m String
-simpleSubOrSuperString = try $ do
+simpleSubOrSuperText :: PandocMonad m => OrgParser m Text
+simpleSubOrSuperText = try $ do
state <- getState
guard . exportSubSuperscripts . orgStateExportSettings $ state
- choice [ string "*"
- , mappend <$> option [] ((:[]) <$> oneOf "+-")
- <*> many1 alphaNum
+ choice [ textStr "*"
+ , mappend <$> option "" (T.singleton <$> oneOf "+-")
+ <*> many1Char alphaNum
]
inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
@@ -793,28 +795,28 @@ inlineLaTeX = try $ do
maybe mzero returnF $
parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils
where
- parseAsMath :: String -> Maybe Inlines
+ parseAsMath :: Text -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
- parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
+ parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines)
parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
- parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym :: Text -> Maybe Inlines
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
-- drop initial backslash and any trailing "{}"
- where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
+ where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1
state :: ParserState
state = def{ stateOptions = def{ readerExtensions =
enableExtension Ext_raw_tex (readerExtensions def) } }
- texMathToPandoc :: String -> Maybe [Inline]
+ texMathToPandoc :: Text -> Maybe [Inline]
texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
-inlineLaTeXCommand :: PandocMonad m => OrgParser m String
+inlineLaTeXCommand :: PandocMonad m => OrgParser m Text
inlineLaTeXCommand = try $ do
rest <- getInput
st <- getState
@@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do
Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
- let cmdNoSpc = dropWhileEnd isSpace cs
- let len = length cmdNoSpc
+ let cmdNoSpc = T.dropWhileEnd isSpace cs
+ let len = T.length cmdNoSpc
count len anyChar
return cmdNoSpc
_ -> mzero
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
exportSnippet = try $ do
string "@@"
- format <- many1Till (alphaNum <|> char '-') (char ':')
- snippet <- manyTill anyChar (try $ string "@@")
+ format <- many1TillChar (alphaNum <|> char '-') (char ':')
+ snippet <- manyTillChar anyChar (try $ string "@@")
returnF $ B.rawInline format snippet
macro :: PandocMonad m => OrgParser m (F Inlines)
@@ -845,7 +843,7 @@ macro = try $ do
recursionDepth <- orgStateMacroDepth <$> getState
guard $ recursionDepth < 15
string "{{{"
- name <- many alphaNum
+ name <- manyChar alphaNum
args <- ([] <$ string "}}}")
<|> char '(' *> argument `sepBy` char ',' <* eoa
expander <- lookupMacro name <$> getState
@@ -857,7 +855,7 @@ macro = try $ do
updateState $ \s -> s { orgStateMacroDepth = recursionDepth }
return res
where
- argument = many $ notFollowedBy eoa *> noneOf ","
+ argument = manyChar $ notFollowedBy eoa *> noneOf ","
eoa = string ")}}}"
smart :: PandocMonad m => OrgParser m (F Inlines)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 0a388403e..811a5b974 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Meta
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -30,11 +31,12 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
import Control.Monad (mzero, void, when)
-import Data.Char (toLower)
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options.
@@ -47,7 +49,7 @@ metaExport = do
. (if exportWithEmail settings then id else removeMeta "email")
<$> orgStateMeta st
-removeMeta :: String -> Meta -> Meta
+removeMeta :: Text -> Meta -> Meta
removeMeta key meta' =
let metaMap = unMeta meta'
in Meta $ M.delete key metaMap
@@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
declarationLine :: PandocMonad m => OrgParser m ()
declarationLine = try $ do
- key <- map toLower <$> metaKey
+ key <- T.toLower <$> metaKey
(key', value) <- metaValue key
let addMetaValue st =
st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st }
when (key' /= "results") $ updateState addMetaValue
-metaKey :: Monad m => OrgParser m String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
- <* char ':'
- <* skipSpaces
+metaKey :: Monad m => OrgParser m Text
+metaKey = T.toLower <$> many1Char (noneOf ": \n\r")
+ <* char ':'
+ <* skipSpaces
-metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue)
+metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue)
metaValue key =
let inclKey = "header-includes"
in case key of
@@ -88,7 +90,7 @@ metaValue key =
-- Org-mode expects class options to contain the surrounding brackets,
-- pandoc does not.
"latex_class_options" -> ("classoption",) <$>
- metaModifiedString (filter (`notElem` "[]"))
+ metaModifiedString (T.filter (`notElem` ("[]" :: String)))
"html_head" -> (inclKey,) <$>
accumulatingList inclKey (metaExportSnippet "html")
_ -> (key,) <$> metaString
@@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
metaInlinesCommaSeparated = do
- itemStrs <- many1 (noneOf ",\n") `sepBy1` char ','
+ itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ','
newline
- items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs
+ items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs
let toMetaInlines = MetaInlines . B.toList
return $ MetaList . map toMetaInlines <$> sequence items
metaString :: Monad m => OrgParser m (F MetaValue)
metaString = metaModifiedString id
-metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
+metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine
-- | Read an format specific meta definition
-metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
+metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue)
metaExportSnippet format =
return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-- | Accumulate the result of the @parser@ in a list under @key@.
-accumulatingList :: Monad m => String
+accumulatingList :: Monad m => Text
-> OrgParser m (F MetaValue)
-> OrgParser m (F MetaValue)
accumulatingList key p = do
@@ -147,33 +149,33 @@ optionLine = try $ do
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero
-addLinkFormat :: Monad m => String
- -> (String -> String)
+addLinkFormat :: Monad m => Text
+ -> (Text -> Text)
-> OrgParser m ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
-parseLinkFormat :: Monad m => OrgParser m (String, String -> String)
+parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text)
parseLinkFormat = try $ do
- linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+ linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
-parseFormat :: Monad m => OrgParser m (String -> String)
+parseFormat :: Monad m => OrgParser m (Text -> Text)
parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
- replacePlain = try $ (\x -> concat . flip intersperse x)
+ replacePlain = try $ (\x -> T.concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
+ replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
<$> sequence [tillSpecifier 'h', rest]
- justAppend = try $ (++) <$> rest
+ justAppend = try $ (<>) <$> rest
- rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
- tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+ rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r")
+ tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:""))
tagList :: Monad m => OrgParser m [Tag]
tagList = do
@@ -231,41 +233,41 @@ todoSequence = try $ do
(x:xs) -> return $ keywordsToSequence (reverse xs) [x]
where
- todoKeywords :: Monad m => OrgParser m [String]
+ todoKeywords :: Monad m => OrgParser m [Text]
todoKeywords = try $
- let keyword = many1 nonspaceChar <* skipSpaces
+ let keyword = many1Char nonspaceChar <* skipSpaces
endOfKeywords = todoDoneSep <|> void newline
in manyTill keyword (lookAhead endOfKeywords)
todoDoneSep :: Monad m => OrgParser m ()
todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
- keywordsToSequence :: [String] -> [String] -> TodoSequence
+ keywordsToSequence :: [Text] -> [Text] -> TodoSequence
keywordsToSequence todo done =
let todoMarkers = map (TodoMarker Todo) todo
doneMarkers = map (TodoMarker Done) done
in todoMarkers ++ doneMarkers
-macroDefinition :: Monad m => OrgParser m (String, [String] -> String)
+macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text)
macroDefinition = try $ do
- macroName <- many1 nonspaceChar <* skipSpaces
+ macroName <- many1Char nonspaceChar <* skipSpaces
firstPart <- expansionPart
(elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart)
let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder
return (macroName, expander)
where
placeholder :: Monad m => OrgParser m Int
- placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit
+ placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit
- expansionPart :: Monad m => OrgParser m String
- expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r")
+ expansionPart :: Monad m => OrgParser m Text
+ expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r")
alternate :: [a] -> [a] -> [a]
alternate [] ys = ys
alternate xs [] = xs
alternate (x:xs) (y:ys) = x : y : alternate xs ys
- reorder :: [Int] -> [String] -> [String]
+ reorder :: [Int] -> [Text] -> [Text]
reorder perm xs =
let element n = take 1 $ drop (n - 1) xs
in concatMap element perm
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index d6dde8b22..cf5583b76 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ParserState
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro)
type F = Future OrgParserState
-- | An inline note / footnote containing the note key and its (inline) value.
-type OrgNoteRecord = (String, F Blocks)
+type OrgNoteRecord = (Text, F Blocks)
-- | Table of footnotes
type OrgNoteTable = [OrgNoteRecord]
-- | Map of functions for link transformations. The map key is refers to the
-- link-type, the corresponding function transforms the given link string.
-type OrgLinkFormatters = M.Map String (String -> String)
+type OrgLinkFormatters = M.Map Text (Text -> Text)
-- | Macro expander function
-type MacroExpander = [String] -> String
+type MacroExpander = [Text] -> Text
-- | Tag
-newtype Tag = Tag { fromTag :: String }
+newtype Tag = Tag { fromTag :: Text }
deriving (Show, Eq, Ord)
-- | The states in which a todo item can be
@@ -82,7 +83,7 @@ data TodoState = Todo | Done
-- | A ToDo keyword like @TODO@ or @DONE@.
data TodoMarker = TodoMarker
{ todoMarkerState :: TodoState
- , todoMarkerName :: String
+ , todoMarkerName :: Text
}
deriving (Show, Eq)
@@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker]
-- | Org-mode parser state
data OrgParserState = OrgParserState
- { orgStateAnchorIds :: [String]
+ { orgStateAnchorIds :: [Text]
, orgStateEmphasisCharStack :: [Char]
, orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before
-- emphasis; spaces and newlines are
@@ -102,13 +103,13 @@ data OrgParserState = OrgParserState
, orgStateExcludeTags :: Set.Set Tag
, orgStateExcludeTagsChanged :: Bool
, orgStateExportSettings :: ExportSettings
- , orgStateIdentifiers :: Set.Set String
- , orgStateIncludeFiles :: [String]
+ , orgStateIdentifiers :: Set.Set Text
+ , orgStateIncludeFiles :: [Text]
, orgStateLastForbiddenCharPos :: Maybe SourcePos
, orgStateLastPreCharPos :: Maybe SourcePos
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMacros :: M.Map String MacroExpander
+ , orgStateMacros :: M.Map Text MacroExpander
, orgStateMacroDepth :: Int
, orgStateMeta :: F Meta
, orgStateNotes' :: OrgNoteTable
@@ -212,10 +213,10 @@ activeTodoSequences st =
activeTodoMarkers :: OrgParserState -> TodoSequence
activeTodoMarkers = concat . activeTodoSequences
-lookupMacro :: String -> OrgParserState -> Maybe MacroExpander
+lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander
lookupMacro macroName = M.lookup macroName . orgStateMacros
-registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState
+registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState
registerMacro (name, expander) st =
let curMacros = orgStateMacros st
in st{ orgStateMacros = M.insert name expander curMacros }
@@ -236,7 +237,7 @@ data ArchivedTreesOption =
-- These settings can be changed via OPTIONS statements.
data ExportSettings = ExportSettings
{ exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
- , exportDrawers :: Either [String] [String]
+ , exportDrawers :: Either [Text] [Text]
-- ^ Specify drawer names which should be exported. @Left@ names are
-- explicitly excluded from the resulting output while @Right@ means that
-- only the listed drawer names should be included.
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 24aa0779d..718925120 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing
, orgTagWordChar
-- * Re-exports from Text.Pandoc.Parser
, ParserContext (..)
+ , textStr
+ , countChar
+ , manyChar
+ , many1Char
+ , manyTillChar
, many1Till
+ , many1TillChar
, notFollowedBy'
, spaceChar
, nonspaceChar
@@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing
) where
import Prelude
+import Data.Text (Text)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
@@ -108,14 +115,14 @@ import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
--
-- | Parse any line of text
-anyLine :: Monad m => OrgParser m String
+anyLine :: Monad m => OrgParser m Text
anyLine =
P.anyLine
<* updateLastPreCharPos
@@ -123,7 +130,7 @@ anyLine =
-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character
-- allowed before emphasised text.
-parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
+parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a
parseFromString parser str' = do
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
@@ -142,7 +149,7 @@ newline =
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: Monad m => OrgParser m [Char]
+blanklines :: Monad m => OrgParser m Text
blanklines =
P.blanklines
<* updateLastPreCharPos
@@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p ->
--
-- | Read the key of a plist style key-value list.
-orgArgKey :: Monad m => OrgParser m String
+orgArgKey :: Monad m => OrgParser m Text
orgArgKey = try $
skipSpaces *> char ':'
- *> many1 orgArgWordChar
+ *> many1Char orgArgWordChar
-- | Read the value of a plist style key-value list.
-orgArgWord :: Monad m => OrgParser m String
-orgArgWord = many1 orgArgWordChar
+orgArgWord :: Monad m => OrgParser m Text
+orgArgWord = many1Char orgArgWordChar
-- | Chars treated as part of a word in plists.
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"
-orgTagWord :: Monad m => OrgParser m String
-orgTagWord = many1 orgTagWordChar
+orgTagWord :: Monad m => OrgParser m Text
+orgTagWord = many1Char orgTagWordChar
orgTagWordChar :: Monad m => OrgParser m Char
orgTagWordChar = alphaNum <|> oneOf "@%#_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 34f958373..be0a2068e 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -10,7 +10,7 @@
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString
+ ( cleanLinkText
, isImageFilename
, originalLang
, translateLang
@@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared
import Prelude
import Data.Char (isAlphaNum)
-import Data.List (isPrefixOf)
+import Data.Text (Text)
+import qualified Data.Text as T
import System.FilePath (isValid, takeExtension)
-
+import Text.Pandoc.Shared (elemText)
-- | Check whether the given string looks like the path to of URL of an image.
-isImageFilename :: String -> Bool
-isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri)
+isImageFilename :: Text -> Bool
+isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri)
where
- hasImageExtension = takeExtension fp `elem` imageExtensions
- isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols
+ hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions
+ isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols
imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ]
protocols = [ "file", "http", "https" ]
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s'
- _ -> if isUrl s then Just s else Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
+cleanLinkText :: Text -> Maybe Text
+cleanLinkText s
+ | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path
+ | Just _ <- T.stripPrefix "./" s = Just s -- relative path
+ | Just _ <- T.stripPrefix "../" s = Just s -- relative path
+ -- Relative path or URL (file schema)
+ | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s'
+ | otherwise = if isUrl s then Just s else Nothing
+ where
+ isUrl :: Text -> Bool
+ isUrl cs =
+ let (scheme, path) = T.break (== ':') cs
+ in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme
+ && not (T.null path)
-- | Creates an key-value pair marking the original language name specified for
-- a piece of source code.
-- | Creates an key-value attributes marking the original language name
-- specified for a piece of source code.
-originalLang :: String -> [(String, String)]
+originalLang :: Text -> [(Text, Text)]
originalLang lang =
let transLang = translateLang lang
in if transLang == lang
@@ -66,7 +66,7 @@ originalLang lang =
-- | Translate from Org-mode's programming language identifiers to those used
-- by Pandoc. This is useful to allow for proper syntax highlighting in
-- Pandoc output.
-translateLang :: String -> String
+translateLang :: Text -> Text
translateLang cs =
case cs of
"C" -> "c"
@@ -79,5 +79,5 @@ translateLang cs =
"sqlite" -> "sql"
_ -> cs
-exportsCode :: [(String, String)] -> Bool
+exportsCode :: [(Text, Text)] -> Bool
exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 7e29caf28..d2fba4449 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.RST
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -19,9 +20,8 @@ import Control.Arrow (second)
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
import Control.Monad.Except (throwError)
import Control.Monad.Identity (Identity (..))
-import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
-import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
- nub, sort, transpose)
+import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum)
+import Data.List (deleteFirstsBy, elemIndex, nub, sort, transpose)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import Data.Sequence (ViewR (..), viewr)
@@ -47,16 +47,16 @@ import Text.Printf (printf)
-- | Parse reStructuredText string and return Pandoc document.
readRST :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
-> m Pandoc
readRST opts s = do
parsed <- readWithM parseRST def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
-type RSTParser m = ParserT [Char] ParserState m
+type RSTParser m = ParserT Text ParserState m
--
-- Constants and data structure definitions
@@ -113,7 +113,7 @@ titleTransform (bs, meta) =
metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta
metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
- where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v)
+ where f (k,v) = setMeta (T.toLower $ stringify k) (mconcat $ map fromList v)
adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
$ M.adjust toPlain "date"
$ M.adjust toPlain "title"
@@ -136,13 +136,13 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
isSp LineBreak = True
isSp _ = False
splitOnSemi = splitBy (==Str ";")
- factorSemi (Str []) = []
- factorSemi (Str s) = case break (==';') s of
- (xs,[]) -> [Str xs]
- (xs,';':ys) -> Str xs : Str ";" :
- factorSemi (Str ys)
- (xs,ys) -> Str xs :
- factorSemi (Str ys)
+ factorSemi (Str "") = []
+ factorSemi (Str s) = case T.break (==';') s of
+ (xs,"") -> [Str xs]
+ (xs,T.uncons -> Just (';',ys)) -> Str xs : Str ";" :
+ factorSemi (Str ys)
+ (xs,ys) -> Str xs :
+ factorSemi (Str ys)
factorSemi x = [x]
parseRST :: PandocMonad m => RSTParser m Pandoc
@@ -151,7 +151,7 @@ parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- concat <$>
+ docMinusKeys <- T.concat <$>
manyTill (referenceKey <|> anchorDef <|>
noteBlock <|> citationBlock <|>
(snd <$> withRaw comment) <|>
@@ -180,7 +180,7 @@ parseRST = do
return $ Pandoc meta' (blocks' ++ refBlock)
parseCitation :: PandocMonad m
- => (String, String) -> RSTParser m (Inlines, [Blocks])
+ => (Text, Text) -> RSTParser m (Inlines, [Blocks])
parseCitation (ref, raw) = do
contents <- parseFromString' parseBlocks raw
return (B.spanWith (ref, ["citation-label"], []) (B.str ref),
@@ -215,23 +215,23 @@ block = choice [ codeBlock
-- field list
--
-rawFieldListItem :: Monad m => Int -> RSTParser m (String, String)
+rawFieldListItem :: Monad m => Int -> RSTParser m (Text, Text)
rawFieldListItem minIndent = try $ do
indent <- length <$> many (char ' ')
guard $ indent >= minIndent
char ':'
- name <- many1Till (noneOf "\n") (char ':')
+ name <- many1TillChar (noneOf "\n") (char ':')
(() <$ lookAhead newline) <|> skipMany1 spaceChar
first <- anyLine
rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)
indentedBlock
- let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
+ let raw = (if T.null first then "" else (first <> "\n")) <> rest <> "\n"
return (name, raw)
fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
fieldListItem minIndent = try $ do
(name, raw) <- rawFieldListItem minIndent
- term <- parseInlineFromString name
+ term <- parseInlineFromText name
contents <- parseFromString' parseBlocks raw
optional blanklines
return (term, [contents])
@@ -251,12 +251,12 @@ fieldList = try $ do
lineBlock :: PandocMonad m => RSTParser m Blocks
lineBlock = try $ do
lines' <- lineBlockLines
- lines'' <- mapM parseInlineFromString lines'
+ lines'' <- mapM parseInlineFromText lines'
return $ B.lineBlock lines''
-lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks
+lineBlockDirective :: PandocMonad m => Text -> RSTParser m Blocks
lineBlockDirective body = do
- lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body
+ lines' <- mapM parseInlineFromText $ T.lines $ stripTrailingNewlines body
return $ B.lineBlock lines'
--
@@ -271,9 +271,9 @@ para = try $ do
newline
blanklines
case viewr (B.unMany result) of
- ys :> Str xs | "::" `isSuffixOf` xs -> do
+ ys :> Str xs | "::" `T.isSuffixOf` xs -> do
raw <- option mempty codeBlockBody
- return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs))
+ return $ B.para (B.Many ys <> B.str (T.take (T.length xs - 1) xs))
<> raw
_ -> return (B.para result)
@@ -349,7 +349,7 @@ singleHeader' = try $ do
-- hrule block
--
-hrule :: Monad m => ParserT [Char] st m Blocks
+hrule :: Monad m => ParserT Text st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -364,7 +364,7 @@ hrule = try $ do
-- read a line indented by a given string
indentedLine :: (HasReaderOptions st, Monad m)
- => Int -> ParserT [Char] st m [Char]
+ => Int -> ParserT Text st m Text
indentedLine indents = try $ do
lookAhead spaceChar
gobbleAtMostSpaces indents
@@ -373,29 +373,29 @@ indentedLine indents = try $ do
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: (HasReaderOptions st, Monad m)
- => ParserT [Char] st m [Char]
+ => ParserT Text st m Text
indentedBlock = try $ do
indents <- length <$> lookAhead (many1 spaceChar)
lns <- many1 $ try $ do b <- option "" blanklines
l <- indentedLine indents
- return (b ++ l)
+ return (b <> l)
optional blanklines
- return $ unlines lns
+ return $ T.unlines lns
-quotedBlock :: Monad m => ParserT [Char] st m [Char]
+quotedBlock :: Monad m => ParserT Text st m Text
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
- return $ unlines lns
+ return $ T.unlines lns
-codeBlockStart :: Monad m => ParserT [Char] st m Char
+codeBlockStart :: Monad m => ParserT Text st m Char
codeBlockStart = string "::" >> blankline >> blankline
-codeBlock :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks
+codeBlock :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
-codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks
+codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks
codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
(indentedBlock <|> quotedBlock)
@@ -407,24 +407,24 @@ lhsCodeBlock = try $ do
lns <- latexCodeBlock <|> birdCodeBlock
blanklines
return $ B.codeBlockWith ("", ["haskell","literate"], [])
- $ intercalate "\n" lns
+ $ T.intercalate "\n" lns
-latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
+latexCodeBlock :: Monad m => ParserT Text st m [Text]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
-birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
+birdCodeBlock :: Monad m => ParserT Text st m [Text]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
- if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
+ if all (\ln -> T.null ln || T.take 1 ln == " ") lns
+ then map (T.drop 1) lns
else lns
-birdTrackLine :: Monad m => ParserT [Char] st m [Char]
+birdTrackLine :: Monad m => ParserT Text st m Text
birdTrackLine = char '>' >> anyLine
--
@@ -435,7 +435,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString' parseBlocks $ raw ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ raw <> "\n\n"
return $ B.blockQuote contents
{-
@@ -445,12 +445,12 @@ encoding
-}
includeDirective :: PandocMonad m
- => String -> [(String, String)] -> String
+ => Text -> [(Text, Text)] -> Text
-> RSTParser m Blocks
includeDirective top fields body = do
let f = trim top
- guard $ not (null f)
- guard $ null (trim body)
+ guard $ not (T.null f)
+ guard $ T.null (trim body)
-- options
let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
@@ -458,11 +458,11 @@ includeDirective top fields body = do
oldInput <- getInput
containers <- stateContainers <$> getState
when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
+ throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
updateState $ \s -> s{ stateContainers = f : stateContainers s }
- mbContents <- readFileFromDirs ["."] f
+ mbContents <- readFileFromDirs ["."] $ T.unpack f
contentLines <- case mbContents of
- Just s -> return $ lines s
+ Just s -> return $ T.lines s
Nothing -> do
logMessage $ CouldNotLoadIncludeFile f oldPos
return []
@@ -478,23 +478,23 @@ includeDirective top fields body = do
let contentLines' = drop (startLine' - 1)
$ take (endLine' - 1) contentLines
let contentLines'' = (case trim <$> lookup "end-before" fields of
- Just patt -> takeWhile (not . (patt `isInfixOf`))
+ Just patt -> takeWhile (not . (patt `T.isInfixOf`))
Nothing -> id) .
(case trim <$> lookup "start-after" fields of
Just patt -> drop 1 .
- dropWhile (not . (patt `isInfixOf`))
+ dropWhile (not . (patt `T.isInfixOf`))
Nothing -> id) $ contentLines'
- let contents' = unlines contentLines'' ++ "\n"
+ let contents' = T.unlines contentLines'' <> "\n"
case lookup "code" fields of
Just lang -> do
let numberLines = lookup "number-lines" fields
- let classes = maybe [] words (lookup "class" fields)
+ let classes = maybe [] T.words (lookup "class" fields)
let ident = maybe "" trimr $ lookup "name" fields
codeblock ident classes numberLines (trimr lang) contents' False
Nothing -> case lookup "literal" fields of
Just _ -> return $ B.rawBlock "rst" contents'
Nothing -> do
- setPosition $ newPos f 1 1
+ setPosition $ newPos (T.unpack f) 1 1
setInput contents'
bs <- optional blanklines >>
(mconcat <$> many block)
@@ -519,14 +519,14 @@ definitionListItem = try $ do
term <- trimInlines . mconcat <$> many1Till inline endline
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString' parseBlocks $ raw ++ "\n"
+ contents <- parseFromString' parseBlocks $ raw <> "\n"
return (term, [contents])
definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT [Char] st m Int
+bulletListStart :: Monad m => ParserT Text st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -543,7 +543,7 @@ orderedListStart style delim = try $ do
return $ markerLen + length white
-- parse a line of a list item
-listLine :: Monad m => Int -> RSTParser m [Char]
+listLine :: Monad m => Int -> RSTParser m Text
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
@@ -551,21 +551,21 @@ listLine markerLength = try $ do
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: Monad m => RSTParser m Int
- -> RSTParser m (Int, [Char])
+ -> RSTParser m (Int, Text)
rawListItem start = try $ do
markerLength <- start
firstLine <- anyLineNewline
restLines <- many (listLine markerLength)
- return (markerLength, firstLine ++ concat restLines)
+ return (markerLength, firstLine <> T.concat restLines)
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: Monad m => Int -> RSTParser m [Char]
+listContinuation :: Monad m => Int -> RSTParser m Text
listContinuation markerLength = try $ do
- blanks <- many1 blankline
+ blanks <- many1Char blankline
result <- many1 (listLine markerLength)
- return $ blanks ++ concat result
+ return $ blanks <> T.concat result
listItem :: PandocMonad m
=> RSTParser m Int
@@ -581,7 +581,7 @@ listItem start = try $ do
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may itself contain block elements
- parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n"
+ parsed <- parseFromString' parseBlocks $ T.concat (first:rest) <> "\n"
updateState (\st -> st {stateParserContext = oldContext})
return $ case B.toList parsed of
[Para xs] ->
@@ -617,9 +617,9 @@ comment = try $ do
optional indentedBlock
return mempty
-directiveLabel :: Monad m => RSTParser m String
-directiveLabel = map toLower
- <$> many1Till (letter <|> char '-') (try $ string "::")
+directiveLabel :: Monad m => RSTParser m Text
+directiveLabel = T.toLower
+ <$> many1TillChar (letter <|> char '-') (try $ string "::")
directive :: PandocMonad m => RSTParser m Blocks
directive = try $ do
@@ -631,7 +631,7 @@ directive' = do
skipMany1 spaceChar
label <- directiveLabel
skipMany spaceChar
- top <- many $ satisfy (/='\n')
+ top <- manyChar $ satisfy (/='\n')
<|> try (char '\n' <*
notFollowedBy' (rawFieldListItem 1) <*
many1 (char ' ') <*
@@ -644,35 +644,33 @@ directive' = do
else many $ rawFieldListItem fieldIndent
body <- option "" $ try $ blanklines >> indentedBlock
optional blanklines
- let body' = body ++ "\n\n"
+ let body' = body <> "\n\n"
name = trim $ fromMaybe "" (lookup "name" fields)
- classes = words $ maybe "" trim (lookup "class" fields)
+ classes = T.words $ maybe "" trim (lookup "class" fields)
keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"]
imgAttr cl = (name, classes ++ alignClasses, widthAttr ++ heightAttr)
where
- alignClasses = words $ maybe "" trim (lookup cl fields) ++
- maybe "" (\x -> "align-" ++ trim x)
+ alignClasses = T.words $ maybe "" trim (lookup cl fields) <>
+ maybe "" (\x -> "align-" <> trim x)
(lookup "align" fields)
scale = case trim <$> lookup "scale" fields of
- Just v -> case reverse v of
- '%':vv ->
- case safeRead (reverse vv) of
- Just (percent :: Double)
- -> percent / 100.0
- Nothing -> 1.0
- _ ->
- case safeRead v of
- Just (s :: Double) -> s
- Nothing -> 1.0
- Nothing -> 1.0
+ Just v -> case T.unsnoc v of
+ Just (vv, '%') -> case safeRead vv of
+ Just (percent :: Double)
+ -> percent / 100.0
+ Nothing -> 1.0
+ _ -> case safeRead v of
+ Just (s :: Double) -> s
+ Nothing -> 1.0
+ Nothing -> 1.0
widthAttr = maybe [] (\x -> [("width",
- show $ scaleDimension scale x)])
+ tshow $ scaleDimension scale x)])
$ lookup "width" fields >>=
- (lengthToDim . filter (not . isSpace))
+ (lengthToDim . T.filter (not . isSpace))
heightAttr = maybe [] (\x -> [("height",
- show $ scaleDimension scale x)])
+ tshow $ scaleDimension scale x)])
$ lookup "height" fields >>=
- (lengthToDim . filter (not . isSpace))
+ (lengthToDim . T.filter (not . isSpace))
case label of
"include" -> includeDirective top fields body'
"table" -> tableDirective top fields body'
@@ -682,36 +680,37 @@ directive' = do
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
"role" -> addNewRole top $ map (second trim) fields
"container" -> B.divWith
- (name, "container" : words top ++ classes, []) <$>
+ (name, "container" : T.words top ++ classes, []) <$>
parseFromString' parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
- parseInlineFromString (trim top)
+ parseInlineFromText (trim top)
"unicode" -> B.para <$> -- consumed by substKey
- parseInlineFromString (trim $ unicodeTransform top)
+ parseInlineFromText (trim $ unicodeTransform top)
"compound" -> parseFromString' parseBlocks body'
"pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body'
"epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body'
"highlights" -> B.blockQuote <$> parseFromString' parseBlocks body'
- "rubric" -> B.para . B.strong <$> parseInlineFromString top
+ "rubric" -> B.para . B.strong <$> parseInlineFromText top
_ | label `elem` ["attention","caution","danger","error","hint",
"important","note","tip","warning","admonition"] ->
- do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ do bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body'
let lab = case label of
- "admonition" -> mempty
- (l:ls) -> B.divWith ("",["title"],[])
- (B.para (B.str (toUpper l : ls)))
- [] -> mempty
+ "admonition" -> mempty
+ (T.uncons -> Just (l, ls))
+ -> B.divWith ("",["title"],[])
+ (B.para (B.str $ T.cons (toUpper l) ls))
+ _ -> mempty
return $ B.divWith (name,label:classes,keyvals) (lab <> bod)
"sidebar" ->
do let subtit = maybe "" trim $ lookup "subtitle" fields
- tit <- B.para . B.strong <$> parseInlineFromString
- (trim top ++ if null subtit
+ tit <- B.para . B.strong <$> parseInlineFromText
+ (trim top <> if T.null subtit
then ""
- else (": " ++ subtit))
+ else (": " <> subtit))
bod <- parseFromString' parseBlocks body'
return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod
"topic" ->
- do tit <- B.para . B.strong <$> parseInlineFromString top
+ do tit <- B.para . B.strong <$> parseInlineFromText top
bod <- parseFromString' parseBlocks body'
return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
@@ -726,7 +725,7 @@ directive' = do
let attribs = (name, ["aafig"], map (second trimr) fields)
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
"math" -> return $ B.para $ mconcat $ map B.displayMath
- $ toChunks $ top ++ "\n\n" ++ body
+ $ toChunks $ top <> "\n\n" <> body
"figure" -> do
(caption, legend) <- parseFromString' extractCaption body'
let src = escapeURI $ trim top
@@ -742,7 +741,7 @@ directive' = do
$ B.imageWith attr src "" alt
Nothing -> B.imageWith attr src "" alt
"class" -> do
- let attrs = (name, words (trim top), map (second trimr) fields)
+ let attrs = (name, T.words (trim top), map (second trimr) fields)
-- directive content or the first immediately following element
children <- case body of
"" -> block
@@ -750,12 +749,12 @@ directive' = do
return $ B.divWith attrs children
other -> do
pos <- getPosition
- logMessage $ SkippedContent (".. " ++ other) pos
- bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ logMessage $ SkippedContent (".. " <> other) pos
+ bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body'
return $ B.divWith (name, other:classes, keyvals) bod
tableDirective :: PandocMonad m
- => String -> [(String, String)] -> String -> RSTParser m Blocks
+ => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks
tableDirective top fields body = do
bs <- parseFromString' parseBlocks body
case B.toList bs of
@@ -770,7 +769,7 @@ tableDirective top fields body = do
Just "grid" -> widths'
Just specs -> normWidths
$ map (fromMaybe (0 :: Double) . safeRead)
- $ splitBy (`elem` (" ," :: String)) specs
+ $ splitTextBy (`elem` (" ," :: String)) specs
Nothing -> widths'
-- align is not applicable since we can't represent whole table align
return $ B.singleton $ Table (B.toList title)
@@ -783,7 +782,7 @@ tableDirective top fields body = do
-- since Pandoc doesn't support a table with multiple header rows.
-- We don't need to parse :align: as it represents the whole table align.
listTableDirective :: PandocMonad m
- => String -> [(String, String)] -> String
+ => Text -> [(Text, Text)] -> Text
-> RSTParser m Blocks
listTableDirective top fields body = do
bs <- parseFromString' parseBlocks body
@@ -799,7 +798,7 @@ listTableDirective top fields body = do
widths = case trim <$> lookup "widths" fields of
Just "auto" -> replicate numOfCols 0
Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
- splitBy (`elem` (" ," :: String)) specs
+ splitTextBy (`elem` (" ," :: String)) specs
_ -> replicate numOfCols 0
return $ B.table title
(zip (replicate numOfCols AlignDefault) widths)
@@ -812,7 +811,7 @@ listTableDirective top fields body = do
normWidths ws = map (/ max 1 (sum ws)) ws
csvTableDirective :: PandocMonad m
- => String -> [(String, String)] -> String
+ => Text -> [(Text, Text)] -> Text
-> RSTParser m Blocks
csvTableDirective top fields rawcsv = do
let explicitHeader = trim <$> lookup "header" fields
@@ -820,14 +819,17 @@ csvTableDirective top fields rawcsv = do
csvDelim = case trim <$> lookup "delim" fields of
Just "tab" -> '\t'
Just "space" -> ' '
- Just [c] -> c
+ Just (T.unpack -> [c])
+ -> c
_ -> ','
, csvQuote = case trim <$> lookup "quote" fields of
- Just [c] -> c
- _ -> '"'
+ Just (T.unpack -> [c])
+ -> c
+ _ -> '"'
, csvEscape = case trim <$> lookup "escape" fields of
- Just [c] -> Just c
- _ -> Nothing
+ Just (T.unpack -> [c])
+ -> Just c
+ _ -> Nothing
, csvKeepSpace = case trim <$> lookup "keepspace" fields of
Just "true" -> True
_ -> False
@@ -840,16 +842,16 @@ csvTableDirective top fields rawcsv = do
lookup "file" fields `mplus` lookup "url" fields of
Just u -> do
(bs, _) <- fetchItem u
- return $ UTF8.toString bs
+ return $ UTF8.toText bs
Nothing -> return rawcsv
- let res = parseCSV opts (T.pack $ case explicitHeader of
- Just h -> h ++ "\n" ++ rawcsv'
- Nothing -> rawcsv')
+ let res = parseCSV opts (case explicitHeader of
+ Just h -> h <> "\n" <> rawcsv'
+ Nothing -> rawcsv')
case res of
Left e ->
throwError $ PandocParsecError "csv table" e
Right rawrows -> do
- let parseCell = parseFromString' (plain <|> return mempty) . T.unpack
+ let parseCell = parseFromString' (plain <|> return mempty)
let parseRow = mapM parseCell
rows <- mapM parseRow rawrows
let (headerRow,bodyRows,numOfCols) =
@@ -865,7 +867,7 @@ csvTableDirective top fields rawcsv = do
Just "auto" -> replicate numOfCols 0
Just specs -> normWidths
$ map (fromMaybe (0 :: Double) . safeRead)
- $ splitBy (`elem` (" ," :: String)) specs
+ $ splitTextBy (`elem` (" ," :: String)) specs
_ -> replicate numOfCols 0
return $ B.table title
(zip (replicate numOfCols AlignDefault) widths)
@@ -876,10 +878,10 @@ csvTableDirective top fields rawcsv = do
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: PandocMonad m
- => String -> [(String, String)] -> RSTParser m Blocks
-addNewRole roleString fields = do
+ => Text -> [(Text, Text)] -> RSTParser m Blocks
+addNewRole roleText fields = do
pos <- getPosition
- (role, parentRole) <- parseFromString' inheritedRole roleString
+ (role, parentRole) <- parseFromString' inheritedRole roleText
customRoles <- stateRstCustomRoles <$> getState
let getBaseRole (r, f, a) roles =
case M.lookup r roles of
@@ -888,7 +890,7 @@ addNewRole roleString fields = do
(baseRole, baseFmt, baseAttr) =
getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
- annotate :: [String] -> [String]
+ annotate :: [Text] -> [Text]
annotate = maybe id (:) $
if baseRole == "code"
then lookup "language" fields
@@ -904,7 +906,7 @@ addNewRole roleString fields = do
pos
"format" -> when (baseRole /= "raw") $ logMessage $
SkippedContent ":format: [because parent of role is not :raw:]" pos
- _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos
+ _ -> logMessage $ SkippedContent (":" <> key <> ":") pos
when (parentRole == "raw" && countKeys "format" > 1) $
logMessage $ SkippedContent
":format: [after first in definition of role]"
@@ -930,30 +932,29 @@ addNewRole roleString fields = do
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
-- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
-- or text, which is used as-is. Comments start with ..
-unicodeTransform :: String -> String
-unicodeTransform t =
- case t of
- ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment
- ('0':'x':xs) -> go "0x" xs
- ('x':xs) -> go "x" xs
- ('\\':'x':xs) -> go "\\x" xs
- ('U':'+':xs) -> go "U+" xs
- ('u':xs) -> go "u" xs
- ('\\':'u':xs) -> go "\\u" xs
- ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs)
- -- drop semicolon
- (\(c,s) -> c : unicodeTransform (drop 1 s))
- $ extractUnicodeChar xs
- (x:xs) -> x : unicodeTransform xs
- [] -> []
- where go pref zs = maybe (pref ++ unicodeTransform zs)
- (\(c,s) -> c : unicodeTransform s)
- $ extractUnicodeChar zs
-
-extractUnicodeChar :: String -> Maybe (Char, String)
+unicodeTransform :: Text -> Text
+unicodeTransform t
+ | Just xs <- T.stripPrefix ".." t = unicodeTransform $ T.dropWhile (/= '\n') xs -- comment
+ | Just xs <- T.stripPrefix "0x" t = go "0x" xs
+ | Just xs <- T.stripPrefix "x" t = go "x" xs
+ | Just xs <- T.stripPrefix "\\x" t = go "\\x" xs
+ | Just xs <- T.stripPrefix "U+" t = go "U+" xs
+ | Just xs <- T.stripPrefix "u" t = go "u" xs
+ | Just xs <- T.stripPrefix "\\u" t = go "\\u" xs
+ | Just xs <- T.stripPrefix "&#x" t = maybe ("&#x" <> unicodeTransform xs)
+ -- drop semicolon
+ (\(c,s) -> T.cons c $ unicodeTransform $ T.drop 1 s)
+ $ extractUnicodeChar xs
+ | Just (x, xs) <- T.uncons t = T.cons x $ unicodeTransform xs
+ | otherwise = ""
+ where go pref zs = maybe (pref <> unicodeTransform zs)
+ (\(c,s) -> T.cons c $ unicodeTransform s)
+ $ extractUnicodeChar zs
+
+extractUnicodeChar :: Text -> Maybe (Char, Text)
extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
- where (ds,rest) = span isHexDigit s
- mbc = safeRead ('\'':'\\':'x':ds ++ "'")
+ where (ds,rest) = T.span isHexDigit s
+ mbc = safeRead ("'\\x" <> ds <> "'")
extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks)
extractCaption = do
@@ -963,16 +964,16 @@ extractCaption = do
-- divide string by blanklines, and surround with
-- \begin{aligned}...\end{aligned} if needed.
-toChunks :: String -> [String]
-toChunks = dropWhile null
- . map (addAligned . trim . unlines)
- . splitBy (all (`elem` (" \t" :: String))) . lines
+toChunks :: Text -> [Text]
+toChunks = dropWhile T.null
+ . map (addAligned . trim . T.unlines)
+ . splitBy (T.all (`elem` (" \t" :: String))) . T.lines
-- we put this in an aligned environment if it contains \\, see #4254
- where addAligned s = if "\\\\" `isInfixOf` s
- then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}"
+ where addAligned s = if "\\\\" `T.isInfixOf` s
+ then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}"
else s
-codeblock :: String -> [String] -> Maybe String -> String -> String -> Bool
+codeblock :: Text -> [Text] -> Maybe Text -> Text -> Text -> Bool
-> RSTParser m Blocks
codeblock ident classes numberLines lang body rmTrailingNewlines =
return $ B.codeBlockWith attribs $ stripTrailingNewlines' body
@@ -984,7 +985,7 @@ codeblock ident classes numberLines lang body rmTrailingNewlines =
: maybe [] (const ["numberLines"]) numberLines
++ classes
kvs = maybe [] (\n -> case trimr n of
- [] -> []
+ "" -> []
xs -> [("startFrom", xs)])
numberLines
@@ -992,25 +993,25 @@ codeblock ident classes numberLines lang body rmTrailingNewlines =
--- note block
---
-noteBlock :: Monad m => RSTParser m [Char]
+noteBlock :: Monad m => RSTParser m Text
noteBlock = try $ do
(ref, raw, replacement) <- noteBlock' noteMarker
updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s }
-- return blanks so line count isn't affected
return replacement
-citationBlock :: Monad m => RSTParser m [Char]
+citationBlock :: Monad m => RSTParser m Text
citationBlock = try $ do
(ref, raw, replacement) <- noteBlock' citationMarker
updateState $ \s ->
s { stateCitations = M.insert ref raw (stateCitations s),
- stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[]))
+ stateKeys = M.insert (toKey ref) (("#" <> ref,""), ("",["citation"],[]))
(stateKeys s) }
-- return blanks so line count isn't affected
return replacement
noteBlock' :: Monad m
- => RSTParser m String -> RSTParser m (String, String, String)
+ => RSTParser m Text -> RSTParser m (Text, Text, Text)
noteBlock' marker = try $ do
startPos <- getPosition
string ".."
@@ -1021,24 +1022,24 @@ noteBlock' marker = try $ do
blanks <- option "" blanklines
rest <- option "" indentedBlock
endPos <- getPosition
- let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
- let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n'
+ let raw = first <> "\n" <> blanks <> rest <> "\n"
+ let replacement = T.replicate (sourceLine endPos - sourceLine startPos) "\n"
return (ref, raw, replacement)
-citationMarker :: Monad m => RSTParser m [Char]
+citationMarker :: Monad m => RSTParser m Text
citationMarker = do
char '['
res <- simpleReferenceName
char ']'
return res
-noteMarker :: Monad m => RSTParser m [Char]
+noteMarker :: Monad m => RSTParser m Text
noteMarker = do
char '['
- res <- many1 digit
+ res <- many1Char digit
<|>
- try (char '#' >> liftM ('#':) simpleReferenceName)
- <|> count 1 (oneOf "#*")
+ try (char '#' >> liftM ("#" <>) simpleReferenceName)
+ <|> countChar 1 (oneOf "#*")
char ']'
return res
@@ -1046,47 +1047,48 @@ noteMarker = do
-- reference key
--
-quotedReferenceName :: PandocMonad m => RSTParser m String
+quotedReferenceName :: PandocMonad m => RSTParser m Text
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- manyTill anyChar (char '`')
+ manyTillChar anyChar (char '`')
-- Simple reference names are single words consisting of alphanumerics
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName :: Monad m => ParserT [Char] st m String
+simpleReferenceName :: Monad m => ParserT Text st m Text
simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
<|> try (oneOf "-_:+." <* lookAhead alphaNum)
- return (x:xs)
+ return $ T.pack (x:xs)
-referenceName :: PandocMonad m => RSTParser m String
+referenceName :: PandocMonad m => RSTParser m Text
referenceName = quotedReferenceName <|> simpleReferenceName
-referenceKey :: PandocMonad m => RSTParser m [Char]
+referenceKey :: PandocMonad m => RSTParser m Text
referenceKey = do
startPos <- getPosition
choice [substKey, anonymousKey, regularKey]
optional blanklines
endPos <- getPosition
-- return enough blanks to replace key
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-targetURI :: Monad m => ParserT [Char] st m [Char]
+targetURI :: Monad m => ParserT Text st m Text
targetURI = do
skipSpaces
optional $ try $ newline >> notFollowedBy blankline
contents <- trim <$>
- many1 (satisfy (/='\n')
+ many1Char (satisfy (/='\n')
<|> try (newline >> many1 spaceChar >> noneOf " \t\n"))
blanklines
- case reverse contents of
- -- strip backticks
- '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_")
- '_':_ -> return contents
- _ -> return (escapeURI contents)
+ return $ stripBackticks contents
+ where
+ stripBackticks t
+ | Just xs <- T.stripSuffix "`_" t = T.dropWhile (=='`') xs <> "_"
+ | Just _ <- T.stripSuffix "_" t = t
+ | otherwise = escapeURI t
substKey :: PandocMonad m => RSTParser m ()
substKey = try $ do
@@ -1112,21 +1114,21 @@ anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
- let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
+ let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
-referenceNames :: PandocMonad m => RSTParser m [String]
+referenceNames :: PandocMonad m => RSTParser m [Text]
referenceNames = do
let rn = try $ do
string ".. _"
ref <- quotedReferenceName
- <|> many ( noneOf ":\n"
- <|> try (char '\n' <*
- string " " <*
- notFollowedBy blankline)
- <|> try (char ':' <* lookAhead alphaNum)
- )
+ <|> manyChar ( noneOf ":\n"
+ <|> try (char '\n' <*
+ string " " <*
+ notFollowedBy blankline)
+ <|> try (char ':' <* lookAhead alphaNum)
+ )
char ':'
return ref
first <- rn
@@ -1140,18 +1142,18 @@ regularKey = try $ do
-- .. _goodbye: url.com
refs <- referenceNames
src <- targetURI
- guard $ not (null src)
+ guard $ not (T.null src)
let keys = map toKey refs
forM_ keys $ \key ->
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
-anchorDef :: PandocMonad m => RSTParser m [Char]
+anchorDef :: PandocMonad m => RSTParser m Text
anchorDef = try $ do
(refs, raw) <- withRaw $ try (referenceNames <* blanklines)
forM_ refs $ \rawkey ->
updateState $ \s -> s { stateKeys =
- M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
+ M.insert (toKey rawkey) (("#" <> rawkey,""), nullAttr) $ stateKeys s }
-- keep this for 2nd round of parsing, where we'll add the divs (anchor)
return raw
@@ -1174,12 +1176,12 @@ anchor = try $ do
-- because it hides them from promoteHeader, see #4240
_ -> return $ foldr addDiv b refs
-headerBlock :: PandocMonad m => RSTParser m [Char]
+headerBlock :: PandocMonad m => RSTParser m Text
headerBlock = do
((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader')
(ident,_,_) <- registerHeader nullAttr txt
let key = toKey (stringify txt)
- updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr)
+ updateState $ \s -> s { stateKeys = M.insert key (("#" <> ident,""), nullAttr)
$ stateKeys s }
return raw
@@ -1201,13 +1203,13 @@ headerBlock = do
-- Grid tables TODO:
-- - column spans
-dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
@@ -1215,17 +1217,17 @@ simpleTableSep :: Monad m => Char -> RSTParser m Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-- Parse a table footer
-simpleTableFooter :: Monad m => RSTParser m [Char]
+simpleTableFooter :: Monad m => RSTParser m Text
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text]
simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine
-simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [Text]
simpleTableRawLineWithEmptyCell indices = try $ do
cs <- simpleTableRawLine indices
- let isEmptyCell = all (\c -> c == ' ' || c == '\t')
+ let isEmptyCell = T.all (\c -> c == ' ' || c == '\t')
guard $ any isEmptyCell cs
return cs
@@ -1235,15 +1237,15 @@ simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
conLines <- many $ simpleTableRawLineWithEmptyCell indices
- let cols = map unlines . transpose $ firstLine : conLines ++
- [replicate (length indices) ""
- | not (null conLines)]
+ let cols = map T.unlines . transpose $ firstLine : conLines ++
+ [replicate (length indices) ""
+ | not (null conLines)]
mapM (parseFromString' parseBlocks) cols
-simpleTableSplitLine :: [Int] -> String -> [String]
+simpleTableSplitLine :: [Int] -> Text -> [Text]
simpleTableSplitLine indices line =
map trimr
- $ tail $ splitByIndices (init indices) line
+ $ tail $ splitTextByIndices (init indices) line
simpleTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table
@@ -1322,35 +1324,35 @@ inlineContent = choice [ whitespace
, escapedChar
, symbol ] <?> "inline content"
-parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
-parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
+parseInlineFromText :: PandocMonad m => Text -> RSTParser m Inlines
+parseInlineFromText = parseFromString' (trimInlines . mconcat <$> many inline)
hyphens :: Monad m => RSTParser m Inlines
hyphens = do
- result <- many1 (char '-')
+ result <- many1Char (char '-')
optional endline
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
-escapedChar :: Monad m => ParserT [Char] st m Inlines
+escapedChar :: Monad m => ParserT Text st m Inlines
escapedChar = do c <- escaped anyChar
return $ if c == ' ' || c == '\n' || c == '\r'
-- '\ ' is null in RST
then mempty
- else B.str [c]
+ else B.str $ T.singleton c
symbol :: Monad m => RSTParser m Inlines
symbol = do
result <- oneOf specialChars
- return $ B.str [result]
+ return $ B.str $ T.singleton result
-- parses inline code, between codeStart and codeEnd
code :: Monad m => RSTParser m Inlines
code = try $ do
string "``"
- result <- manyTill anyChar (try (string "``"))
+ result <- manyTillChar anyChar (try (string "``"))
return $ B.code
- $ trim $ unwords $ lines result
+ $ trim $ T.unwords $ T.lines result
-- succeeds only if we're not right after a str (ie. in middle of word)
atStart :: Monad m => RSTParser m a -> RSTParser m a
@@ -1382,7 +1384,7 @@ interpretedRole = try $ do
renderRole contents Nothing role nullAttr
renderRole :: PandocMonad m
- => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
+ => Text -> Maybe Text -> Text -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ treatAsText contents
"superscript" -> return $ B.superscript $ treatAsText contents
@@ -1412,36 +1414,36 @@ renderRole contents fmt role attr = case role of
contents
where
titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
- rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
- where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
- pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
- where padNo = replicate (4 - length pepNo) '0' ++ pepNo
- pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+ rfcLink rfcNo = B.link rfcUrl ("RFC " <> rfcNo) $ B.str ("RFC " <> rfcNo)
+ where rfcUrl = "http://www.faqs.org/rfcs/rfc" <> rfcNo <> ".html"
+ pepLink pepNo = B.link pepUrl ("PEP " <> pepNo) $ B.str ("PEP " <> pepNo)
+ where padNo = T.replicate (4 - T.length pepNo) "0" <> pepNo
+ pepUrl = "http://www.python.org/dev/peps/pep-" <> padNo <> "/"
treatAsText = B.text . handleEscapes
- handleEscapes [] = []
- handleEscapes ('\\':' ':cs) = handleEscapes cs
- handleEscapes ('\\':c:cs) = c : handleEscapes cs
- handleEscapes (c:cs) = c : handleEscapes cs
+ handleEscapes = T.concat . removeSpace . T.splitOn "\\"
+ where headSpace t = fromMaybe t $ T.stripPrefix " " t
+ removeSpace (x:xs) = x : map headSpace xs
+ removeSpace [] = []
-roleName :: PandocMonad m => RSTParser m String
-roleName = many1 (letter <|> char '-')
+roleName :: PandocMonad m => RSTParser m Text
+roleName = many1Char (letter <|> char '-')
-roleMarker :: PandocMonad m => RSTParser m String
+roleMarker :: PandocMonad m => RSTParser m Text
roleMarker = char ':' *> roleName <* char ':'
-roleBefore :: PandocMonad m => RSTParser m (String,String)
+roleBefore :: PandocMonad m => RSTParser m (Text,Text)
roleBefore = try $ do
role <- roleMarker
contents <- unmarkedInterpretedText
return (role,contents)
-roleAfter :: PandocMonad m => RSTParser m (String,String)
+roleAfter :: PandocMonad m => RSTParser m (Text,Text)
roleAfter = try $ do
contents <- unmarkedInterpretedText
role <- roleMarker <|> (stateRstDefaultRole <$> getState)
return (role,contents)
-unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
+unmarkedInterpretedText :: PandocMonad m => RSTParser m Text
unmarkedInterpretedText = try $ do
atStart (char '`')
contents <- mconcat <$> (many1
@@ -1453,7 +1455,7 @@ unmarkedInterpretedText = try $ do
lookAhead (satisfy isAlphaNum))
))
char '`'
- return contents
+ return $ T.pack contents
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
@@ -1461,7 +1463,7 @@ whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
str :: Monad m => RSTParser m Inlines
str = do
let strChar = noneOf ("\t\n " ++ specialChars)
- result <- many1 strChar
+ result <- many1Char strChar
updateLastStrPos
return $ B.str result
@@ -1489,7 +1491,7 @@ explicitLink = try $ do
notFollowedBy (char '`') -- `` marks start of inline code
label' <- trimInlines . mconcat <$>
manyTill (notFollowedBy (char '`') >> inlineContent) (char '<')
- src <- trim <$> manyTill (noneOf ">\n") (char '>')
+ src <- trim <$> manyTillChar (noneOf ">\n") (char '>')
skipSpaces
string "`_"
optional $ char '_' -- anonymous form
@@ -1501,22 +1503,22 @@ explicitLink = try $ do
if isURI src
then return ((src, ""), nullAttr)
else
- case reverse src of
- '_':xs -> lookupKey [] (toKey (reverse xs))
- _ -> return ((src, ""), nullAttr)
+ case T.unsnoc src of
+ Just (xs, '_') -> lookupKey [] (toKey xs)
+ _ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label''
-citationName :: PandocMonad m => RSTParser m String
+citationName :: PandocMonad m => RSTParser m Text
citationName = do
raw <- citationMarker
- return $ "[" ++ raw ++ "]"
+ return $ "[" <> raw <> "]"
referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do
ref <- (referenceName <|> citationName) <* char '_'
let label' = B.text ref
- let isAnonKey (Key ('_':_)) = True
- isAnonKey _ = False
+ let isAnonKey (Key (T.uncons -> Just ('_',_))) = True
+ isAnonKey _ = False
state <- getState
let keyTable = stateKeys state
key <- option (toKey ref) $
@@ -1533,7 +1535,7 @@ referenceLink = try $ do
-- We keep a list of oldkeys so we can detect lookup loops.
lookupKey :: PandocMonad m
- => [Key] -> Key -> RSTParser m ((String, String), Attr)
+ => [Key] -> Key -> RSTParser m ((Text, Text), Attr)
lookupKey oldkeys key = do
pos <- getPosition
state <- getState
@@ -1544,8 +1546,8 @@ lookupKey oldkeys key = do
logMessage $ ReferenceNotFound key' pos
return (("",""),nullAttr)
-- check for keys of the form link_, which need to be resolved:
- Just ((u@(c:_),""),_) | last u == '_', c /= '#' -> do
- let rawkey = init u
+ Just ((u, ""),_) | T.length u > 1, T.last u == '_', T.head u /= '#' -> do
+ let rawkey = T.init u
let newkey = toKey rawkey
if newkey `elem` oldkeys
then do
@@ -1576,7 +1578,7 @@ subst = try $ do
case M.lookup key substTable of
Nothing -> do
pos <- getPosition
- logMessage $ ReferenceNotFound (show key) pos
+ logMessage $ ReferenceNotFound (tshow key) pos
return mempty
Just target -> return target
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 6519587c6..73122cc14 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -1,5 +1,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Roff
Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane
@@ -21,7 +23,7 @@ module Text.Pandoc.Readers.Roff
, TableRow
, RoffToken(..)
, RoffTokens(..)
- , linePartsToString
+ , linePartsToText
, lexRoff
)
where
@@ -40,7 +42,7 @@ import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (safeRead, substitute)
+import Text.Pandoc.Shared (safeRead)
import Text.Parsec hiding (tokenPrim)
import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
import qualified Data.Sequence as Seq
@@ -60,28 +62,28 @@ data FontSpec = FontSpec{ fontBold :: Bool
defaultFontSpec :: FontSpec
defaultFontSpec = FontSpec False False False
-data LinePart = RoffStr String
+data LinePart = RoffStr T.Text
| Font FontSpec
| MacroArg Int
deriving Show
type Arg = [LinePart]
-type TableOption = (String, String)
+type TableOption = (T.Text, T.Text)
data CellFormat =
CellFormat
{ columnType :: Char
, pipePrefix :: Bool
, pipeSuffix :: Bool
- , columnSuffixes :: [String]
+ , columnSuffixes :: [T.Text]
} deriving (Show, Eq, Ord)
type TableRow = ([CellFormat], [RoffTokens])
data RoffToken = TextLine [LinePart]
| EmptyLine
- | ControlLine String [Arg] SourcePos
+ | ControlLine T.Text [Arg] SourcePos
| Tbl [TableOption] [TableRow] SourcePos
deriving Show
@@ -95,7 +97,7 @@ data RoffMode = NormalMode
| CopyMode
deriving Show
-data RoffState = RoffState { customMacros :: M.Map String RoffTokens
+data RoffState = RoffState { customMacros :: M.Map T.Text RoffTokens
, prevFont :: FontSpec
, currentFont :: FontSpec
, tableTabChar :: Char
@@ -121,10 +123,10 @@ instance Default RoffState where
, afterConditional = False
}
-type RoffLexer m = ParserT [Char] RoffState m
+type RoffLexer m = ParserT T.Text RoffState m
--
--- Lexer: String -> RoffToken
+-- Lexer: T.Text -> RoffToken
--
eofline :: Stream s m Char => ParsecT s u m ()
@@ -133,11 +135,11 @@ eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
spacetab :: Stream s m Char => ParsecT s u m Char
spacetab = char ' ' <|> char '\t'
-characterCodeMap :: M.Map String Char
+characterCodeMap :: M.Map T.Text Char
characterCodeMap =
M.fromList $ map (\(x,y) -> (y,x)) characterCodes
-combiningAccentsMap :: M.Map String Char
+combiningAccentsMap :: M.Map T.Text Char
combiningAccentsMap =
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
@@ -151,43 +153,40 @@ escapeGlyph = do
c <- lookAhead (oneOf ['[','('])
escapeArg >>= resolveGlyph c
-resolveGlyph :: PandocMonad m => Char -> String -> RoffLexer m [LinePart]
+resolveGlyph :: PandocMonad m => Char -> T.Text -> RoffLexer m [LinePart]
resolveGlyph delimChar glyph = do
- let cs = substitute "_u" " u" glyph -- unicode glyphs separated by _
- (case words cs of
+ let cs = T.replace "_u" " u" glyph -- unicode glyphs separated by _
+ (case T.words cs of
[] -> mzero
[s] -> case M.lookup s characterCodeMap `mplus` readUnicodeChar s of
Nothing -> mzero
- Just c -> return [RoffStr [c]]
+ Just c -> return [RoffStr $ T.singleton c]
(s:ss) -> do
basechar <- case M.lookup s characterCodeMap `mplus`
readUnicodeChar s of
Nothing ->
- case s of
+ case T.unpack s of
[ch] | isAscii ch && isAlphaNum ch ->
return ch
_ -> mzero
Just c -> return c
- let addAccents [] xs = return $ T.unpack .
- Normalize.normalize Normalize.NFC .
- T.pack $ reverse xs
+ let addAccents [] xs = return $ Normalize.normalize Normalize.NFC $
+ T.reverse xs
addAccents (a:as) xs =
case M.lookup a combiningAccentsMap `mplus` readUnicodeChar a of
- Just x -> addAccents as (x:xs)
+ Just x -> addAccents as $ T.cons x xs
Nothing -> mzero
- addAccents ss [basechar] >>= \xs -> return [RoffStr xs])
+ addAccents ss (T.singleton basechar) >>= \xs -> return [RoffStr xs])
<|> case delimChar of
- '[' -> escUnknown ("\\[" ++ glyph ++ "]")
- '(' -> escUnknown ("\\(" ++ glyph)
- '\'' -> escUnknown ("\\C'" ++ glyph ++ "'")
+ '[' -> escUnknown ("\\[" <> glyph <> "]")
+ '(' -> escUnknown ("\\(" <> glyph)
+ '\'' -> escUnknown ("\\C'" <> glyph <> "'")
_ -> Prelude.fail "resolveGlyph: unknown glyph delimiter"
-readUnicodeChar :: String -> Maybe Char
-readUnicodeChar ('u':cs@(_:_:_:_:_)) =
- case safeRead ('0':'x':cs) of
- Just i -> Just (chr i)
- Nothing -> Nothing
-readUnicodeChar _ = Nothing
+readUnicodeChar :: T.Text -> Maybe Char
+readUnicodeChar t = case T.uncons t of
+ Just ('u', cs) | T.length cs > 3 -> chr <$> safeRead ("0x" <> cs)
+ _ -> Nothing
escapeNormal :: PandocMonad m => RoffLexer m [LinePart]
escapeNormal = do
@@ -218,14 +217,14 @@ escapeNormal = do
NormalMode -> return [RoffStr "\\"]
'H' -> escIgnore 'H' [quoteArg]
'L' -> escIgnore 'L' [quoteArg]
- 'M' -> escIgnore 'M' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'M' -> escIgnore 'M' [escapeArg, countChar 1 (satisfy (/='\n'))]
'N' -> escIgnore 'N' [quoteArg]
- 'O' -> escIgnore 'O' [count 1 (oneOf ['0','1'])]
+ 'O' -> escIgnore 'O' [countChar 1 (oneOf ['0','1'])]
'R' -> escIgnore 'R' [quoteArg]
'S' -> escIgnore 'S' [quoteArg]
- 'V' -> escIgnore 'V' [escapeArg, count 1 alphaNum]
+ 'V' -> escIgnore 'V' [escapeArg, countChar 1 alphaNum]
'X' -> escIgnore 'X' [quoteArg]
- 'Y' -> escIgnore 'Y' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'Y' -> escIgnore 'Y' [escapeArg, countChar 1 (satisfy (/='\n'))]
'Z' -> escIgnore 'Z' [quoteArg]
'\'' -> return [RoffStr "`"]
'\n' -> return mempty -- line continuation
@@ -238,12 +237,12 @@ escapeNormal = do
'd' -> escIgnore 'd' [] -- forward down 1/2em
'e' -> return [RoffStr "\\"]
'f' -> escFont
- 'g' -> escIgnore 'g' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'g' -> escIgnore 'g' [escapeArg, countChar 1 (satisfy (/='\n'))]
'h' -> escIgnore 'h' [quoteArg]
- 'k' -> escIgnore 'k' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'k' -> escIgnore 'k' [escapeArg, countChar 1 (satisfy (/='\n'))]
'l' -> escIgnore 'l' [quoteArg]
- 'm' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))]
- 'n' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'm' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))]
+ 'n' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))]
'o' -> escIgnore 'o' [quoteArg]
'p' -> escIgnore 'p' []
'r' -> escIgnore 'r' []
@@ -253,7 +252,7 @@ escapeNormal = do
'v' -> escIgnore 'v' [quoteArg]
'w' -> escIgnore 'w' [quoteArg]
'x' -> escIgnore 'x' [quoteArg]
- 'z' -> escIgnore 'z' [count 1 anyChar]
+ 'z' -> escIgnore 'z' [countChar 1 anyChar]
'|' -> return [RoffStr "\x2006"] --1/6 em space
'~' -> return [RoffStr "\160"] -- nonbreaking space
'\\' -> do
@@ -262,40 +261,40 @@ escapeNormal = do
CopyMode -> char '\\'
NormalMode -> return '\\'
return [RoffStr "\\"]
- _ -> return [RoffStr [c]]
+ _ -> return [RoffStr $ T.singleton c]
-- man 7 groff: "If a backslash is followed by a character that
-- does not constitute a defined escape sequence, the backslash
-- is silently ignored and the character maps to itself."
escIgnore :: PandocMonad m
=> Char
- -> [RoffLexer m String]
+ -> [RoffLexer m T.Text]
-> RoffLexer m [LinePart]
escIgnore c argparsers = do
pos <- getPosition
arg <- snd <$> withRaw (choice argparsers) <|> return ""
- report $ SkippedContent ('\\':c:arg) pos
+ report $ SkippedContent ("\\" <> T.cons c arg) pos
return mempty
-escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart]
+escUnknown :: PandocMonad m => T.Text -> RoffLexer m [LinePart]
escUnknown s = do
pos <- getPosition
report $ SkippedContent s pos
return [RoffStr "\xFFFD"]
-signedNumber :: PandocMonad m => RoffLexer m String
+signedNumber :: PandocMonad m => RoffLexer m T.Text
signedNumber = try $ do
sign <- option "" ("-" <$ char '-' <|> "" <$ char '+')
- ds <- many1 digit
- return (sign ++ ds)
+ ds <- many1Char digit
+ return (sign <> ds)
-- Parses: [..] or (..
-escapeArg :: PandocMonad m => RoffLexer m String
+escapeArg :: PandocMonad m => RoffLexer m T.Text
escapeArg = choice
[ char '[' *> optional expandString *>
- manyTill (noneOf ['\n',']']) (char ']')
+ manyTillChar (noneOf ['\n',']']) (char ']')
, char '(' *> optional expandString *>
- count 2 (satisfy (/='\n'))
+ countChar 2 (satisfy (/='\n'))
]
expandString :: PandocMonad m => RoffLexer m ()
@@ -303,21 +302,21 @@ expandString = try $ do
pos <- getPosition
char '\\'
char '*'
- cs <- escapeArg <|> count 1 anyChar
- s <- linePartsToString <$> resolveString cs pos
- getInput >>= setInput . (s ++)
+ cs <- escapeArg <|> countChar 1 anyChar
+ s <- linePartsToText <$> resolveText cs pos
+ getInput >>= setInput . (s <>)
return ()
-- Parses: '..'
-quoteArg :: PandocMonad m => RoffLexer m String
-quoteArg = char '\'' *> manyTill (noneOf ['\n','\'']) (char '\'')
+quoteArg :: PandocMonad m => RoffLexer m T.Text
+quoteArg = char '\'' *> manyTillChar (noneOf ['\n','\'']) (char '\'')
escFont :: PandocMonad m => RoffLexer m [LinePart]
escFont = do
- font <- escapeArg <|> count 1 alphaNum
- font' <- if null font || font == "P"
+ font <- escapeArg <|> countChar 1 alphaNum
+ font' <- if T.null font || font == "P"
then prevFont <$> getState
- else return $ foldr processFontLetter defaultFontSpec font
+ else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
modifyState $ \st -> st{ prevFont = currentFont st
, currentFont = font' }
return [Font font']
@@ -345,7 +344,7 @@ lexMacro = do
guard $ sourceColumn pos == 1 || afterConditional st
char '.' <|> char '\''
skipMany spacetab
- macroName <- many (satisfy isAlphaNum)
+ macroName <- manyChar (satisfy isAlphaNum)
case macroName of
"nop" -> return mempty
"ie" -> lexConditional "ie"
@@ -374,8 +373,8 @@ lexTable pos = do
spaces
opts <- try tableOptions <|> [] <$ optional (char ';')
case lookup "tab" opts of
- Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c }
- _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
+ Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
+ _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
spaces
skipMany lexComment
spaces
@@ -388,7 +387,7 @@ lexTable pos = do
string ".TE"
skipMany spacetab
eofline
- return $ singleTok $ Tbl opts (rows ++ concat morerows) pos
+ return $ singleTok $ Tbl opts (rows <> concat morerows) pos
lexTableRows :: PandocMonad m => RoffLexer m [TableRow]
lexTableRows = do
@@ -428,11 +427,11 @@ tableOptions = many1 tableOption <* spaces <* char ';'
tableOption :: PandocMonad m => RoffLexer m TableOption
tableOption = do
- k <- many1 letter
+ k <- many1Char letter
v <- option "" $ try $ do
skipMany spacetab
char '('
- manyTill anyChar (char ')')
+ manyTillChar anyChar (char ')')
skipMany spacetab
optional (char ',' >> skipMany spacetab)
return (k,v)
@@ -444,7 +443,7 @@ tableFormatSpec = do
let speclines = first:rest
spaces
char '.'
- return $ speclines ++ repeat (lastDef [] speclines) -- last line is default
+ return $ speclines <> repeat (lastDef [] speclines) -- last line is default
tableFormatSpecLine :: PandocMonad m => RoffLexer m [CellFormat]
tableFormatSpecLine =
@@ -456,19 +455,19 @@ tableColFormat = do
$ True <$ try (string "|" <* notFollowedBy spacetab)
c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-',
'=','|']
- suffixes <- many $ try (skipMany spacetab *> count 1 digit) <|>
+ suffixes <- many $ try (skipMany spacetab *> countChar 1 digit) <|>
(do x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M',
'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z']
num <- case toLower x of
'w' -> many1 digit <|>
(do char '('
xs <- manyTill anyChar (char ')')
- return ("(" ++ xs ++ ")")) <|>
+ return ("(" <> xs <> ")")) <|>
return ""
'f' -> count 1 alphaNum <* skipMany spacetab
'm' -> count 1 alphaNum <* skipMany spacetab
_ -> return ""
- return $ x : num)
+ return $ T.pack $ x : num)
pipeSuffix' <- option False $ True <$ string "|"
return $ CellFormat
{ columnType = c
@@ -479,7 +478,7 @@ tableColFormat = do
-- We don't fully handle the conditional. But we do
-- include everything under '.ie n', which occurs commonly
-- in man pages.
-lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens
+lexConditional :: PandocMonad m => T.Text -> RoffLexer m RoffTokens
lexConditional mname = do
pos <- getPosition
skipMany spacetab
@@ -498,7 +497,7 @@ lexConditional mname = do
case mbtest of
Nothing -> do
putState st -- reset state, so we don't record macros in skipped section
- report $ SkippedContent ('.':mname) pos
+ report $ SkippedContent (T.cons '.' mname) pos
return mempty
Just True -> return ifPart
Just False -> do
@@ -508,7 +507,7 @@ lexConditional mname = do
expression :: PandocMonad m => RoffLexer m (Maybe Bool)
expression = do
raw <- charsInBalanced '(' ')' (satisfy (/= '\n'))
- <|> many1 nonspaceChar
+ <|> many1Char nonspaceChar
returnValue $
case raw of
"1" -> Just True
@@ -533,17 +532,17 @@ lexIncludeFile args = do
pos <- getPosition
case args of
(f:_) -> do
- let fp = linePartsToString f
+ let fp = linePartsToText f
dirs <- getResourcePath
- result <- readFileFromDirs dirs fp
+ result <- readFileFromDirs dirs $ T.unpack fp
case result of
Nothing -> report $ CouldNotLoadIncludeFile fp pos
- Just s -> getInput >>= setInput . (s ++)
+ Just s -> getInput >>= setInput . (s <>)
return mempty
[] -> return mempty
resolveMacro :: PandocMonad m
- => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens
+ => T.Text -> [Arg] -> SourcePos -> RoffLexer m RoffTokens
resolveMacro macroName args pos = do
macros <- customMacros <$> getState
case M.lookup macroName macros of
@@ -552,7 +551,7 @@ resolveMacro macroName args pos = do
let fillLP (MacroArg i) zs =
case drop (i - 1) args of
[] -> zs
- (ys:_) -> ys ++ zs
+ (ys:_) -> ys <> zs
fillLP z zs = z : zs
let fillMacroArg (TextLine lineparts) =
TextLine (foldr fillLP [] lineparts)
@@ -565,7 +564,7 @@ lexStringDef args = do -- string definition
[] -> Prelude.fail "No argument to .ds"
(x:ys) -> do
let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
- let stringName = linePartsToString x
+ let stringName = linePartsToText x
modifyState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
return mempty
@@ -575,14 +574,14 @@ lexMacroDef args = do -- macro definition
modifyState $ \st -> st{ roffMode = CopyMode }
(macroName, stopMacro) <-
case args of
- (x : y : _) -> return (linePartsToString x, linePartsToString y)
+ (x : y : _) -> return (linePartsToText x, linePartsToText y)
-- optional second arg
- (x:_) -> return (linePartsToString x, ".")
+ (x:_) -> return (linePartsToText x, ".")
[] -> Prelude.fail "No argument to .de"
let stop = try $ do
char '.' <|> char '\''
skipMany spacetab
- string stopMacro
+ textStr stopMacro
_ <- lexArgs
return ()
ts <- mconcat <$> manyTill manToken stop
@@ -628,7 +627,7 @@ lexArgs = do
char '"'
return [RoffStr "\""]
-checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart]
+checkDefined :: PandocMonad m => T.Text -> RoffLexer m [LinePart]
checkDefined name = do
macros <- customMacros <$> getState
case M.lookup name macros of
@@ -638,19 +637,19 @@ checkDefined name = do
escString :: PandocMonad m => RoffLexer m [LinePart]
escString = try $ do
pos <- getPosition
- (do cs <- escapeArg <|> count 1 anyChar
- resolveString cs pos)
+ (do cs <- escapeArg <|> countChar 1 anyChar
+ resolveText cs pos)
<|> mempty <$ char 'S'
-- strings and macros share namespace
-resolveString :: PandocMonad m
- => String -> SourcePos -> RoffLexer m [LinePart]
-resolveString stringname pos = do
+resolveText :: PandocMonad m
+ => T.Text -> SourcePos -> RoffLexer m [LinePart]
+resolveText stringname pos = do
RoffTokens ts <- resolveMacro stringname [] pos
case Foldable.toList ts of
[TextLine xs] -> return xs
_ -> do
- report $ SkippedContent ("unknown string " ++ stringname) pos
+ report $ SkippedContent ("unknown string " <> stringname) pos
return mempty
lexLine :: PandocMonad m => RoffLexer m RoffTokens
@@ -688,16 +687,16 @@ macroArg = try $ do
pos <- getPosition
backslash
char '$'
- x <- escapeArg <|> count 1 digit
+ x <- escapeArg <|> countChar 1 digit
case safeRead x of
Just i -> return [MacroArg i]
Nothing -> do
- report $ SkippedContent ("illegal macro argument " ++ x) pos
+ report $ SkippedContent ("illegal macro argument " <> x) pos
return []
regularText :: PandocMonad m => RoffLexer m [LinePart]
regularText = do
- s <- many1 $ noneOf "\n\r\t \\\""
+ s <- many1Char $ noneOf "\n\r\t \\\""
return [RoffStr s]
quoteChar :: PandocMonad m => RoffLexer m [LinePart]
@@ -708,7 +707,7 @@ quoteChar = do
spaceTabChar :: PandocMonad m => RoffLexer m [LinePart]
spaceTabChar = do
c <- spacetab
- return [RoffStr [c]]
+ return [RoffStr $ T.singleton c]
lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
lexEmptyLine = newline >> return (singleTok EmptyLine)
@@ -716,8 +715,8 @@ lexEmptyLine = newline >> return (singleTok EmptyLine)
manToken :: PandocMonad m => RoffLexer m RoffTokens
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
-linePartsToString :: [LinePart] -> String
-linePartsToString = mconcat . map go
+linePartsToText :: [LinePart] -> T.Text
+linePartsToText = mconcat . map go
where
go (RoffStr s) = s
go _ = mempty
@@ -726,7 +725,7 @@ linePartsToString = mconcat . map go
lexRoff :: PandocMonad m => SourcePos -> T.Text -> m RoffTokens
lexRoff pos txt = do
eithertokens <- readWithM (do setPosition pos
- mconcat <$> many manToken) def (T.unpack txt)
+ mconcat <$> many manToken) def txt
case eithertokens of
Left e -> throwError e
Right tokenz -> return tokenz
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 9796de4b9..d587bc41b 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RelaxedPolyRec #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{- |
@@ -31,7 +32,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter)
+import Text.Pandoc.Shared (crFilter, tshow)
import Text.Pandoc.XML (fromEntities)
-- | Read twiki from an input string and return a Pandoc document.
@@ -41,19 +42,19 @@ readTWiki :: PandocMonad m
-> m Pandoc
readTWiki opts s = do
res <- readWithM parseTWiki def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case res of
Left e -> throwError e
Right d -> return d
-type TWParser = ParserT [Char] ParserState
+type TWParser = ParserT Text ParserState
--
-- utility functions
--
-tryMsg :: String -> TWParser m a -> TWParser m a
-tryMsg msg p = try p <?> msg
+tryMsg :: Text -> TWParser m a -> TWParser m a
+tryMsg msg p = try p <?> T.unpack msg
nested :: PandocMonad m => TWParser m a -> TWParser m a
nested p = do
@@ -64,25 +65,25 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-htmlElement :: PandocMonad m => String -> TWParser m (Attr, String)
+htmlElement :: PandocMonad m => Text -> TWParser m (Attr, Text)
htmlElement tag = tryMsg tag $ do
(TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
- content <- manyTill anyChar (endtag <|> endofinput)
+ content <- T.pack <$> manyTill anyChar (endtag <|> endofinput)
return (htmlAttrToPandoc attr, trim content)
where
endtag = void $ htmlTag (~== TagClose tag)
endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
- trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+ trim = T.dropAround (=='\n')
-htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc :: [Attribute Text] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
- classes = maybe [] words $ lookup "class" attrs
+ classes = maybe [] T.words $ lookup "class" attrs
keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
parseHtmlContentWithAttrs :: PandocMonad m
- => String -> TWParser m a -> TWParser m (Attr, [a])
+ => Text -> TWParser m a -> TWParser m (Attr, [a])
parseHtmlContentWithAttrs tag parser = do
(attr, content) <- htmlElement tag
parsedContent <- try $ parseContent content
@@ -91,7 +92,13 @@ parseHtmlContentWithAttrs tag parser = do
parseContent = parseFromString' $ nested $ manyTill parser endOfContent
endOfContent = try $ skipMany blankline >> skipSpaces >> eof
-parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
+parseCharHtmlContentWithAttrs :: PandocMonad m
+ => Text -> TWParser m Char -> TWParser m (Attr, Text)
+parseCharHtmlContentWithAttrs tag = fmap go . parseHtmlContentWithAttrs tag
+ where
+ go (x, y) = (x, T.pack y)
+
+parseHtmlContent :: PandocMonad m => Text -> TWParser m a -> TWParser m [a]
parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p
--
@@ -113,7 +120,7 @@ block = do
<|> blockElements
<|> para
skipMany blankline
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
blockElements :: PandocMonad m => TWParser m B.Blocks
@@ -150,38 +157,38 @@ literal = rawBlock <$> htmlElement "literal"
format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
rawBlock (attrs, content) = B.rawBlock (format attrs) content
-list :: PandocMonad m => String -> TWParser m B.Blocks
+list :: PandocMonad m => Text -> TWParser m B.Blocks
list prefix = choice [ bulletList prefix
, orderedList prefix
, definitionList prefix]
-definitionList :: PandocMonad m => String -> TWParser m B.Blocks
+definitionList :: PandocMonad m => Text -> TWParser m B.Blocks
definitionList prefix = tryMsg "definitionList" $ do
- indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ "
- elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
+ indent <- lookAhead $ textStr prefix *> many1 (textStr " ") <* textStr "$ "
+ elements <- many $ parseDefinitionListItem (prefix <> T.concat indent)
return $ B.definitionList elements
where
parseDefinitionListItem :: PandocMonad m
- => String -> TWParser m (B.Inlines, [B.Blocks])
+ => Text -> TWParser m (B.Inlines, [B.Blocks])
parseDefinitionListItem indent = do
- string (indent ++ "$ ") >> skipSpaces
+ textStr (indent <> "$ ") >> skipSpaces
term <- many1Till inline $ string ": "
line <- listItemLine indent $ string "$ "
return (mconcat term, [line])
-bulletList :: PandocMonad m => String -> TWParser m B.Blocks
+bulletList :: PandocMonad m => Text -> TWParser m B.Blocks
bulletList prefix = tryMsg "bulletList" $
parseList prefix (char '*') (char ' ')
-orderedList :: PandocMonad m => String -> TWParser m B.Blocks
+orderedList :: PandocMonad m => Text -> TWParser m B.Blocks
orderedList prefix = tryMsg "orderedList" $
parseList prefix (oneOf "1iIaA") (string ". ")
parseList :: PandocMonad m
- => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
+ => Text -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
parseList prefix marker delim = do
- (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
- blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
+ (indent, style) <- lookAhead $ textStr prefix *> listStyle <* delim
+ blocks <- many $ parseListItem (prefix <> indent) (char style <* delim)
return $ case style of
'1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
@@ -191,24 +198,24 @@ parseList prefix marker delim = do
_ -> B.bulletList blocks
where
listStyle = do
- indent <- many1 $ string " "
+ indent <- many1 $ textStr " "
style <- marker
- return (concat indent, style)
+ return (T.concat indent, style)
parseListItem :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
-parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
+ => Text -> TWParser m a -> TWParser m B.Blocks
+parseListItem prefix marker = textStr prefix >> marker >> listItemLine prefix marker
listItemLine :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
+ => Text -> TWParser m a -> TWParser m B.Blocks
listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent)
where
lineContent = do
content <- anyLine
continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation
- filterSpaces = reverse . dropWhile (== ' ') . reverse
- listContinuation = notFollowedBy (string prefix >> marker) >>
+ return $ filterSpaces content <> "\n" <> maybe "" (" " <>) continuation
+ filterSpaces = T.dropWhileEnd (== ' ')
+ listContinuation = notFollowedBy (textStr prefix >> marker) >>
string " " >> lineContent
parseContent = parseFromString' $ many1 $ nestedList <|> parseInline
parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList)
@@ -352,29 +359,29 @@ macroWithParameters = try $ do
char '%'
return $ buildSpan name kvs $ B.str content
-buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
+buildSpan :: Text -> [(Text, Text)] -> B.Inlines -> B.Inlines
buildSpan className kvs = B.spanWith attrs
where
attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
- additionalClasses = maybe [] words $ lookup "class" kvs
+ additionalClasses = maybe [] T.words $ lookup "class" kvs
kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
-macroName :: PandocMonad m => TWParser m String
+macroName :: PandocMonad m => TWParser m Text
macroName = do
first <- letter
rest <- many $ alphaNum <|> char '_'
- return (first:rest)
+ return $ T.pack $ first:rest
-attributes :: PandocMonad m => TWParser m (String, [(String, String)])
-attributes = foldr (either mkContent mkKvs) ([], [])
+attributes :: PandocMonad m => TWParser m (Text, [(Text, Text)])
+attributes = foldr (either mkContent mkKvs) ("", [])
<$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}')
where
spnl = skipMany (spaceChar <|> newline)
- mkContent c ([], kvs) = (c, kvs)
- mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
+ mkContent c ("", kvs) = (c, kvs)
+ mkContent c (rest, kvs) = (c <> " " <> rest, kvs)
mkKvs kv (cont, rest) = (cont, kv : rest)
-attribute :: PandocMonad m => TWParser m (Either String (String, String))
+attribute :: PandocMonad m => TWParser m (Either Text (Text, Text))
attribute = withKey <|> withoutKey
where
withKey = try $ do
@@ -383,10 +390,10 @@ attribute = withKey <|> withoutKey
curry Right key <$> parseValue False
withoutKey = try $ Left <$> parseValue True
parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces)
- withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
+ withQuotes = between (char '"') (char '"') (\_ -> countChar 1 $ noneOf ['"'])
withoutQuotes allowSpaces
- | allowSpaces = many1 $ noneOf "}"
- | otherwise = many1 $ noneOf " }"
+ | allowSpaces = many1Char $ noneOf "}"
+ | otherwise = many1Char $ noneOf " }"
nestedInlines :: (Show a, PandocMonad m)
=> TWParser m a -> TWParser m B.Inlines
@@ -413,10 +420,10 @@ emphHtml :: PandocMonad m => TWParser m B.Inlines
emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
nestedString :: (Show a, PandocMonad m)
- => TWParser m a -> TWParser m String
-nestedString end = innerSpace <|> count 1 nonspaceChar
+ => TWParser m a -> TWParser m Text
+nestedString end = innerSpace <|> countChar 1 nonspaceChar
where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
+ innerSpace = try $ many1Char spaceChar <* notFollowedBy end
boldCode :: PandocMonad m => TWParser m B.Inlines
boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString
@@ -429,7 +436,7 @@ code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString
codeHtml :: PandocMonad m => TWParser m B.Inlines
codeHtml = do
- (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ (attrs, content) <- parseCharHtmlContentWithAttrs "code" anyChar
return $ B.codeWith attrs $ fromEntities content
autoLink :: PandocMonad m => TWParser m B.Inlines
@@ -437,7 +444,7 @@ autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- parseLink
- guard $ checkLink (last url)
+ guard $ checkLink (T.last url)
return $ makeLink (text, url)
where
parseLink = notFollowedBy nop >> (uri <|> emailAddress)
@@ -447,17 +454,17 @@ autoLink = try $ do
| otherwise = isAlphaNum c
str :: PandocMonad m => TWParser m B.Inlines
-str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
+str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference)
nop :: PandocMonad m => TWParser m B.Inlines
nop = try $ (void exclamation <|> void nopTag) >> followContent
where
exclamation = char '!'
nopTag = stringAnyCase "<nop>"
- followContent = B.str . fromEntities <$> many1 nonspaceChar
+ followContent = B.str . fromEntities <$> many1Char nonspaceChar
symbol :: PandocMonad m => TWParser m B.Inlines
-symbol = B.str <$> count 1 nonspaceChar
+symbol = B.str <$> countChar 1 nonspaceChar
smart :: PandocMonad m => TWParser m B.Inlines
smart = do
@@ -491,13 +498,13 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
return $ B.link url title content
-linkText :: PandocMonad m => TWParser m (String, String, B.Inlines)
+linkText :: PandocMonad m => TWParser m (Text, Text, B.Inlines)
linkText = do
string "[["
- url <- many1Till anyChar (char ']')
+ url <- T.pack <$> many1Till anyChar (char ']')
content <- option (B.str url) (mconcat <$> linkContent)
char ']'
return (url, "", content)
where
- linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent
+ linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent . T.pack
parseLinkContent = parseFromString' $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index a638fdf40..5e7aaf910 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Textile
Copyright : Copyright (C) 2010-2012 Paul Rivier
@@ -38,7 +40,7 @@ import Prelude
import Control.Monad (guard, liftM)
import Control.Monad.Except (throwError)
import Data.Char (digitToInt, isUpper)
-import Data.List (intercalate, intersperse, transpose)
+import Data.List (intersperse, transpose)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -52,7 +54,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow)
-- | Parse a Textile text and return a Pandoc document.
readTextile :: PandocMonad m
@@ -61,21 +63,21 @@ readTextile :: PandocMonad m
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc
+parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
parseTextile = do
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
let firstPassParser = noteBlock <|> lineClump
- manyTill firstPassParser eof >>= setInput . concat
+ manyTill firstPassParser eof >>= setInput . T.concat
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
@@ -84,29 +86,29 @@ parseTextile = do
blocks <- parseBlocks
return $ Pandoc nullMeta (B.toList blocks) -- FIXME
-noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char]
-noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
+noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
-noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char]
+noteBlock :: PandocMonad m => ParserT Text ParserState m Text
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
optional blankline
- contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock)
+ contents <- T.unlines <$> many1Till anyLine (blanklines <|> noteBlock)
endPos <- getPosition
- let newnote = (ref, contents ++ "\n")
+ let newnote = (ref, contents <> "\n")
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
-- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks
+parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks]
+blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -121,22 +123,22 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT [Char] ParserState m Blocks
+block :: PandocMonad m => ParserT Text ParserState m Blocks
block = do
res <- choice blockParsers <?> "block"
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
-commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
-codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlock = codeBlockBc <|> codeBlockPre
-codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlockBc :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockBc = try $ do
string "bc."
extended <- option False (True <$ char '.')
@@ -150,31 +152,31 @@ codeBlockBc = try $ do
rest <- many (notFollowedBy ender *> anyLine)
return (f:rest)
else manyTill anyLine blanklines
- return $ B.codeBlock (trimTrailingNewlines (unlines contents))
+ return $ B.codeBlock (trimTrailingNewlines (T.unlines contents))
-trimTrailingNewlines :: String -> String
-trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
+trimTrailingNewlines :: Text -> Text
+trimTrailingNewlines = T.dropWhileEnd (=='\n')
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlockPre :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockPre = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
- result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
+ result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
-- drop leading newline if any
- let result'' = case result' of
- '\n':xs -> xs
- _ -> result'
+ let result'' = case T.uncons result' of
+ Just ('\n', xs) -> xs
+ _ -> result'
-- drop trailing newline if any
- let result''' = case reverse result'' of
- '\n':_ -> init result''
- _ -> result''
- let classes = words $ fromAttrib "class" t
+ let result''' = case T.unsnoc result'' of
+ Just (xs, '\n') -> xs
+ _ -> result''
+ let classes = T.words $ fromAttrib "class" t
let ident = fromAttrib "id" t
let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT [Char] ParserState m Blocks
+header :: PandocMonad m => ParserT Text ParserState m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -186,14 +188,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks
+blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
-hrule :: PandocMonad m => ParserT [Char] st m Blocks
+hrule :: PandocMonad m => ParserT Text st m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -208,39 +210,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks
+anyList :: PandocMonad m => ParserT Text ParserState m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +252,25 @@ genericListItemAtDepth c depth = try $ do
return $ contents <> sublist
-- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks
+definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
-listStart :: PandocMonad m => ParserT [Char] ParserState m ()
+listStart :: PandocMonad m => ParserT Text ParserState m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
-genericListStart :: PandocMonad m => Char -> ParserT [Char] st m ()
+genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
-basicDLStart :: PandocMonad m => ParserT [Char] ParserState m ()
+basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
-definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines
+definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@@ -281,26 +283,26 @@ definitionListStart = try $ do
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
definitionListItem = try $ do
term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
- where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
+ where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
- multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
+ multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
- s <- many1Till anyChar (try (string "=:" >> newline))
- -- this ++ "\n\n" does not look very good
- ds <- parseFromString' parseBlocks (s ++ "\n\n")
+ s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
+ -- this <> "\n\n" does not look very good
+ ds <- parseFromString' parseBlocks (s <> "\n\n")
return [ds]
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@@ -308,14 +310,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT [Char] ParserState m Blocks
+para :: PandocMonad m => ParserT Text ParserState m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -326,7 +328,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
-cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@@ -339,18 +341,18 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) cellAttributes
notFollowedBy blankline
- raw <- trim <$>
+ raw <- trim . T.pack <$>
many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
content <- mconcat <$> parseFromString' (many inline) raw
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@@ -360,7 +362,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
-table :: PandocMonad m => ParserT [Char] ParserState m Blocks
+table :: PandocMonad m => ParserT Text ParserState m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@@ -384,7 +386,7 @@ table = try $ do
(map (map snd) rows)
-- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT [Char] ParserState m ()
+ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@@ -393,9 +395,9 @@ ignorableRow = try $ do
_ <- anyLine
return ()
-explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
explicitBlockStart name = try $ do
- string name
+ string (T.unpack name)
attributes
char '.'
optional whitespace
@@ -404,9 +406,9 @@ explicitBlockStart name = try $ do
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: PandocMonad m
- => String -- ^ block tag name
- -> ParserT [Char] ParserState m Blocks -- ^ implicit block
- -> ParserT [Char] ParserState m Blocks
+ => Text -- ^ block tag name
+ -> ParserT Text ParserState m Blocks -- ^ implicit block
+ -> ParserT Text ParserState m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@@ -419,11 +421,11 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+inline :: PandocMonad m => ParserT Text ParserState m Inlines
inline = choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines]
+inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
inlineParsers = [ str
, whitespace
, endline
@@ -437,13 +439,13 @@ inlineParsers = [ str
, link
, image
, mark
- , (B.str . (:[])) <$> characterReference
+ , (B.str . T.singleton) <$> characterReference
, smartPunctuation inline
, symbol
]
-- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
+inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
@@ -457,33 +459,33 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT [Char] st m Inlines
+mark :: PandocMonad m => ParserT Text st m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: PandocMonad m => ParserT [Char] st m Inlines
+reg :: PandocMonad m => ParserT Text st m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
-tm :: PandocMonad m => ParserT [Char] st m Inlines
+tm :: PandocMonad m => ParserT Text st m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
-copy :: PandocMonad m => ParserT [Char] st m Inlines
+copy :: PandocMonad m => ParserT Text st m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
-note :: PandocMonad m => ParserT [Char] ParserState m Inlines
+note :: PandocMonad m => ParserT Text ParserState m Inlines
note = try $ do
ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
- case lookup ref notes of
+ case lookup (T.pack ref) notes of
Nothing -> Prelude.fail "note not found"
Just raw -> B.note <$> parseFromString' parseBlocks raw
@@ -500,42 +502,42 @@ stringBreakers :: [Char]
stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
wordBoundaries :: [Char]
-wordBoundaries = markupChars ++ stringBreakers
+wordBoundaries = markupChars <> stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String
+hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
- return $ intercalate "-" (x:xs)
+ return $ T.intercalate "-" (x:xs)
-wordChunk :: PandocMonad m => ParserT [Char] ParserState m String
+wordChunk :: PandocMonad m => ParserT Text ParserState m Text
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( noneOf wordBoundaries <|>
try (notFollowedBy' note *> oneOf markupChars
<* lookAhead (noneOf wordBoundaries) ) )
- return $ hd:tl
+ return $ T.pack $ hd:tl
-- | Any string
-str :: PandocMonad m => ParserT [Char] ParserState m Inlines
+str :: PandocMonad m => ParserT Text ParserState m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediately
-- followed by parens, parens content is unconditionally word acronym
fullStr <- option baseStr $ try $ do
- guard $ all isUpper baseStr
- acro <- enclosed (char '(') (char ')') anyChar'
- return $ concat [baseStr, " (", acro, ")"]
+ guard $ T.all isUpper baseStr
+ acro <- T.pack <$> enclosed (char '(') (char ')') anyChar'
+ return $ T.concat [baseStr, " (", acro, ")"]
updateLastStrPos
return $ B.str fullStr
-- | Some number of space chars
-whitespace :: PandocMonad m => ParserT [Char] st m Inlines
+whitespace :: PandocMonad m => ParserT Text st m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+endline :: PandocMonad m => ParserT Text ParserState m Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -543,18 +545,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
-rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
-link :: PandocMonad m => ParserT [Char] ParserState m Inlines
+link :: PandocMonad m => ParserT Text ParserState m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -567,121 +569,122 @@ link = try $ do
else lookAhead $ space <|> eof' <|>
try (oneOf "!.,;:" *>
(space <|> newline <|> eof'))
- url <- many1Till nonspaceChar stop
+ url <- T.pack <$> many1Till nonspaceChar stop
let name' = if B.toList name == [Str "$"] then B.str url else name
return $ if attr == nullAttr
then B.link url "" name'
else B.spanWith attr $ B.link url "" name'
-- | image embedding
-image :: PandocMonad m => ParserT [Char] ParserState m Inlines
+image :: PandocMonad m => ParserT Text ParserState m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
let attr = case lookup "style" kvs of
Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls)
Nothing -> (ident, cls, kvs)
- src <- many1 (noneOf " \t\n\r!(")
- alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')')
+ src <- T.pack <$> many1 (noneOf " \t\n\r!(")
+ alt <- fmap T.pack $ option "" $ try $ char '(' *> manyTill anyChar (char ')')
char '!'
return $ B.imageWith attr src alt (B.str alt)
-escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines
-escapedEqs = B.str <$>
+escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs = B.str . T.pack <$>
try (string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines
-escapedTag = B.str <$>
+escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag = B.str . T.pack <$>
try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines
-symbol = B.str . singleton <$> (notFollowedBy newline *>
- notFollowedBy rawHtmlBlock *>
- oneOf wordBoundaries)
+symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol = B.str . T.singleton <$> (notFollowedBy newline *>
+ notFollowedBy rawHtmlBlock *>
+ oneOf wordBoundaries)
-- | Inline code
-code :: PandocMonad m => ParserT [Char] ParserState m Inlines
+code :: PandocMonad m => ParserT Text ParserState m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char
+anyChar' :: PandocMonad m => ParserT Text ParserState m Char
anyChar' =
satisfy (/='\n') <|>
try (char '\n' <* notFollowedBy blankline)
-code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines
-code1 = B.code <$> surrounded (char '@') anyChar'
+code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
-code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines
+code2 :: PandocMonad m => ParserT Text ParserState m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
- B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
+ B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT [Char] ParserState m Attr
+attributes :: PandocMonad m => ParserT Text ParserState m Attr
attributes = foldl (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
-specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
("right" <$ char '>') <|>
("left" <$ char '<')
notFollowedBy spaceChar
- return $ addStyle ("text-align:" ++ alignStr)
+ return $ addStyle $ T.pack $ "text-align:" ++ alignStr
-attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
-classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
- ws <- words `fmap` manyTill anyChar' (char ')')
+ ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
case reverse ws of
- [] -> return $ \(_,_,keyvals) -> ("",[],keyvals)
- (('#':ident'):classes') -> return $ \(_,_,keyvals) ->
- (ident',classes',keyvals)
- classes' -> return $ \(_,_,keyvals) ->
- ("",classes',keyvals)
-
-styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+ []
+ -> return $ \(_,_,keyvals) -> ("",[],keyvals)
+ ((T.uncons -> Just ('#', ident')):classes')
+ -> return $ \(_,_,keyvals) -> (ident',classes',keyvals)
+ classes'
+ -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
+
+styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
- return $ addStyle style
+ return $ addStyle $ T.pack style
-addStyle :: String -> Attr -> Attr
+addStyle :: Text -> Attr -> Attr
addStyle style (id',classes,keyvals) =
(id',classes,keyvals')
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
- style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
+ style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
-langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
- return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
+ return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
-- | Parses material surrounded by a parser.
surrounded :: (PandocMonad m, Show t)
- => ParserT [Char] st m t -- ^ surrounding parser
- -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly)
- -> ParserT [Char] st m [a]
+ => ParserT Text st m t -- ^ surrounding parser
+ -> ParserT Text st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT Text st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: PandocMonad m
- => ParserT [Char] ParserState m t -- ^ surrounding parser
+ => ParserT Text ParserState m t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
- -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly)
+ -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@@ -695,7 +698,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
-groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
@@ -704,9 +707,5 @@ groupedInlineMarkup = try $ do
char ']'
return $ sp1 <> result <> sp2
--- | Create a singleton list
-singleton :: a -> [a]
-singleton x = [x]
-
-eof' :: Monad m => ParserT [Char] s m Char
+eof' :: Monad m => ParserT Text s m Char
eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index 5daf6b0bb..501c204f5 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -43,19 +43,19 @@ readTikiWiki :: PandocMonad m
-> m Pandoc
readTikiWiki opts s = do
res <- readWithM parseTikiWiki def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case res of
Left e -> throwError e
Right d -> return d
-type TikiWikiParser = ParserT [Char] ParserState
+type TikiWikiParser = ParserT Text ParserState
--
-- utility functions
--
-tryMsg :: String -> TikiWikiParser m a -> TikiWikiParser m a
-tryMsg msg p = try p <?> msg
+tryMsg :: Text -> TikiWikiParser m a -> TikiWikiParser m a
+tryMsg msg p = try p <?> (T.unpack msg)
skip :: TikiWikiParser m a -> TikiWikiParser m ()
skip parser = Control.Monad.void parser
@@ -89,7 +89,7 @@ block = do
<|> para
skipMany blankline
when (verbosity >= INFO) $
- trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res))
+ trace (T.pack $ printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res))
return res
blockElements :: PandocMonad m => TikiWikiParser m B.Blocks
@@ -133,7 +133,7 @@ tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks]
tableRow = try $ do
-- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n"))
-- return $ map (B.plain . mconcat) row
- row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n"))
+ row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn . T.pack) (try $ string "|" <* notFollowedBy (oneOf "|\n"))
return $ map B.plain row
where
parseColumn x = do
@@ -342,15 +342,15 @@ listItemLine nest = lineContent >>= parseContent
lineContent = do
content <- anyLine
continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation
- filterSpaces = reverse . dropWhile (== ' ') . reverse
+ return $ filterSpaces content <> "\n" <> Data.Maybe.fromMaybe "" continuation
+ filterSpaces = T.dropWhileEnd (== ' ')
listContinuation = string (replicate nest '+') >> lineContent
parseContent x = do
parsed <- parseFromString (many1 inline) x
return $ mconcat $ dropWhileEnd (== B.space) parsed
-- Turn the CODE macro attributes into Pandoc code block attributes.
-mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)])
+mungeAttrs :: [(Text, Text)] -> (Text, [Text], [(Text, Text)])
mungeAttrs rawAttrs = ("", classes, rawAttrs)
where
-- "colors" is TikiWiki CODE macro for "name of language to do
@@ -370,7 +370,7 @@ codeMacro = try $ do
string "{CODE("
rawAttrs <- macroAttrs
string ")}"
- body <- manyTill anyChar (try (string "{CODE}"))
+ body <- T.pack <$> manyTill anyChar (try (string "{CODE}"))
newline
if not (null rawAttrs)
then
@@ -428,9 +428,9 @@ nbsp = try $ do
htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines
htmlComment = try $ do
string "~hc~"
- inner <- many1 $ noneOf "~"
+ inner <- fmap T.pack $ many1 $ noneOf "~"
string "~/hc~"
- return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END "
+ return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " <> inner <> " ~/hc~ :END "
linebreak :: PandocMonad m => TikiWikiParser m B.Inlines
linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
@@ -469,15 +469,15 @@ image = try $ do
let title = fromMaybe src $ lookup "desc" rawAttrs
let alt = fromMaybe title $ lookup "alt" rawAttrs
let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs
- if not (null src)
+ if not (T.null src)
then
return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt)
else
- return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END "
+ return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " <> printAttrs rawAttrs <> "} :END "
where
- printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs
+ printAttrs attrs = T.unwords $ map (\(a, b) -> a <> "=\"" <> b <> "\"") attrs
-imageAttr :: PandocMonad m => TikiWikiParser m (String, String)
+imageAttr :: PandocMonad m => TikiWikiParser m (Text, Text)
imageAttr = try $ do
key <- many1 (noneOf "=} \t\n")
char '='
@@ -485,7 +485,7 @@ imageAttr = try $ do
value <- many1 (noneOf "}\"\n")
optional $ char '"'
optional $ char ','
- return (key, value)
+ return (T.pack key, T.pack value)
-- __strong__
@@ -500,57 +500,57 @@ emph = try $ fmap B.emph (enclosed (string "''") nestedInlines)
escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines
escapedChar = try $ do
string "~"
- mNumber <- safeRead <$> many1 digit
+ mNumber <- safeRead . T.pack <$> many1 digit
string "~"
return $ B.str $
case mNumber of
- Just number -> [toEnum (number :: Int) :: Char]
- Nothing -> []
+ Just number -> T.singleton $ toEnum (number :: Int)
+ Nothing -> ""
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
centered :: PandocMonad m => TikiWikiParser m B.Inlines
centered = try $ do
string "::"
- inner <- many1 $ noneOf ":\n"
+ inner <- fmap T.pack $ many1 $ noneOf ":\n"
string "::"
- return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END "
+ return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" <> inner <> ":: :END "
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
colored :: PandocMonad m => TikiWikiParser m B.Inlines
colored = try $ do
string "~~"
- inner <- many1 $ noneOf "~\n"
+ inner <- fmap T.pack $ many1 $ noneOf "~\n"
string "~~"
- return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END "
+ return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" <> inner <> "~~ :END "
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
underlined :: PandocMonad m => TikiWikiParser m B.Inlines
underlined = try $ do
string "==="
- inner <- many1 $ noneOf "=\n"
+ inner <- fmap T.pack $ many1 $ noneOf "=\n"
string "==="
- return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END "
+ return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" <> inner <> "=== :END "
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
boxed :: PandocMonad m => TikiWikiParser m B.Inlines
boxed = try $ do
string "^"
- inner <- many1 $ noneOf "^\n"
+ inner <- fmap T.pack $ many1 $ noneOf "^\n"
string "^"
- return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END "
+ return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" <> inner <> "^ :END "
-- --text--
strikeout :: PandocMonad m => TikiWikiParser m B.Inlines
strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines)
-nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String
-nestedString end = innerSpace <|> count 1 nonspaceChar
+nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m Text
+nestedString end = innerSpace <|> countChar 1 nonspaceChar
where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
+ innerSpace = try $ T.pack <$> many1 spaceChar <* notFollowedBy end
breakChars :: PandocMonad m => TikiWikiParser m B.Inlines
breakChars = try $ string "%%%" >> return B.linebreak
@@ -564,7 +564,7 @@ superMacro = try $ do
string "{SUP("
manyTill anyChar (string ")}")
body <- manyTill anyChar (string "{SUP}")
- return $ B.superscript $ B.text body
+ return $ B.superscript $ B.text $ T.pack body
-- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux
subTag :: PandocMonad m => TikiWikiParser m B.Inlines
@@ -575,22 +575,22 @@ subMacro = try $ do
string "{SUB("
manyTill anyChar (string ")}")
body <- manyTill anyChar (string "{SUB}")
- return $ B.subscript $ B.text body
+ return $ B.subscript $ B.text $ T.pack body
-- -+text+-
code :: PandocMonad m => TikiWikiParser m B.Inlines
code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString)
-macroAttr :: PandocMonad m => TikiWikiParser m (String, String)
+macroAttr :: PandocMonad m => TikiWikiParser m (Text, Text)
macroAttr = try $ do
key <- many1 (noneOf "=)")
char '='
optional $ char '"'
value <- many1 (noneOf " )\"")
optional $ char '"'
- return (key, value)
+ return (T.pack key, T.pack value)
-macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)]
+macroAttrs :: PandocMonad m => TikiWikiParser m [(Text, Text)]
macroAttrs = try $ sepEndBy macroAttr spaces
-- ~np~ __not bold__ ~/np~
@@ -598,13 +598,13 @@ noparse :: PandocMonad m => TikiWikiParser m B.Inlines
noparse = try $ do
string "~np~"
body <- manyTill anyChar (string "~/np~")
- return $ B.str body
+ return $ B.str $ T.pack body
str :: PandocMonad m => TikiWikiParser m B.Inlines
-str = fmap B.str (many1 alphaNum <|> count 1 characterReference)
+str = fmap B.str (T.pack <$> many1 alphaNum <|> countChar 1 characterReference)
symbol :: PandocMonad m => TikiWikiParser m B.Inlines
-symbol = fmap B.str (count 1 nonspaceChar)
+symbol = fmap B.str (countChar 1 nonspaceChar)
-- [[not a link]
notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines
@@ -612,14 +612,14 @@ notExternalLink = try $ do
start <- string "[["
body <- many (noneOf "\n[]")
end <- string "]"
- return $ B.text (start ++ body ++ end)
+ return $ B.text $ T.pack $ start ++ body ++ end
-- [http://www.somesite.org url|Some Site title]
-- ((internal link))
--
-- The ((...)) wiki links and [...] external links are handled
-- exactly the same; this abstracts that out
-makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines
+makeLink :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m B.Inlines
makeLink start middle end = try $ do
st <- getState
guard $ stateAllowLinks st
@@ -627,15 +627,15 @@ makeLink start middle end = try $ do
(url, title, anchor) <- wikiLinkText start middle end
parsedTitle <- parseFromString (many1 inline) title
setState $ st{ stateAllowLinks = True }
- return $ B.link (url++anchor) "" $mconcat parsedTitle
+ return $ B.link (url <> anchor) "" $ mconcat parsedTitle
-wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String)
+wikiLinkText :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m (Text, Text, Text)
wikiLinkText start middle end = do
- string start
- url <- many1 (noneOf $ middle ++ "\n")
+ string (T.unpack start)
+ url <- T.pack <$> many1 (noneOf $ T.unpack middle ++ "\n")
seg1 <- option url linkContent
seg2 <- option "" linkContent
- string end
+ string (T.unpack end)
if seg2 /= ""
then
return (url, seg2, seg1)
@@ -644,7 +644,7 @@ wikiLinkText start middle end = do
where
linkContent = do
char '|'
- many (noneOf middle)
+ T.pack <$> many (noneOf $ T.unpack middle)
externalLink :: PandocMonad m => TikiWikiParser m B.Inlines
externalLink = makeLink "[" "]|" "]"
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 0af52e046..996a818fd 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Txt2Tags
Copyright : Copyright (C) 2014 Matthew Pickering
@@ -18,7 +19,6 @@ import Prelude
import Control.Monad (guard, void, when)
import Control.Monad.Except (catchError, throwError)
import Control.Monad.Reader (Reader, asks, runReader)
-import Data.Char (toLower)
import Data.Default
import Data.List (intercalate, transpose)
import Data.Maybe (fromMaybe)
@@ -36,13 +36,13 @@ import Text.Pandoc.Parsing hiding (space, spaces, uri)
import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI,
underlineSpan)
-type T2T = ParserT String ParserState (Reader T2TMeta)
+type T2T = ParserT Text ParserState (Reader T2TMeta)
-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
data T2TMeta = T2TMeta {
- date :: String -- ^ Current date
- , mtime :: String -- ^ Last modification time of infile
+ date :: Text -- ^ Current date
+ , mtime :: Text -- ^ Last modification time of infile
, infile :: FilePath -- ^ Input file
, outfile :: FilePath -- ^ Output file
} deriving Show
@@ -63,7 +63,7 @@ getT2TMeta = do
_ -> catchError
(maximum <$> mapM getModTime inps)
(const (return ""))
- return $ T2TMeta curDate curMtime (intercalate ", " inps) outp
+ return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
readTxt2Tags :: PandocMonad m
@@ -74,14 +74,14 @@ readTxt2Tags opts s = do
meta <- getT2TMeta
let parsed = flip runReader meta $
readWithM parseT2T (def {stateOptions = opts}) $
- T.unpack (crFilter s) ++ "\n\n"
+ crFilter s <> "\n\n"
case parsed of
Right result -> return result
Left e -> throwError e
-- | Read Txt2Tags (ignoring all macros) from an input string returning
-- a Pandoc document
--- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-- readTxt2TagsNoMacros = readTxt2Tags
parseT2T :: T2T Pandoc
@@ -106,7 +106,7 @@ parseHeader = do
header :: T2T ()
header = titleline >> authorline >> dateline
-headerline :: B.ToMetaValue a => String -> T2T a -> T2T ()
+headerline :: B.ToMetaValue a => Text -> T2T a -> T2T ()
headerline field p = (() <$ try blankline)
<|> (p >>= updateState . B.setMeta field)
@@ -123,15 +123,15 @@ authorline =
dateline :: T2T ()
dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline)
-type Keyword = String
-type Value = String
+type Keyword = Text
+type Value = Text
setting :: T2T (Keyword, Value)
setting = do
string "%!"
- keyword <- ignoreSpacesCap (many1 alphaNum)
+ keyword <- ignoreSpacesCap (many1Char alphaNum)
char ':'
- value <- ignoreSpacesCap (manyTill anyChar newline)
+ value <- ignoreSpacesCap (manyTillChar anyChar newline)
return (keyword, value)
-- Blocks
@@ -163,10 +163,10 @@ balancedTitle c = try $ do
spaces
level <- length <$> many1 (char c)
guard (level <= 5) -- Max header level 5
- heading <- manyTill (noneOf "\n\r") (count level (char c))
+ heading <- manyTillChar (noneOf "\n\r") (count level (char c))
label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
many spaceChar *> newline
- let attr = maybe nullAttr (\x -> (x, [], [])) label
+ let attr = maybe nullAttr (\x -> (T.pack x, [], [])) label
return $ B.headerWith attr level (trimInlines $ B.text heading)
para :: T2T Blocks
@@ -192,7 +192,7 @@ quote :: T2T Blocks
quote = try $ do
lookAhead tab
rawQuote <- many1 (tab *> optional spaces *> anyLine)
- contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
+ contents <- parseFromString' parseBlocks (T.intercalate "\n" rawQuote <> "\n\n")
return $ B.blockQuote contents
commentLine :: T2T Inlines
@@ -243,17 +243,17 @@ listItem start end = try $ do
markerLength <- try start
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString' end $ firstLine ++ blank ++ rest
+ rest <- T.concat <$> many (listContinuation markerLength)
+ parseFromString' end $ firstLine <> blank <> rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
listContinuation :: Int
- -> T2T String
+ -> T2T Text
listContinuation markerLength = try $
notFollowedBy' (blankline >> blankline)
- *> (mappend <$> (concat <$> many1 listLine)
- <*> many blankline)
+ *> (mappend <$> (T.concat <$> many1 listLine)
+ <*> manyChar blankline)
where listLine = try $ indentWith markerLength *> anyLineNewline
-- Table
@@ -327,16 +327,16 @@ taggedBlock = do
-- Generic
-genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks
genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
-blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks
blockMarkupArea p f s = try (do
- string s *> blankline
- f . mconcat <$> manyTill p (eof <|> void (string s *> blankline)))
+ textStr s *> blankline
+ f . mconcat <$> manyTill p (eof <|> void (textStr s *> blankline)))
-blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
-blockMarkupLine p f s = try (f <$> (string s *> space *> p))
+blockMarkupLine :: T2T a -> (a -> Blocks) -> Text -> T2T Blocks
+blockMarkupLine p f s = try (f <$> (textStr s *> space *> p))
-- Can be in either block or inline position
comment :: Monoid a => T2T a
@@ -385,15 +385,15 @@ italic :: T2T Inlines
italic = inlineMarkup inline B.emph '/' B.str
code :: T2T Inlines
-code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
+code = inlineMarkup (T.singleton <$> anyChar) B.code '`' id
raw :: T2T Inlines
-raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
+raw = inlineMarkup (T.singleton <$> anyChar) B.text '"' id
tagged :: T2T Inlines
tagged = do
target <- getTarget
- inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
+ inlineMarkup (T.singleton <$> anyChar) (B.rawInline target) '\'' id
-- Parser for markup indicated by a double character.
-- Inline markup is greedy and glued
@@ -404,33 +404,33 @@ inlineMarkup :: Monoid a
=> T2T a -- Content parser
-> (a -> Inlines) -- Constructor
-> Char -- Fence
- -> (String -> a) -- Special Case to handle ******
+ -> (Text -> a) -- Special Case to handle ******
-> T2T Inlines
inlineMarkup p f c special = try $ do
- start <- many1 (char c)
- let l = length start
+ start <- many1Char (char c)
+ let l = T.length start
guard (l >= 2)
when (l == 2) (void $ notFollowedBy space)
-- We must make sure that there is no space before the start of the
-- closing tags
- body <- optionMaybe (try $ manyTill (noneOf "\n\r")
+ body <- optionMaybe (try $ manyTillChar (noneOf "\n\r")
(try $ lookAhead (noneOf " " >> string [c,c] )))
case body of
Just middle -> do
lastChar <- anyChar
- end <- many1 (char c)
+ end <- many1Char (char c)
let parser inp = parseFromString' (mconcat <$> many p) inp
- let start' = case drop 2 start of
+ let start' = case T.drop 2 start of
"" -> mempty
xs -> special xs
- body' <- parser (middle ++ [lastChar])
- let end' = case drop 2 end of
+ body' <- parser (middle <> T.singleton lastChar)
+ let end' = case T.drop 2 end of
"" -> mempty
xs -> special xs
return $ f (start' `mappend` body' `mappend` end')
Nothing -> do -- Either bad or case such as *****
guard (l >= 5)
- let body' = replicate (l - 4) c
+ let body' = T.replicate (l - 4) $ T.singleton c
return $ f (special body')
link :: T2T Inlines
@@ -441,12 +441,12 @@ titleLink :: T2T Inlines
titleLink = try $ do
char '['
notFollowedBy space
- tokens <- sepBy1 (many $ noneOf " ]") space
+ tokens <- sepBy1 (manyChar $ noneOf " ]") space
guard (length tokens >= 2)
char ']'
let link' = last tokens
- guard $ not $ null link'
- let tit = unwords (init tokens)
+ guard $ not $ T.null link'
+ let tit = T.unwords (init tokens)
return $ B.link link' "" (B.text tit)
-- Link with image
@@ -455,7 +455,7 @@ imageLink = try $ do
char '['
body <- image
many1 space
- l <- manyTill (noneOf "\n\r ") (char ']')
+ l <- manyTillChar (noneOf "\n\r ") (char ']')
return (B.link l "" body)
macro :: T2T Inlines
@@ -466,7 +466,7 @@ macro = try $ do
maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
where
commands = [ ("date", date), ("mtime", mtime)
- , ("infile", infile), ("outfile", outfile)]
+ , ("infile", T.pack . infile), ("outfile", T.pack . outfile)]
-- raw URLs in text are automatically linked
url :: T2T Inlines
@@ -474,7 +474,7 @@ url = try $ do
(rawUrl, escapedUrl) <- try uri <|> emailAddress
return $ B.link rawUrl "" (B.str escapedUrl)
-uri :: T2T (String, String)
+uri :: T2T (Text, Text)
uri = try $ do
address <- t2tURI
return (address, escapeURI address)
@@ -486,25 +486,25 @@ uri = try $ do
--isT2TURI (parse t2tURI "" -> Right _) = True
--isT2TURI _ = False
-t2tURI :: T2T String
+t2tURI :: T2T Text
t2tURI = do
- start <- try ((++) <$> proto <*> urlLogin) <|> guess
- domain <- many1 chars
- sep <- many (char '/')
- form' <- option mempty ((:) <$> char '?' <*> many1 form)
- anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
- return (start ++ domain ++ sep ++ form' ++ anchor')
+ start <- try ((<>) <$> proto <*> urlLogin) <|> guess
+ domain <- many1Char chars
+ sep <- manyChar (char '/')
+ form' <- option mempty (T.cons <$> char '?' <*> many1Char form)
+ anchor' <- option mempty (T.cons <$> char '#' <*> manyChar anchor)
+ return (start <> domain <> sep <> form' <> anchor')
where
protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
- proto = (++) <$> oneOfStrings protos <*> string "://"
- guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
- <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
+ proto = (<>) <$> oneOfStrings protos <*> textStr "://"
+ guess = (<>) <$> (((<>) <$> stringAnyCase "www" <*> option mempty (T.singleton <$> oneOf "23"))
+ <|> stringAnyCase "ftp") <*> (T.singleton <$> char '.')
login = alphaNum <|> oneOf "_.-"
- pass = many (noneOf " @")
+ pass = manyChar (noneOf " @")
chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
anchor = alphaNum <|> oneOf "%._0"
form = chars <|> oneOf ";*"
- urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
+ urlLogin = option mempty $ try ((\x y z -> x <> y <> T.singleton z) <$> many1Char login <*> option mempty (T.cons <$> char ':' <*> pass) <*> char '@')
image :: T2T Inlines
@@ -512,12 +512,12 @@ image = try $ do
-- List taken from txt2tags source
let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
char '['
- (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions)
+ (path, ext) <- manyUntilChar (noneOf "\n\t\r ") (oneOfStrings extensions)
char ']'
- return $ B.image (path ++ ext) "" mempty
+ return $ B.image (path <> ext) "" mempty
-- Characters used in markup
-specialChars :: String
+specialChars :: [Char]
specialChars = "%*-_/|:+;"
tab :: T2T Char
@@ -526,8 +526,8 @@ tab = char '\t'
space :: T2T Char
space = char ' '
-spaces :: T2T String
-spaces = many space
+spaces :: T2T Text
+spaces = manyChar space
endline :: T2T Inlines
endline = try $ do
@@ -544,17 +544,17 @@ endline = try $ do
return B.softbreak
str :: T2T Inlines
-str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str = try $ B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ")
whitespace :: T2T Inlines
whitespace = try $ B.space <$ spaceChar
symbol :: T2T Inlines
-symbol = B.str . (:[]) <$> oneOf specialChars
+symbol = B.str . T.singleton <$> oneOf specialChars
-- Utility
-getTarget :: T2T String
+getTarget :: T2T Text
getTarget = do
mv <- lookupMeta "target" . stateMeta <$> getState
return $ case mv of
@@ -565,5 +565,5 @@ getTarget = do
atStart :: T2T ()
atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
-ignoreSpacesCap :: T2T String -> T2T String
-ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
+ignoreSpacesCap :: T2T Text -> T2T Text
+ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 27b7d7245..f7edabc48 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
{- |
Module : Text.Pandoc.Readers.Vimwiki
@@ -51,9 +52,10 @@ import Prelude
import Control.Monad (guard)
import Control.Monad.Except (throwError)
import Data.Default
-import Data.List (isInfixOf, isPrefixOf)
+import Data.List (isInfixOf)
import Data.Maybe
import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines)
import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code,
codeBlockWith, definitionList,
@@ -73,12 +75,13 @@ import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
many1Till, orderedListMarker, readWithM,
registerHeader, spaceChar, stateMeta,
- stateOptions, uri)
-import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast,
- isURI)
+ stateOptions, uri, manyTillChar, manyChar, textStr,
+ many1Char, countChar, many1TillChar)
+import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+ isURI, tshow)
import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
spaces, string)
-import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1,
+import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
manyTill, notFollowedBy, option, skipMany1)
import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
@@ -128,7 +131,7 @@ block = do
, definitionList
, para
]
- trace (take 60 $ show $ toList res)
+ trace (T.take 60 $ tshow $ toList res)
return res
blockML :: PandocMonad m => VwParser m Blocks
@@ -218,32 +221,32 @@ defMarkerM = string "::" >> spaceChar
defMarkerE :: PandocMonad m => VwParser m Char
defMarkerE = string "::" >> newline
-hasDefMarkerM :: PandocMonad m => VwParser m String
-hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM)
+hasDefMarkerM :: PandocMonad m => VwParser m Text
+hasDefMarkerM = manyTillChar (noneOf "\n") (try defMarkerM)
preformatted :: PandocMonad m => VwParser m Blocks
preformatted = try $ do
many spaceChar >> string "{{{"
- attrText <- many (noneOf "\n")
+ attrText <- manyChar (noneOf "\n")
lookAhead newline
- contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}"
+ contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}}"
>> many spaceChar >> newline))
- if (contents /= "") && (head contents == '\n')
- then return $ B.codeBlockWith (makeAttr attrText) (tail contents)
+ if (contents /= "") && (T.head contents == '\n')
+ then return $ B.codeBlockWith (makeAttr attrText) (T.tail contents)
else return $ B.codeBlockWith (makeAttr attrText) contents
-makeAttr :: String -> Attr
+makeAttr :: Text -> Attr
makeAttr s =
- let xs = splitBy (`elem` " \t") s in
+ let xs = splitTextBy (`elem` (" \t" :: String)) s in
("", [], mapMaybe nameValue xs)
-nameValue :: String -> Maybe (String, String)
+nameValue :: Text -> Maybe (Text, Text)
nameValue s =
- let t = splitBy (== '=') s in
+ let t = splitTextBy (== '=') s in
if length t /= 2
then Nothing
else let (a, b) = (head t, last t) in
- if (length b < 2) || ((head b, last b) /= ('"', '"'))
+ if (T.length b < 2) || ((T.head b, T.last b) /= ('"', '"'))
then Nothing
else Just (a, stripFirstAndLast b)
@@ -253,16 +256,16 @@ displayMath = try $ do
many spaceChar >> string "{{$"
mathTag <- option "" mathTagParser
many space
- contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$"
+ contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}$"
>> many spaceChar >> newline))
let contentsWithTags
| mathTag == "" = contents
- | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents
- ++ "\n\\end{" ++ mathTag ++ "}"
+ | otherwise = "\\begin{" <> mathTag <> "}\n" <> contents
+ <> "\n\\end{" <> mathTag <> "}"
return $ B.para $ B.displayMath contentsWithTags
-mathTagLaTeX :: String -> String
+mathTagLaTeX :: Text -> Text
mathTagLaTeX s = case s of
"equation" -> ""
"equation*" -> ""
@@ -360,17 +363,17 @@ combineList x [y] = case toList y of
_ -> x:[y]
combineList x xs = x:xs
-listStart :: PandocMonad m => VwParser m (Int, String)
+listStart :: PandocMonad m => VwParser m (Int, Text)
listStart = try $ do
s <- many spaceChar
listType <- bulletListMarkers <|> orderedListMarkers
spaceChar
return (length s, listType)
-bulletListMarkers :: PandocMonad m => VwParser m String
+bulletListMarkers :: PandocMonad m => VwParser m Text
bulletListMarkers = "ul" <$ (char '*' <|> char '-')
-orderedListMarkers :: PandocMonad m => VwParser m String
+orderedListMarkers :: PandocMonad m => VwParser m Text
orderedListMarkers =
("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha])))
<|> ("ol" <$ char '#')
@@ -421,9 +424,9 @@ placeholder :: PandocMonad m => VwParser m ()
placeholder = try $
choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh
-ph :: PandocMonad m => String -> VwParser m ()
+ph :: PandocMonad m => Text -> VwParser m ()
ph s = try $ do
- many spaceChar >>string ('%':s) >> spaceChar
+ many spaceChar >> textStr (T.cons '%' s) >> spaceChar
contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline)
--use lookAhead because of placeholder in the whitespace parser
let meta' = B.setMeta s contents nullMeta
@@ -476,7 +479,7 @@ inlineML :: PandocMonad m => VwParser m Inlines
inlineML = choice $ whitespace endlineML:inlineList
str :: PandocMonad m => VwParser m Inlines
-str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars)
+str = B.str <$> many1Char (noneOf $ spaceChars ++ specialChars)
whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines
whitespace endline = B.space <$ (skipMany1 spaceChar <|>
@@ -487,7 +490,7 @@ whitespace' :: PandocMonad m => VwParser m Inlines
whitespace' = B.space <$ skipMany1 spaceChar
special :: PandocMonad m => VwParser m Inlines
-special = B.str <$> count 1 (oneOf specialChars)
+special = B.str <$> countChar 1 (oneOf specialChars)
bareURL :: PandocMonad m => VwParser m Inlines
bareURL = try $ do
@@ -505,8 +508,8 @@ strong = try $ do
return $ B.spanWith (makeId contents, [], []) mempty
<> B.strong contents
-makeId :: Inlines -> String
-makeId i = concat (stringify <$> toList i)
+makeId :: Inlines -> Text
+makeId i = T.concat (stringify <$> toList i)
emph :: PandocMonad m => VwParser m Inlines
emph = try $ do
@@ -527,7 +530,7 @@ strikeout = try $ do
code :: PandocMonad m => VwParser m Inlines
code = try $ do
char '`'
- contents <- many1Till (noneOf "\n") (char '`')
+ contents <- many1TillChar (noneOf "\n") (char '`')
return $ B.code contents
superscript :: PandocMonad m => VwParser m Inlines
@@ -542,8 +545,8 @@ subscript = try $
link :: PandocMonad m => VwParser m Inlines
link = try $ do
string "[["
- contents <- lookAhead $ manyTill anyChar (string "]]")
- case '|' `elem` contents of
+ contents <- lookAhead $ manyTillChar anyChar (string "]]")
+ case T.any (== '|') contents of
False -> do
manyTill anyChar (string "]]")
-- not using try here because [[hell]o]] is not rendered as a link in vimwiki
@@ -552,7 +555,7 @@ link = try $ do
else "wikilink"
return $ B.link (procLink contents) tit (B.str contents)
True -> do
- url <- manyTill anyChar $ char '|'
+ url <- manyTillChar anyChar $ char '|'
lab <- mconcat <$> manyTill inline (string "]]")
let tit = if isURI url
then ""
@@ -568,52 +571,52 @@ image = try $ do
images :: PandocMonad m => Int -> VwParser m Inlines
images k
| k == 0 = do
- imgurl <- manyTill anyChar (try $ string "}}")
+ imgurl <- manyTillChar anyChar (try $ string "}}")
return $ B.image (procImgurl imgurl) "" (B.str "")
| k == 1 = do
- imgurl <- manyTill anyChar (char '|')
+ imgurl <- manyTillChar anyChar (char '|')
alt <- mconcat <$> manyTill inline (try $ string "}}")
return $ B.image (procImgurl imgurl) "" alt
| k == 2 = do
- imgurl <- manyTill anyChar (char '|')
- alt <- mconcat <$>manyTill inline (char '|')
- attrText <- manyTill anyChar (try $ string "}}")
+ imgurl <- manyTillChar anyChar (char '|')
+ alt <- mconcat <$> manyTill inline (char '|')
+ attrText <- manyTillChar anyChar (try $ string "}}")
return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
| otherwise = do
- imgurl <- manyTill anyChar (char '|')
- alt <- mconcat <$>manyTill inline (char '|')
- attrText <- manyTill anyChar (char '|')
+ imgurl <- manyTillChar anyChar (char '|')
+ alt <- mconcat <$> manyTill inline (char '|')
+ attrText <- manyTillChar anyChar (char '|')
manyTill anyChar (try $ string "}}")
return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
-procLink' :: String -> String
+procLink' :: Text -> Text
procLink' s
- | take 6 s == "local:" = "file" ++ drop 5 s
- | take 6 s == "diary:" = "diary/" ++ drop 6 s
- | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
+ | T.take 6 s == "local:" = "file" <> T.drop 5 s
+ | T.take 6 s == "diary:" = "diary/" <> T.drop 6 s
+ | or ((`T.isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
"news:", "telnet:" ])
= s
| s == "" = ""
- | last s == '/' = s
+ | T.last s == '/' = s
| otherwise = s
-procLink :: String -> String
-procLink s = procLink' x ++ y
- where (x, y) = break (=='#') s
+procLink :: Text -> Text
+procLink s = procLink' x <> y
+ where (x, y) = T.break (=='#') s
-procImgurl :: String -> String
-procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s
+procImgurl :: Text -> Text
+procImgurl s = if T.take 6 s == "local:" then "file" <> T.drop 5 s else s
inlineMath :: PandocMonad m => VwParser m Inlines
inlineMath = try $
- B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$')
+ B.math <$ char '$' <*> many1TillChar (noneOf "\n") (char '$')
tag :: PandocMonad m => VwParser m Inlines
tag = try $ do
char ':'
- s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space))
- guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":")
- let ss = splitBy (==':') s
+ s <- manyTillChar (noneOf spaceChars) (try (char ':' >> lookAhead space))
+ guard $ not $ "::" `T.isInfixOf` (":" <> s <> ":")
+ let ss = splitTextBy (==':') s
return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss)
todoMark :: PandocMonad m => VwParser m Inlines
@@ -646,16 +649,16 @@ nFBTTBSB =
hasDefMarker :: PandocMonad m => VwParser m ()
hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)
-makeTagSpan' :: String -> Inlines
-makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <>
+makeTagSpan' :: Text -> Inlines
+makeTagSpan' s = B.spanWith (T.cons '-' s, [], []) (B.str "") <>
B.spanWith (s, ["tag"], []) (B.str s)
-makeTagSpan :: String -> Inlines
+makeTagSpan :: Text -> Inlines
makeTagSpan s = B.space <> makeTagSpan' s
-mathTagParser :: PandocMonad m => VwParser m String
+mathTagParser :: PandocMonad m => VwParser m Text
mathTagParser = do
- s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars)
+ s <- try $ lookAhead (char '%' >> manyTillChar (noneOf spaceChars)
(try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))
- char '%' >> string s >> char '%'
+ char '%' >> textStr s >> char '%'
return $ mathTagLaTeX s