aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authordespresc <christian.j.j.despres@gmail.com>2019-11-04 16:12:37 -0500
committerJohn MacFarlane <jgm@berkeley.edu>2019-11-12 16:03:45 -0800
commit90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch)
tree4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers
parentd3966372f5049eea56213b069fc4d70d8af9144c (diff)
downloadpandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884. + Use pandoc-types 1.20 and texmath 0.12. + Text is now used instead of String, with a few exceptions. + In the MediaBag module, some of the types using Strings were switched to use FilePath instead (not Text). + In the Parsing module, new parsers `manyChar`, `many1Char`, `manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`, `mantyUntilChar` have been added: these are like their unsuffixed counterparts but pack some or all of their output. + `glob` in Text.Pandoc.Class still takes String since it seems to be intended as an interface to Glob, which uses strings. It seems to be used only once in the package, in the EPUB writer, so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/CommonMark.hs45
-rw-r--r--src/Text/Pandoc/Readers/Creole.hs28
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs102
-rw-r--r--src/Text/Pandoc/Readers/Docx.hs76
-rw-r--r--src/Text/Pandoc/Readers/Docx/Combine.hs3
-rw-r--r--src/Text/Pandoc/Readers/Docx/Fields.hs33
-rw-r--r--src/Text/Pandoc/Readers/Docx/Lists.hs11
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs171
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs48
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs9
-rw-r--r--src/Text/Pandoc/Readers/DokuWiki.hs187
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs53
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs123
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs231
-rw-r--r--src/Text/Pandoc/Readers/Haddock.hs52
-rw-r--r--src/Text/Pandoc/Readers/Ipynb.hs79
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs76
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs334
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Lang.hs16
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Parsing.hs32
-rw-r--r--src/Text/Pandoc/Readers/Man.hs32
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs426
-rw-r--r--src/Text/Pandoc/Readers/MediaWiki.hs198
-rw-r--r--src/Text/Pandoc/Readers/Muse.hs144
-rw-r--r--src/Text/Pandoc/Readers/Native.hs13
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs29
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs3
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs61
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs33
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs10
-rw-r--r--src/Text/Pandoc/Readers/Org.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs25
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs219
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs44
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs40
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs154
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs72
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs50
-rw-r--r--src/Text/Pandoc/Readers/RST.hs486
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs185
-rw-r--r--src/Text/Pandoc/Readers/TWiki.hs117
-rw-r--r--src/Text/Pandoc/Readers/Textile.hs273
-rw-r--r--src/Text/Pandoc/Readers/TikiWiki.hs94
-rw-r--r--src/Text/Pandoc/Readers/Txt2Tags.hs132
-rw-r--r--src/Text/Pandoc/Readers/Vimwiki.hs127
47 files changed, 2427 insertions, 2307 deletions
diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs
index 368c86d4f..40b6f77c9 100644
--- a/src/Text/Pandoc/Readers/CommonMark.hs
+++ b/src/Text/Pandoc/Readers/CommonMark.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.CommonMark
Copyright : Copyright (C) 2015-2019 John MacFarlane
@@ -18,9 +20,9 @@ where
import Prelude
import CMarkGFM
import Control.Monad.State
-import Data.List (groupBy)
import qualified Data.Set as Set
-import Data.Text (Text, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Emoji (emojiToInline)
@@ -40,24 +42,24 @@ readCommonMark opts s = return $
[ extTable | isEnabled Ext_pipe_tables opts ] ++
[ extAutolink | isEnabled Ext_autolink_bare_uris opts ]
-convertEmojis :: String -> [Inline]
-convertEmojis s@(':':xs) =
- case break (==':') xs of
- (ys,':':zs) ->
+convertEmojis :: Text -> [Inline]
+convertEmojis s@(T.uncons -> Just (':',xs)) =
+ case T.break (==':') xs of
+ (ys, T.uncons -> Just (':',zs)) ->
case emojiToInline ys of
Just em -> em : convertEmojis zs
- Nothing -> Str (':' : ys) : convertEmojis (':':zs)
+ Nothing -> Str (":" <> ys) : convertEmojis (":" <> zs)
_ -> [Str s]
convertEmojis s =
- case break (==':') s of
+ case T.break (==':') s of
("","") -> []
(_,"") -> [Str s]
- (xs,ys) -> Str xs:convertEmojis ys
+ (xs,ys) -> Str xs : convertEmojis ys
addHeaderIdentifiers :: ReaderOptions -> Pandoc -> Pandoc
addHeaderIdentifiers opts doc = evalState (walkM (addHeaderId opts) doc) mempty
-addHeaderId :: ReaderOptions -> Block -> State (Set.Set String) Block
+addHeaderId :: ReaderOptions -> Block -> State (Set.Set Text) Block
addHeaderId opts (Header lev (_,classes,kvs) ils) = do
ids <- get
let ident = uniqueIdent (readerExtensions opts) ils ids
@@ -82,14 +84,14 @@ addBlock _ (Node _ THEMATIC_BREAK _) =
addBlock opts (Node _ BLOCK_QUOTE nodes) =
(BlockQuote (addBlocks opts nodes) :)
addBlock opts (Node _ (HTML_BLOCK t) _)
- | isEnabled Ext_raw_html opts = (RawBlock (Format "html") (unpack t) :)
+ | isEnabled Ext_raw_html opts = (RawBlock (Format "html") t :)
| otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
addBlock _ (Node _ (CUSTOM_BLOCK _onEnter _onExit) _nodes) =
id
addBlock _ (Node _ (CODE_BLOCK info t) _) =
- (CodeBlock ("", take 1 (words (unpack info)), []) (unpack t) :)
+ (CodeBlock ("", take 1 (T.words info), []) t :)
addBlock opts (Node _ (HEADING lev) nodes) =
(Header lev ("",[],[]) (addInlines opts nodes) :)
addBlock opts (Node _ (LIST listAttrs) nodes) =
@@ -176,29 +178,28 @@ addInlines opts = foldr (addInline opts) []
addInline :: ReaderOptions -> Node -> [Inline] -> [Inline]
addInline opts (Node _ (TEXT t) _) = (foldr ((++) . toinl) [] clumps ++)
- where raw = unpack t
- clumps = groupBy samekind raw
+ where clumps = T.groupBy samekind t
samekind ' ' ' ' = True
samekind ' ' _ = False
samekind _ ' ' = False
samekind _ _ = True
- toinl (' ':_) = [Space]
- toinl xs = if isEnabled Ext_emoji opts
- then convertEmojis xs
- else [Str xs]
+ toinl (T.uncons -> Just (' ', _)) = [Space]
+ toinl xs = if isEnabled Ext_emoji opts
+ then convertEmojis xs
+ else [Str xs]
addInline _ (Node _ LINEBREAK _) = (LineBreak :)
addInline opts (Node _ SOFTBREAK _)
| isEnabled Ext_hard_line_breaks opts = (LineBreak :)
| otherwise = (SoftBreak :)
addInline opts (Node _ (HTML_INLINE t) _)
- | isEnabled Ext_raw_html opts = (RawInline (Format "html") (unpack t) :)
+ | isEnabled Ext_raw_html opts = (RawInline (Format "html") t :)
| otherwise = id
-- Note: the cmark parser will never generate CUSTOM_BLOCK,
-- so we don't need to handle it:
addInline _ (Node _ (CUSTOM_INLINE _onEnter _onExit) _nodes) =
id
addInline _ (Node _ (CODE t) _) =
- (Code ("",[],[]) (unpack t) :)
+ (Code ("",[],[]) t :)
addInline opts (Node _ EMPH nodes) =
(Emph (addInlines opts nodes) :)
addInline opts (Node _ STRONG nodes) =
@@ -206,7 +207,7 @@ addInline opts (Node _ STRONG nodes) =
addInline opts (Node _ STRIKETHROUGH nodes) =
(Strikeout (addInlines opts nodes) :)
addInline opts (Node _ (LINK url title) nodes) =
- (Link nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+ (Link nullAttr (addInlines opts nodes) (url, title) :)
addInline opts (Node _ (IMAGE url title) nodes) =
- (Image nullAttr (addInlines opts nodes) (unpack url, unpack title) :)
+ (Image nullAttr (addInlines opts nodes) (url, title) :)
addInline _ _ = id
diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs
index ceb63ac84..1aa1dfaa4 100644
--- a/src/Text/Pandoc/Readers/Creole.hs
+++ b/src/Text/Pandoc/Readers/Creole.hs
@@ -19,6 +19,7 @@ import Control.Monad.Except (guard, liftM2, throwError)
import qualified Data.Foldable as F
import Data.Maybe (fromMaybe)
import Data.Text (Text)
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.Definition
@@ -70,7 +71,6 @@ parseCreole = do
eof
return $ B.doc bs
-
--
-- block parsers
--
@@ -92,9 +92,9 @@ nowiki = try $ fmap (B.codeBlock . mconcat) (nowikiStart
>> manyTill content nowikiEnd)
where
content = brackets <|> line
- brackets = try $ option "" ((:[]) <$> newline)
- <+> (char ' ' >> (many (char ' ') <+> string "}}}") <* eol)
- line = option "" ((:[]) <$> newline) <+> manyTill anyChar eol
+ brackets = try $ option "" (T.singleton <$> newline)
+ <+> (char ' ' >> (manyChar (char ' ') <+> textStr "}}}") <* eol)
+ line = option "" (T.singleton <$> newline) <+> manyTillChar anyChar eol
eol = lookAhead $ try $ nowikiEnd <|> newline
nowikiStart = optional newline >> string "{{{" >> skipMany spaceChar >> newline
nowikiEnd = try $ linebreak >> string "}}}" >> skipMany spaceChar >> newline
@@ -106,7 +106,7 @@ header = try $ do
fmap length (many1 (char '='))
guard $ level <= 6
skipSpaces
- content <- B.str <$> manyTill (noneOf "\n") headerEnd
+ content <- B.str <$> manyTillChar (noneOf "\n") headerEnd
return $ B.header level content
where
headerEnd = try $ skipSpaces >> many (char '=') >> skipSpaces >> newline
@@ -204,7 +204,7 @@ inline = choice [ whitespace
escapedChar :: PandocMonad m => CRLParser m B.Inlines
escapedChar =
- fmap (B.str . (:[])) (try $ char '~' >> noneOf "\t\n ")
+ fmap (B.str . T.singleton) (try $ char '~' >> noneOf "\t\n ")
escapedLink :: PandocMonad m => CRLParser m B.Inlines
escapedLink = try $ do
@@ -217,8 +217,8 @@ image = try $ do
(orig, src) <- wikiImg
return $ B.image src "" (B.str orig)
where
- linkSrc = many $ noneOf "|}\n\r\t"
- linkDsc = char '|' >> many (noneOf "}\n\r\t")
+ linkSrc = manyChar $ noneOf "|}\n\r\t"
+ linkDsc = char '|' >> manyChar (noneOf "}\n\r\t")
wikiImg = try $ do
string "{{"
src <- linkSrc
@@ -231,11 +231,11 @@ link = try $ do
(orig, src) <- uriLink <|> wikiLink
return $ B.link src "" orig
where
- linkSrc = many $ noneOf "|]\n\r\t"
- linkDsc :: PandocMonad m => String -> CRLParser m B.Inlines
+ linkSrc = manyChar $ noneOf "|]\n\r\t"
+ linkDsc :: PandocMonad m => Text -> CRLParser m B.Inlines
linkDsc otxt = B.str
<$> try (option otxt
- (char '|' >> many (noneOf "]\n\r\t")))
+ (char '|' >> manyChar (noneOf "]\n\r\t")))
linkImg = try $ char '|' >> image
wikiLink = try $ do
string "[["
@@ -248,7 +248,7 @@ link = try $ do
return (B.str orig, src)
inlineNowiki :: PandocMonad m => CRLParser m B.Inlines
-inlineNowiki = B.code <$> (start >> manyTill (noneOf "\n\r") end)
+inlineNowiki = B.code <$> (start >> manyTillChar (noneOf "\n\r") end)
where
start = try $ string "{{{"
end = try $ string "}}}" >> lookAhead (noneOf "}")
@@ -271,11 +271,11 @@ linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
innerNewline = return B.space
symbol :: PandocMonad m => CRLParser m B.Inlines
-symbol = fmap (B.str . (:[])) (oneOf specialChars)
+symbol = fmap (B.str . T.singleton) (oneOf specialChars)
str :: PandocMonad m => CRLParser m B.Inlines
str = let strChar = noneOf ("\t\n " ++ specialChars) in
- fmap B.str (many1 strChar)
+ fmap B.str (many1Char strChar)
bold :: PandocMonad m => CRLParser m B.Inlines
bold = B.strong . mconcat <$>
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 3f44f83f8..ade9d27a3 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.DocBook
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -536,20 +537,22 @@ instance Default DBState where
readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readDocBook _ inp = do
- let tree = normalizeTree . parseXML . handleInstructions
- $ T.unpack $ crFilter inp
+ let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp
(bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree
return $ Pandoc (dbMeta st') (toList . mconcat $ bs)
-- We treat <?asciidoc-br?> specially (issue #1236), converting it
-- to <br/>, since xml-light doesn't parse the instruction correctly.
-- Other xml instructions are simply removed from the input stream.
-handleInstructions :: String -> String
-handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs
-handleInstructions xs = case break (=='<') xs of
+handleInstructions :: Text -> Text
+handleInstructions = T.pack . handleInstructions' . T.unpack
+
+handleInstructions' :: String -> String
+handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs
+handleInstructions' xs = case break (=='<') xs of
(ys, []) -> ys
- ([], '<':zs) -> '<' : handleInstructions zs
- (ys, zs) -> ys ++ handleInstructions zs
+ ([], '<':zs) -> '<' : handleInstructions' zs
+ (ys, zs) -> ys ++ handleInstructions' zs
getFigure :: PandocMonad m => Element -> DB m Blocks
getFigure e = do
@@ -580,13 +583,13 @@ convertEntity :: String -> String
convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
+attrValue :: String -> Element -> Text
attrValue attr elt =
- fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+ maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- convenience function
-named :: String -> Element -> Bool
-named s e = qName (elName e) == s
+named :: Text -> Element -> Bool
+named s e = qName (elName e) == T.unpack s
--
@@ -611,7 +614,7 @@ addMetadataFromElement e = do
[z] -> getInlines z >>= addMeta fieldname
zs -> mapM getInlines zs >>= addMeta fieldname
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m ()
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m ()
addMeta field val = modify (setMeta field val)
instance HasMeta DBState where
@@ -638,10 +641,8 @@ admonitionTags :: [String]
admonitionTags = ["important","caution","note","tip","warning"]
-- Trim leading and trailing newline characters
-trimNl :: String -> String
-trimNl = reverse . go . reverse . go
- where go ('\n':xs) = xs
- go xs = xs
+trimNl :: Text -> Text
+trimNl = T.dropAround (== '\n')
-- meld text into beginning of first paragraph of Blocks.
-- assumes Blocks start with a Para; if not, does nothing.
@@ -668,7 +669,7 @@ getMediaobject e = do
h = case atVal "depth" of
"" -> []
d -> [("height", d)]
- atr = (atVal "id", words $ atVal "role", w ++ h)
+ atr = (atVal "id", T.words $ atVal "role", w ++ h)
in return (atVal "fileref", atr)
let getCaption el = case filterChild (\x -> named "caption" x
|| named "textobject" x
@@ -691,8 +692,8 @@ parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text s
-parseBlock (CRef x) = return $ plain $ str $ map toUpper x
+ else return $ plain $ trimInlines $ text $ T.pack s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
parseBlock (Elem e) =
case qName (elName e) of
"toc" -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -740,7 +741,7 @@ parseBlock (Elem e) =
"refsect2" -> sect 2
"refsect3" -> sect 3
"refsection" -> gets dbSectionLevel >>= sect . (+1)
- l@_ | l `elem` admonitionTags -> parseAdmonition l
+ l@_ | l `elem` admonitionTags -> parseAdmonition $ T.pack l
"area" -> skip
"areaset" -> skip
"areaspec" -> skip
@@ -800,7 +801,7 @@ parseBlock (Elem e) =
"subtitle" -> return mempty -- handled in parent element
_ -> skip >> getBlocks e
where skip = do
- lift $ report $ IgnoredElement $ qName (elName e)
+ lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
return mempty
parseMixed container conts = do
@@ -818,7 +819,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ strContentRecursive e
+ $ trimNl $ T.pack $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -871,9 +872,9 @@ parseBlock (Elem e) =
_ -> AlignDefault
let toWidth c = case findAttr (unqual "colwidth") c of
Just w -> fromMaybe 0
- $ safeRead $ '0': filter (\x ->
+ $ safeRead $ "0" <> T.filter (\x ->
(x >= '0' && x <= '9')
- || x == '.') w
+ || x == '.') (T.pack w)
Nothing -> 0 :: Double
let numrows = case bodyrows of
[] -> 0
@@ -938,9 +939,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text s
+parseInline (Text (CData _ s _)) = return $ text $ T.pack s
parseInline (CRef ref) =
- return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
+ return $ maybe (text $ T.toUpper $ T.pack ref) (text . T.pack) $ lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
"equation" -> equation e displayMath
@@ -980,7 +981,7 @@ parseInline (Elem e) =
"constant" -> codeWithLang
"userinput" -> codeWithLang
"varargs" -> return $ code "(...)"
- "keycap" -> return (str $ strContent e)
+ "keycap" -> return (str $ T.pack $ strContent e)
"keycombo" -> keycombo <$>
mapM parseInline (elContent e)
"menuchoice" -> menuchoice <$>
@@ -992,20 +993,20 @@ parseInline (Elem e) =
let title = case attrValue "endterm" e of
"" -> maybe "???" xrefTitleByElem
(findElementById linkend content)
- endterm -> maybe "???" strContent
+ endterm -> maybe "???" (T.pack . strContent)
(findElementById endterm content)
- return $ link ('#' : linkend) "" (text title)
- "email" -> return $ link ("mailto:" ++ strContent e) ""
- $ str $ strContent e
- "uri" -> return $ link (strContent e) "" $ str $ strContent e
+ return $ link ("#" <> linkend) "" (text title)
+ "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
+ $ str $ T.pack $ strContent e
+ "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
"ulink" -> link (attrValue "url" e) "" <$> innerInlines
"link" -> do
ils <- innerInlines
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> h
- _ -> '#' : attrValue "linkend" e
+ Just h -> T.pack h
+ _ -> "#" <> attrValue "linkend" e
let ils' = if ils == mempty then str href else ils
- let attr = (attrValue "id" e, words $ attrValue "role" e, [])
+ let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
return $ linkWith attr href "" ils'
"foreignphrase" -> emph <$> innerInlines
"emphasis" -> case attrValue "role" e of
@@ -1023,7 +1024,7 @@ parseInline (Elem e) =
"br" -> return linebreak
_ -> skip >> innerInlines
where skip = do
- lift $ report $ IgnoredElement $ qName (elName e)
+ lift $ report $ IgnoredElement $ T.pack $ qName (elName e)
return mempty
innerInlines = (trimInlines . mconcat) <$>
@@ -1032,7 +1033,7 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines
(filterChildren (named "member") e)
segmentedList = do
@@ -1062,8 +1063,8 @@ parseInline (Elem e) =
-- if there's no such attribute, employ some heuristics based on what
-- docbook-xsl does.
xrefTitleByElem el
- | not (null xrefLabel) = xrefLabel
- | otherwise = case qName (elName el) of
+ | not (T.null xrefLabel) = xrefLabel
+ | otherwise = case qName (elName el) of
"chapter" -> descendantContent "title" el
"section" -> descendantContent "title" el
"sect1" -> descendantContent "title" el
@@ -1073,10 +1074,10 @@ parseInline (Elem e) =
"sect5" -> descendantContent "title" el
"cmdsynopsis" -> descendantContent "command" el
"funcsynopsis" -> descendantContent "function" el
- _ -> qName (elName el) ++ "_title"
+ _ -> T.pack $ qName (elName el) ++ "_title"
where
xrefLabel = attrValue "xreflabel" el
- descendantContent name = maybe "???" strContent
+ descendantContent name = maybe "???" (T.pack . strContent)
. filterElementName (\n -> qName n == name)
-- | Extract a math equation from an element
@@ -1088,20 +1089,20 @@ equation
:: Monad m
=> Element
-- ^ The element from which to extract a mathematical equation
- -> (String -> Inlines)
+ -> (Text -> Inlines)
-- ^ A constructor for some Inlines, taking the TeX code as input
-> m Inlines
equation e constructor =
- return $ mconcat $ map constructor $ mathMLEquations ++ latexEquations
+ return $ mconcat $ map constructor $ mathMLEquations <> latexEquations
where
- mathMLEquations :: [String]
+ mathMLEquations :: [Text]
mathMLEquations = map writeTeX $ rights $ readMath
(\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
- (readMathML . showElement)
+ (readMathML . T.pack . showElement)
- latexEquations :: [String]
+ latexEquations :: [Text]
latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
- (concat . fmap showVerbatimCData . elContent)
+ (T.concat . fmap showVerbatimCData . elContent)
readMath :: (Element -> Bool) -> (Element -> b) -> [b]
readMath childPredicate fromElement =
@@ -1111,9 +1112,10 @@ equation e constructor =
-- | Get the actual text stored in a CData block. 'showContent'
-- returns the text still surrounded by the [[CDATA]] tags.
-showVerbatimCData :: Content -> String
-showVerbatimCData (Text (CData _ d _)) = d
-showVerbatimCData c = showContent c
+showVerbatimCData :: Content -> Text
+showVerbatimCData (Text (CData _ d _)) = T.pack d
+showVerbatimCData c = T.pack $ showContent c
+
-- | Set the prefix of a name to 'Nothing'
removePrefix :: QName -> QName
diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs
index 387c3c7e2..cd4ff01db 100644
--- a/src/Text/Pandoc/Readers/Docx.hs
+++ b/src/Text/Pandoc/Readers/Docx.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Docx
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -68,12 +69,12 @@ import Data.Default (Default)
import Data.List (delete, intersect)
import Data.Char (isSpace)
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Text.Pandoc.Builder
--- import Text.Pandoc.Definition
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
@@ -101,14 +102,14 @@ readDocx opts bytes
readDocx _ _ =
throwError $ PandocSomeError "couldn't parse docx file"
-data DState = DState { docxAnchorMap :: M.Map String String
- , docxAnchorSet :: Set.Set String
- , docxImmedPrevAnchor :: Maybe String
+data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
+ , docxAnchorSet :: Set.Set T.Text
+ , docxImmedPrevAnchor :: Maybe T.Text
, docxMediaBag :: MediaBag
, docxDropCap :: Inlines
-- keep track of (numId, lvl) values for
-- restarting
- , docxListState :: M.Map (String, String) Integer
+ , docxListState :: M.Map (T.Text, T.Text) Integer
, docxPrevPara :: Inlines
}
@@ -142,7 +143,7 @@ spansToKeep = []
divsToKeep :: [ParaStyleName]
divsToKeep = ["Definition", "Definition Term"]
-metaStyles :: M.Map ParaStyleName String
+metaStyles :: M.Map ParaStyleName T.Text
metaStyles = M.fromList [ ("Title", "title")
, ("Subtitle", "subtitle")
, ("Author", "author")
@@ -167,7 +168,7 @@ isEmptyPar (Paragraph _ parParts) =
isEmptyElem _ = True
isEmptyPar _ = False
-bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
+bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
| (Paragraph pPr parParts) <- bp
@@ -232,22 +233,22 @@ runElemToInlines Tab = space
runElemToInlines SoftHyphen = text "\xad"
runElemToInlines NoBreakHyphen = text "\x2011"
-runElemToString :: RunElem -> String
-runElemToString (TextRun s) = s
-runElemToString LnBrk = ['\n']
-runElemToString Tab = ['\t']
-runElemToString SoftHyphen = ['\xad']
-runElemToString NoBreakHyphen = ['\x2011']
+runElemToText :: RunElem -> T.Text
+runElemToText (TextRun s) = s
+runElemToText LnBrk = T.singleton '\n'
+runElemToText Tab = T.singleton '\t'
+runElemToText SoftHyphen = T.singleton '\xad'
+runElemToText NoBreakHyphen = T.singleton '\x2011'
-runToString :: Run -> String
-runToString (Run _ runElems) = concatMap runElemToString runElems
-runToString _ = ""
+runToText :: Run -> T.Text
+runToText (Run _ runElems) = T.concat $ map runElemToText runElems
+runToText _ = ""
-parPartToString :: ParPart -> String
-parPartToString (PlainRun run) = runToString run
-parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
-parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
-parPartToString _ = ""
+parPartToText :: ParPart -> T.Text
+parPartToText (PlainRun run) = runToText run
+parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs
+parPartToText _ = ""
blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]
@@ -310,7 +311,7 @@ runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
| maybe False isCodeCharStyle $ rParentStyle rs = do
rPr <- resolveDependentRunStyle rs
- let codeString = code $ concatMap runElemToString runElems
+ let codeString = code $ T.concat $ map runElemToText runElems
return $ case rVertAlign rPr of
Just SupScrpt -> superscript codeString
Just SubScrpt -> subscript codeString
@@ -328,17 +329,17 @@ runToInlines (Endnote bps) = do
return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
+ return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"
extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
("", [], [("width", showDim w), ("height", showDim h)] )
where
- showDim d = show (d / 914400) ++ "in"
+ showDim d = tshow (d / 914400) <> "in"
extentToAttr _ = nullAttr
-blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
+blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
let blkList = toList blks
notParaOrPlain :: Block -> Bool
@@ -347,7 +348,7 @@ blocksToInlinesWarn cmtId blks = do
notParaOrPlain _ = True
unless ( not (any notParaOrPlain blkList)) $
lift $ P.report $ DocxParserWarning $
- "Docx comment " ++ cmtId ++ " will not retain formatting"
+ "Docx comment " <> cmtId <> " will not retain formatting"
return $ blocksToInlines' blkList
-- The majority of work in this function is done in the primed
@@ -440,12 +441,12 @@ parPartToInlines' (BookMark _ anchor) =
return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines' (Drawing fp title alt bs ext) = do
(lift . lift) $ P.insertMedia fp Nothing bs
- return $ imageWith (extentToAttr ext) fp title $ text alt
+ return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
parPartToInlines' Chart =
return $ spanWith ("", ["chart"], []) $ text "[CHART]"
parPartToInlines' (InternalHyperLink anchor runs) = do
ils <- smushInlines <$> mapM runToInlines runs
- return $ link ('#' : anchor) "" ils
+ return $ link ("#" <> anchor) "" ils
parPartToInlines' (ExternalHyperLink target runs) = do
ils <- smushInlines <$> mapM runToInlines runs
return $ link target "" ils
@@ -463,7 +464,7 @@ isAnchorSpan (Span (_, classes, kvs) _) =
null kvs
isAnchorSpan _ = False
-dummyAnchors :: [String]
+dummyAnchors :: [T.Text]
dummyAnchors = ["_GoBack"]
makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
@@ -477,7 +478,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
, (Span (anchIdent, ["anchor"], _) cIls) <- c = do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
- let newIdent = if null ident
+ let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
newIls = concatMap f ils where f il | il == c = cIls
@@ -490,7 +491,7 @@ makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
do
hdrIDMap <- gets docxAnchorMap
exts <- readerExtensions <$> asks docxOptions
- let newIdent = if null ident
+ let newIdent = if T.null ident
then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
else ident
modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
@@ -558,8 +559,8 @@ parStyleToTransform pPr
else transform
parStyleToTransform _ = return id
-normalizeToClassName :: (FromStyleName a) => a -> String
-normalizeToClassName = map go . fromStyleName
+normalizeToClassName :: (FromStyleName a) => a -> T.Text
+normalizeToClassName = T.map go . fromStyleName
where go c | isSpace c = '-'
| otherwise = c
@@ -574,7 +575,8 @@ bodyPartToBlocks (Paragraph pPr parparts)
return $
transform $
codeBlock $
- concatMap parPartToString parparts
+ T.concat $
+ map parPartToText parparts
| Just (style, n) <- pHeading pPr = do
ils <-local (\s-> s{docxInHeaderBlock=True})
(smushInlines <$> mapM parPartToInlines parparts)
@@ -646,7 +648,7 @@ bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
, ("num-id", numId)
, ("format", fmt)
, ("text", txt)
- , ("start", show start)
+ , ("start", tshow start)
]
modify $ \st -> st{ docxListState =
-- expire all the continuation data for lists of level > this one:
@@ -705,12 +707,12 @@ bodyPartToBlocks (OMathPara e) =
-- replace targets with generated anchors.
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
-rewriteLink' l@(Link attr ils ('#':target, title)) = do
+rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
anchorMap <- gets docxAnchorMap
case M.lookup target anchorMap of
Just newTarget -> do
modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
- return $ Link attr ils ('#':newTarget, title)
+ return $ Link attr ils ("#" <> newTarget, title)
Nothing -> do
modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
return l
diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs
index da40a80ea..82791d669 100644
--- a/src/Text/Pandoc/Readers/Docx/Combine.hs
+++ b/src/Text/Pandoc/Readers/Docx/Combine.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE TypeSynonymInstances #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Combine
Copyright : © 2014-2019 Jesse Rosenthal <jrosenthal@jhu.edu>,
diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs
index e7a916f1c..05d9dd697 100644
--- a/src/Text/Pandoc/Readers/Docx/Fields.hs
+++ b/src/Text/Pandoc/Readers/Docx/Fields.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Fields
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -16,16 +17,18 @@ module Text.Pandoc.Readers.Docx.Fields ( FieldInfo(..)
) where
import Prelude
+import Data.Functor (($>))
+import qualified Data.Text as T
import Text.Parsec
-import Text.Parsec.String (Parser)
+import Text.Parsec.Text (Parser)
-type URL = String
+type URL = T.Text
data FieldInfo = HyperlinkField URL
| UnknownField
deriving (Show)
-parseFieldInfo :: String -> Either ParseError FieldInfo
+parseFieldInfo :: T.Text -> Either ParseError FieldInfo
parseFieldInfo = parse fieldInfo ""
fieldInfo :: Parser FieldInfo
@@ -34,31 +37,31 @@ fieldInfo =
<|>
return UnknownField
-escapedQuote :: Parser String
-escapedQuote = string "\\\""
+escapedQuote :: Parser T.Text
+escapedQuote = string "\\\"" $> "\\\""
-inQuotes :: Parser String
+inQuotes :: Parser T.Text
inQuotes =
- (try escapedQuote) <|> (anyChar >>= (\c -> return [c]))
+ (try escapedQuote) <|> (anyChar >>= (\c -> return $ T.singleton c))
-quotedString :: Parser String
+quotedString :: Parser T.Text
quotedString = do
char '"'
- concat <$> manyTill inQuotes (try (char '"'))
+ T.concat <$> manyTill inQuotes (try (char '"'))
-unquotedString :: Parser String
-unquotedString = manyTill anyChar (try $ lookAhead space *> return () <|> eof)
+unquotedString :: Parser T.Text
+unquotedString = T.pack <$> manyTill anyChar (try $ lookAhead space *> return () <|> eof)
-fieldArgument :: Parser String
+fieldArgument :: Parser T.Text
fieldArgument = quotedString <|> unquotedString
-- there are other switches, but this is the only one I've seen in the wild so far, so it's the first one I'll implement. See §17.16.5.25
-hyperlinkSwitch :: Parser (String, String)
+hyperlinkSwitch :: Parser (T.Text, T.Text)
hyperlinkSwitch = do
sw <- string "\\l"
spaces
farg <- fieldArgument
- return (sw, farg)
+ return (T.pack sw, farg)
hyperlink :: Parser URL
hyperlink = do
@@ -68,6 +71,6 @@ hyperlink = do
farg <- fieldArgument
switches <- spaces *> many hyperlinkSwitch
let url = case switches of
- ("\\l", s) : _ -> farg ++ ('#': s)
+ ("\\l", s) : _ -> farg <> "#" <> s
_ -> farg
return url
diff --git a/src/Text/Pandoc/Readers/Docx/Lists.hs b/src/Text/Pandoc/Readers/Docx/Lists.hs
index eb24640c5..b7b7a3835 100644
--- a/src/Text/Pandoc/Readers/Docx/Lists.hs
+++ b/src/Text/Pandoc/Readers/Docx/Lists.hs
@@ -22,6 +22,7 @@ import Prelude
import Data.List
import Data.Maybe
import Data.String (fromString)
+import qualified Data.Text as T
import Text.Pandoc.Generic (bottomUp)
import Text.Pandoc.JSON
import Text.Pandoc.Readers.Docx.Parse (ParaStyleName)
@@ -45,20 +46,20 @@ getNumId _ = Nothing
getNumIdN :: Block -> Integer
getNumIdN b = fromMaybe (-1) (getNumId b)
-getText :: Block -> Maybe String
+getText :: Block -> Maybe T.Text
getText (Div (_, _, kvs) _) = lookup "text" kvs
getText _ = Nothing
data ListType = Itemized | Enumerated ListAttributes
-listStyleMap :: [(String, ListNumberStyle)]
+listStyleMap :: [(T.Text, ListNumberStyle)]
listStyleMap = [("upperLetter", UpperAlpha),
("lowerLetter", LowerAlpha),
("upperRoman", UpperRoman),
("lowerRoman", LowerRoman),
("decimal", Decimal)]
-listDelimMap :: [(String, ListNumberDelim)]
+listDelimMap :: [(T.Text, ListNumberDelim)]
listDelimMap = [("%1)", OneParen),
("(%1)", TwoParens),
("%1.", Period)]
@@ -82,11 +83,11 @@ getListType b@(Div (_, _, kvs) _) | isListItem b =
_ -> Nothing
getListType _ = Nothing
-listParagraphDivs :: [String]
+listParagraphDivs :: [T.Text]
listParagraphDivs = ["list-paragraph"]
listParagraphStyles :: [ParaStyleName]
-listParagraphStyles = map fromString listParagraphDivs
+listParagraphStyles = map (fromString . T.unpack) listParagraphDivs
-- This is a first stab at going through and attaching meaning to list
-- paragraphs, without an item marker, following a list item. We
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 889bd80fc..8598ada6f 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -63,6 +64,7 @@ import qualified Data.ByteString.Lazy as B
import Data.Char (chr, ord, readLitChar)
import Data.List
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
@@ -71,7 +73,7 @@ import Text.Pandoc.Shared (filteredFilesFromArchive, safeRead)
import qualified Text.Pandoc.UTF8 as UTF8
import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML)
-import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, stringToFont)
+import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
import Text.XML.Light
import qualified Text.XML.Light.Cursor as XMLC
@@ -88,7 +90,7 @@ data ReaderEnv = ReaderEnv { envNotes :: Notes
}
deriving Show
-data ReaderState = ReaderState { stateWarnings :: [String]
+data ReaderState = ReaderState { stateWarnings :: [T.Text]
, stateFldCharState :: FldCharState
}
deriving Show
@@ -119,7 +121,6 @@ eitherToD (Left _) = throwError DocxError
concatMapM :: (Monad m) => (a -> m [b]) -> [a] -> m [b]
concatMapM f xs = liftM concat (mapM f xs)
-
-- This is similar to `mapMaybe`: it maps a function returning the D
-- monad over a list, and only keeps the non-erroring return values.
mapD :: (a -> D b) -> [a] -> D [b]
@@ -178,18 +179,18 @@ type ParStyleMap = M.Map ParaStyleId ParStyle
data Numbering = Numbering NameSpaces [Numb] [AbstractNumb]
deriving Show
-data Numb = Numb String String [LevelOverride]
+data Numb = Numb T.Text T.Text [LevelOverride]
deriving Show
-- ilvl startOverride lvl
-data LevelOverride = LevelOverride String (Maybe Integer) (Maybe Level)
+data LevelOverride = LevelOverride T.Text (Maybe Integer) (Maybe Level)
deriving Show
-data AbstractNumb = AbstractNumb String [Level]
+data AbstractNumb = AbstractNumb T.Text [Level]
deriving Show
-- ilvl format string start
-data Level = Level String String String (Maybe Integer)
+data Level = Level T.Text T.Text T.Text (Maybe Integer)
deriving Show
data DocumentLocation = InDocument | InFootnote | InEndnote
@@ -199,11 +200,11 @@ data Relationship = Relationship DocumentLocation RelId Target
deriving Show
data Notes = Notes NameSpaces
- (Maybe (M.Map String Element))
- (Maybe (M.Map String Element))
+ (Maybe (M.Map T.Text Element))
+ (Maybe (M.Map T.Text Element))
deriving Show
-data Comments = Comments NameSpaces (M.Map String Element)
+data Comments = Comments NameSpaces (M.Map T.Text Element)
deriving Show
data ParIndentation = ParIndentation { leftParIndent :: Maybe Integer
@@ -238,8 +239,8 @@ defaultParagraphStyle = ParagraphStyle { pStyle = []
data BodyPart = Paragraph ParagraphStyle [ParPart]
- | ListItem ParagraphStyle String String (Maybe Level) [ParPart]
- | Tbl String TblGrid TblLook [Row]
+ | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart]
+ | Tbl T.Text TblGrid TblLook [Row]
| OMathPara [Exp]
deriving Show
@@ -279,7 +280,7 @@ data ParPart = PlainRun Run
| BookMark BookMarkId Anchor
| InternalHyperLink Anchor [Run]
| ExternalHyperLink URL [Run]
- | Drawing FilePath String String B.ByteString Extent -- title, alt
+ | Drawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| Chart -- placeholder for now
| PlainOMath [Exp]
| Field FieldInfo [Run]
@@ -290,28 +291,28 @@ data ParPart = PlainRun Run
data Run = Run RunStyle [RunElem]
| Footnote [BodyPart]
| Endnote [BodyPart]
- | InlineDrawing FilePath String String B.ByteString Extent -- title, alt
+ | InlineDrawing FilePath T.Text T.Text B.ByteString Extent -- title, alt
| InlineChart -- placeholder
deriving Show
-data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
+data RunElem = TextRun T.Text | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show
-type Target = String
-type Anchor = String
-type URL = String
-type BookMarkId = String
-type RelId = String
-type ChangeId = String
-type CommentId = String
-type Author = String
-type ChangeDate = String
-type CommentDate = String
+type Target = T.Text
+type Anchor = T.Text
+type URL = T.Text
+type BookMarkId = T.Text
+type RelId = T.Text
+type ChangeId = T.Text
+type CommentId = T.Text
+type Author = T.Text
+type ChangeDate = T.Text
+type CommentDate = T.Text
archiveToDocx :: Archive -> Either DocxError Docx
archiveToDocx archive = fst <$> archiveToDocxWithWarnings archive
-archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [String])
+archiveToDocxWithWarnings :: Archive -> Either DocxError (Docx, [T.Text])
archiveToDocxWithWarnings archive = do
docXmlPath <- case getDocumentXmlPath archive of
Just fp -> Right fp
@@ -341,7 +342,7 @@ archiveToDocxWithWarnings archive = do
Right doc -> Right (Docx doc, stateWarnings st)
Left e -> Left e
-getDocumentXmlPath :: Archive -> Maybe String
+getDocumentXmlPath :: Archive -> Maybe FilePath
getDocumentXmlPath zf = do
entry <- findEntryByPath "_rels/.rels" zf
relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry
@@ -394,7 +395,7 @@ constructBogusParStyleData stName = ParStyle
, numInfo = Nothing
, psParentStyle = Nothing
, pStyleName = stName
- , pStyleId = ParaStyleId . filter (/=' ') . fromStyleName $ stName
+ , pStyleId = ParaStyleId . T.filter (/=' ') . fromStyleName $ stName
}
archiveToNotes :: Archive -> Notes
@@ -441,8 +442,8 @@ filePathToRelType path docXmlPath =
relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
- relId <- findAttr (QName "Id" Nothing Nothing) element
- target <- findAttr (QName "Target" Nothing Nothing) element
+ relId <- findAttrText (QName "Id" Nothing Nothing) element
+ target <- findAttrText (QName "Target" Nothing Nothing) element
return $ Relationship relType relId target
relElemToRelationship _ _ = Nothing
@@ -464,7 +465,7 @@ filePathIsMedia fp =
in
(dir == "word/media/")
-lookupLevel :: String -> String -> Numbering -> Maybe Level
+lookupLevel :: T.Text -> T.Text -> Numbering -> Maybe Level
lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
(absNumId, ovrrides) <- lookup numId $
map (\(Numb nid absnumid ovrRides) -> (nid, (absnumid, ovrRides))) numbs
@@ -483,7 +484,7 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
loElemToLevelOverride ns element
| isElem ns "w" "lvlOverride" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
let startOverride = findChildByName ns "w" "startOverride" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -495,9 +496,9 @@ loElemToLevelOverride _ _ = Nothing
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttrByName ns "w" "numId" element
+ numId <- findAttrTextByName ns "w" "numId" element
absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let lvlOverrides = mapMaybe
(loElemToLevelOverride ns)
(findChildrenByName ns "w" "lvlOverride" element)
@@ -507,7 +508,7 @@ numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrByName ns "w" "abstractNumId" element
+ absNumId <- findAttrTextByName ns "w" "abstractNumId" element
let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
@@ -516,11 +517,11 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttrByName ns "w" "ilvl" element
+ ilvl <- findAttrTextByName ns "w" "ilvl" element
fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
let start = findChildByName ns "w" "start" element
>>= findAttrByName ns "w" "val"
>>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
@@ -544,11 +545,11 @@ archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map String Element)
+elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
- | isElem ns "w" (notetype ++ "s") element =
+ | isElem ns "w" (notetype <> "s") element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" notetype element)
in
@@ -556,11 +557,11 @@ elemToNotes ns notetype element
M.fromList pairs
elemToNotes _ _ _ = Nothing
-elemToComments :: NameSpaces -> Element -> M.Map String Element
+elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttrByName ns "w" "id" e >>=
+ (\e -> findAttrTextByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" "comment" element)
in
@@ -632,7 +633,7 @@ testBitMask bitMaskS n =
pHeading :: ParagraphStyle -> Maybe (ParaStyleName, Int)
pHeading = getParStyleField headingLev . pStyle
-pNumInfo :: ParagraphStyle -> Maybe (String, String)
+pNumInfo :: ParagraphStyle -> Maybe (T.Text, T.Text)
pNumInfo = getParStyleField numInfo . pStyle
elemToBodyPart :: NameSpaces -> Element -> D BodyPart
@@ -640,7 +641,7 @@ elemToBodyPart ns element
| isElem ns "w" "p" element
, (c:_) <- findChildrenByName ns "m" "oMathPara" element =
do
- expsLst <- eitherToD $ readOMML $ showElement c
+ expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -664,7 +665,7 @@ elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChildByName ns "w" "tblPr" element
>>= findChildByName ns "w" "tblCaption"
- >>= findAttrByName ns "w" "val"
+ >>= findAttrTextByName ns "w" "val"
caption = fromMaybe "" caption'
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
@@ -687,10 +688,10 @@ lookupRelationship docLocation relid rels =
where
pairs = map (\(Relationship loc relid' target) -> ((loc, relid'), target)) rels
-expandDrawingId :: String -> D (FilePath, B.ByteString)
+expandDrawingId :: T.Text -> D (FilePath, B.ByteString)
expandDrawingId s = do
location <- asks envLocation
- target <- asks (lookupRelationship location s . envRelationships)
+ target <- asks (fmap T.unpack . lookupRelationship location s . envRelationships)
case target of
Just filepath -> do
bytes <- asks (lookup ("word/" ++ filepath) . envMedia)
@@ -699,12 +700,12 @@ expandDrawingId s = do
Nothing -> throwError DocxError
Nothing -> throwError DocxError
-getTitleAndAlt :: NameSpaces -> Element -> (String, String)
+getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
getTitleAndAlt ns element =
let mbDocPr = findChildByName ns "wp" "inline" element >>=
findChildByName ns "wp" "docPr"
- title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
- alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
+ title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -716,7 +717,7 @@ elemToParPart ns element
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrByName ns "r" "embed"
+ >>= findAttrTextByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -726,7 +727,7 @@ elemToParPart ns element
| isElem ns "w" "r" element
, Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrByName ns "r" "id"
+ >>= findAttrTextByName ns "r" "id"
in
case drawing of
-- Todo: check out title and attr for deprecated format.
@@ -795,7 +796,7 @@ elemToParPart ns element
fldCharState <- gets stateFldCharState
case fldCharState of
FldCharOpen -> do
- info <- eitherToD $ parseFieldInfo $ strContent instrText
+ info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
return NullParPart
_ -> return NullParPart
@@ -816,56 +817,56 @@ elemToParPart ns element
return $ ChangedRuns change runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrByName ns "w" "id" element
- , Just bmName <- findAttrByName ns "w" "name" element =
+ , Just bmId <- findAttrTextByName ns "w" "id" element
+ , Just bmName <- findAttrTextByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttrByName ns "r" "id" element = do
+ , Just relId <- findAttrTextByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
- case findAttrByName ns "w" "anchor" element of
- Just anchor -> return $ ExternalHyperLink (target ++ '#':anchor) runs
+ case findAttrTextByName ns "w" "anchor" element of
+ Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrByName ns "w" "anchor" element = do
+ , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrByName ns "w" "id" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrByName ns "w" "id" element =
+ , Just cmtId <- findAttrTextByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- fmap PlainOMath (eitherToD $ readOMML $ showElement element)
+ fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttrByName ns "w" "id" element
- , Just cmtAuthor <- findAttrByName ns "w" "author" element
- , Just cmtDate <- findAttrByName ns "w" "date" element = do
+ , Just cmtId <- findAttrTextByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cmtDate <- findAttrTextByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
-lookupFootnote :: String -> Notes -> Maybe Element
+lookupFootnote :: T.Text -> Notes -> Maybe Element
lookupFootnote s (Notes _ fns _) = fns >>= M.lookup s
-lookupEndnote :: String -> Notes -> Maybe Element
+lookupEndnote :: T.Text -> Notes -> Maybe Element
lookupEndnote s (Notes _ _ ens) = ens >>= M.lookup s
elemToExtent :: Element -> Extent
@@ -876,7 +877,7 @@ elemToExtent drawingElem =
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
childElemToRun :: NameSpaces -> Element -> D Run
@@ -887,7 +888,7 @@ childElemToRun ns element
= let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
+ >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
@@ -900,7 +901,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrByName ns "w" "id" element = do
+ , Just fnId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -908,7 +909,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrByName ns "w" "id" element = do
+ , Just enId <- findAttrTextByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -961,15 +962,15 @@ getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
| isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Insertion (ChangeInfo cId cAuthor cDate)
getTrackedChange ns element
| isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrByName ns "w" "id" element
- , Just cAuthor <- findAttrByName ns "w" "author" element
- , Just cDate <- findAttrByName ns "w" "date" element =
+ , Just cId <- findAttrTextByName ns "w" "id" element
+ , Just cAuthor <- findAttrTextByName ns "w" "author" element
+ , Just cDate <- findAttrTextByName ns "w" "date" element =
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor cDate)
getTrackedChange _ _ = Nothing
@@ -978,7 +979,7 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (fmap ParaStyleId . findAttrByName ns "w" "val")
+ (fmap ParaStyleId . findAttrTextByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = mapMaybe (`M.lookup` sty) style
@@ -1010,7 +1011,7 @@ elemToRunStyleD ns element
charStyles <- asks envCharStyles
let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrByName ns "w" "val" >>=
+ findAttrTextByName ns "w" "val" >>=
flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
@@ -1020,12 +1021,12 @@ elemToRunElem ns element
| isElem ns "w" "t" element
|| isElem ns "w" "delText" element
|| isElem ns "m" "t" element = do
- let str = strContent element
+ let str = T.pack $ strContent element
font <- asks envFont
case font of
Nothing -> return $ TextRun str
Just f -> return . TextRun $
- map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
+ T.map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab
| isElem ns "w" "softHyphen" element = return SoftHyphen
@@ -1043,11 +1044,11 @@ getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
case readLitChar ("\\x" ++ s) of
- [(char, _)] -> TextRun . maybe "" (:[]) $ getUnicode font char
+ [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
_ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
- getFont = stringToFont =<< findAttrByName ns "w" "font" element
+ getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
lowerFromPrivate ('F':xs) = '0':xs
lowerFromPrivate xs = xs
getSymChar _ _ = TextRun ""
@@ -1059,7 +1060,7 @@ elemToRunElems ns element
let qualName = elemName ns "w"
let font = do
fontElem <- findElement (qualName "rFonts") element
- stringToFont =<<
+ textToFont . T.pack =<<
foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index ac2d6fa07..f81707e92 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -3,6 +3,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Parse.Styles
Copyright : Copyright (C) 2014-2019 Jesse Rosenthal
@@ -46,20 +47,19 @@ import Prelude
import Codec.Archive.Zip
import Control.Applicative ((<|>))
import Control.Monad.Except
-import Data.Char (toLower)
-import Data.List
import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import Data.Coerce
import Text.Pandoc.Readers.Docx.Util
import qualified Text.Pandoc.UTF8 as UTF8
import Text.XML.Light
-newtype CharStyleId = CharStyleId String
+newtype CharStyleId = CharStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
-newtype ParaStyleId = ParaStyleId String
+newtype ParaStyleId = ParaStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
newtype CharStyleName = CharStyleName CIString
@@ -68,25 +68,31 @@ newtype ParaStyleName = ParaStyleName CIString
deriving (Show, Eq, Ord, IsString, FromStyleName)
-- Case-insensitive comparisons
-newtype CIString = CIString String deriving (Show, IsString, FromStyleName)
+newtype CIString = CIString T.Text deriving (Show, IsString, FromStyleName)
class FromStyleName a where
- fromStyleName :: a -> String
+ fromStyleName :: a -> T.Text
instance FromStyleName String where
+ fromStyleName = T.pack
+
+instance FromStyleName T.Text where
fromStyleName = id
class FromStyleId a where
- fromStyleId :: a -> String
+ fromStyleId :: a -> T.Text
instance FromStyleId String where
+ fromStyleId = T.pack
+
+instance FromStyleId T.Text where
fromStyleId = id
instance Eq CIString where
- (==) = (==) `on` map toLower . coerce
+ (==) = (==) `on` T.toCaseFold . coerce
instance Ord CIString where
- compare = compare `on` map toLower . coerce
+ compare = compare `on` T.toCaseFold . coerce
data VertAlign = BaseLn | SupScrpt | SubScrpt
deriving Show
@@ -108,7 +114,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
deriving Show
data ParStyle = ParStyle { headingLev :: Maybe (ParaStyleName, Int)
- , numInfo :: Maybe (String, String)
+ , numInfo :: Maybe (T.Text, T.Text)
, psParentStyle :: Maybe ParStyle
, pStyleName :: ParaStyleName
, pStyleId :: ParaStyleId
@@ -146,7 +152,7 @@ isBasedOnStyle ns element parentStyle
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
@@ -234,7 +240,7 @@ checkOnOff _ _ _ = Nothing
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
- = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
+ = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> (Just $ elemToRunStyle ns element parentStyle)
@@ -267,32 +273,32 @@ elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
- , Just n <- stringToInteger =<<
- (stripPrefix "heading " . map toLower $
+ , Just n <- stringToInteger . T.unpack =<<
+ (T.stripPrefix "heading " . T.toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
getHeaderLevel _ _ = Nothing
-getElementStyleName :: Coercible String a => NameSpaces -> Element -> Maybe a
+getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
- ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
- <|> findAttrByName ns "w" "styleId" el)
+ ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
+ <|> findAttrTextByName ns "w" "styleId" el)
-getNumInfo :: NameSpaces -> Element -> Maybe (String, String)
+getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
getNumInfo ns element = do
let numPr = findChildByName ns "w" "pPr" element >>=
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
- findAttrByName ns "w" "val")
+ findAttrTextByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
- findAttrByName ns "w" "val"
+ findAttrTextByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
- | Just styleId <- findAttrByName ns "w" "styleId" element
+ | Just styleId <- findAttrTextByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f4855efd2..0de1114bd 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -19,11 +19,14 @@ module Text.Pandoc.Readers.Docx.Util (
, elemToNameSpaces
, findChildByName
, findChildrenByName
+ , findAttrText
, findAttrByName
+ , findAttrTextByName
) where
import Prelude
import Data.Maybe (mapMaybe)
+import qualified Data.Text as T
import Text.XML.Light
type NameSpaces = [(String, String)]
@@ -55,7 +58,13 @@ findChildrenByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findChildren (elemName ns' pref name) el
+findAttrText :: QName -> Element -> Maybe T.Text
+findAttrText x = fmap T.pack . findAttr x
+
findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
findAttrByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findAttr (elemName ns' pref name) el
+
+findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
+findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index 60d406df1..3a92cfa19 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.DokuWiki
Copyright : Copyright (C) 2018-2019 Alexander Krotov
@@ -20,8 +21,7 @@ import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isAlphaNum, isDigit)
import qualified Data.Foldable as F
-import Data.List (intercalate, transpose, isPrefixOf, isSuffixOf)
-import Data.List.Split (splitOn)
+import Data.List (transpose)
import Data.Maybe (fromMaybe, catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
@@ -31,7 +31,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
-import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow)
-- | Read DokuWiki from an input string and return a Pandoc document.
readDokuWiki :: PandocMonad m
@@ -42,7 +42,7 @@ readDokuWiki opts s = do
let input = crFilter s
res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input
case res of
- Left e -> throwError $ PandocParsecError (T.unpack input) e
+ Left e -> throwError $ PandocParsecError input e
Right d -> return d
type DWParser = ParserT Text ParserState
@@ -71,9 +71,9 @@ parseDokuWiki =
B.doc . mconcat <$> many block <* spaces <* eof
-- | Parse <code> and <file> attributes
-codeLanguage :: PandocMonad m => DWParser m (String, [String], [(String, String)])
+codeLanguage :: PandocMonad m => DWParser m (Text, [Text], [(Text, Text)])
codeLanguage = try $ do
- rawLang <- option "-" (spaceChar *> manyTill anyChar (lookAhead (spaceChar <|> char '>')))
+ rawLang <- option "-" (spaceChar *> manyTillChar anyChar (lookAhead (spaceChar <|> char '>')))
let attr = case rawLang of
"-" -> []
l -> [l]
@@ -81,16 +81,16 @@ codeLanguage = try $ do
-- | Generic parser for <code> and <file> tags
codeTag :: PandocMonad m
- => ((String, [String], [(String, String)]) -> String -> a)
- -> String
+ => ((Text, [Text], [(Text, Text)]) -> Text -> a)
+ -> Text
-> DWParser m a
codeTag f tag = try $ f
<$ char '<'
- <* string tag
+ <* textStr tag
<*> codeLanguage
<* manyTill anyChar (char '>')
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</" <* string tag <* char '>')
+ <*> manyTillChar anyChar (try $ string "</" <* textStr tag <* char '>')
-- * Inline parsers
@@ -167,19 +167,19 @@ underlined :: PandocMonad m => DWParser m B.Inlines
underlined = try $ underlineSpan <$> enclosed (string "__") nestedInlines
nowiki :: PandocMonad m => DWParser m B.Inlines
-nowiki = try $ B.text <$ string "<nowiki>" <*> manyTill anyChar (try $ string "</nowiki>")
+nowiki = try $ B.text <$ string "<nowiki>" <*> manyTillChar anyChar (try $ string "</nowiki>")
percent :: PandocMonad m => DWParser m B.Inlines
-percent = try $ B.text <$> enclosed (string "%%") nestedString
+percent = try $ B.text <$> enclosed (string "%%") nestedText
-nestedString :: (Show a, PandocMonad m)
- => DWParser m a -> DWParser m String
-nestedString end = innerSpace <|> count 1 nonspaceChar
+nestedText :: (Show a, PandocMonad m)
+ => DWParser m a -> DWParser m Text
+nestedText end = innerSpace <|> countChar 1 nonspaceChar
where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
+ innerSpace = try $ many1Char spaceChar <* notFollowedBy end
monospaced :: PandocMonad m => DWParser m B.Inlines
-monospaced = try $ B.code <$> enclosed (string "''") nestedString
+monospaced = try $ B.code <$> enclosed (string "''") nestedText
subscript :: PandocMonad m => DWParser m B.Inlines
subscript = try $ B.subscript <$> between (string "<sub>") (try $ string "</sub>") nestedInlines
@@ -201,12 +201,12 @@ inlineFile :: PandocMonad m => DWParser m B.Inlines
inlineFile = codeTag B.codeWith "file"
inlineHtml :: PandocMonad m => DWParser m B.Inlines
-inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTill anyChar (try $ string "</html>")
+inlineHtml = try $ B.rawInline "html" <$ string "<html>" <*> manyTillChar anyChar (try $ string "</html>")
inlinePhp :: PandocMonad m => DWParser m B.Inlines
-inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTill anyChar (try $ string "</php>")
+inlinePhp = try $ B.codeWith ("", ["php"], []) <$ string "<php>" <*> manyTillChar anyChar (try $ string "</php>")
-makeLink :: (String, String) -> B.Inlines
+makeLink :: (Text, Text) -> B.Inlines
makeLink (text, url) = B.link url "" $ B.str text
autoEmail :: PandocMonad m => DWParser m B.Inlines
@@ -220,7 +220,7 @@ autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- uri
- guard $ checkLink (last url)
+ guard $ checkLink (T.last url)
return $ makeLink (text, url)
where
checkLink c
@@ -234,10 +234,10 @@ nocache :: PandocMonad m => DWParser m B.Inlines
nocache = try $ mempty <$ string "~~NOCACHE~~"
str :: PandocMonad m => DWParser m B.Inlines
-str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
+str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference)
symbol :: PandocMonad m => DWParser m B.Inlines
-symbol = B.str <$> count 1 nonspaceChar
+symbol = B.str <$> countChar 1 nonspaceChar
link :: PandocMonad m => DWParser m B.Inlines
link = try $ do
@@ -248,77 +248,78 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
return l
-isExternalLink :: String -> Bool
-isExternalLink s =
- case dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s of
- (':':'/':'/':_) -> True
- _ -> False
-
-isAbsolutePath :: String -> Bool
-isAbsolutePath ('.':_) = False
-isAbsolutePath s = ':' `elem` s
-
-normalizeDots :: String -> String
-normalizeDots path@('.':_) =
- case dropWhile (== '.') path of
- ':':_ -> path
- _ -> takeWhile (== '.') path ++ ':':dropWhile (== '.') path
-normalizeDots path = path
+isExternalLink :: Text -> Bool
+isExternalLink s = "://" `T.isPrefixOf` sSuff
+ where
+ sSuff = T.dropWhile (\c -> isAlphaNum c || (c `elem` ['-', '.', '+'])) s
+
+isAbsolutePath :: Text -> Bool
+isAbsolutePath (T.uncons -> Just ('.', _)) = False
+isAbsolutePath s = T.any (== ':') s
+
+normalizeDots :: Text -> Text
+normalizeDots path
+ | not (T.null pref) = case T.uncons suff of
+ Just (':', _) -> path
+ _ -> pref <> ":" <> suff
+ | otherwise = path
+ where
+ (pref, suff) = T.span (== '.') path
-normalizeInternalPath :: String -> String
+normalizeInternalPath :: Text -> Text
normalizeInternalPath path =
if isAbsolutePath path
then ensureAbsolute normalizedPath
else normalizedPath
where
- normalizedPath = intercalate "/" $ dropWhile (== ".") $ splitOn ":" $ normalizeDots path
- ensureAbsolute s@('/':_) = s
- ensureAbsolute s = '/':s
+ normalizedPath = T.intercalate "/" $ dropWhile (== ".") $ T.splitOn ":" $ normalizeDots path
+ ensureAbsolute s@(T.uncons -> Just ('/', _)) = s
+ ensureAbsolute s = "/" <> s
-normalizePath :: String -> String
+normalizePath :: Text -> Text
normalizePath path =
if isExternalLink path
then path
else normalizeInternalPath path
-urlToText :: String -> String
+urlToText :: Text -> Text
urlToText url =
if isExternalLink url
then url
- else reverse $ takeWhile (/= ':') $ reverse url
+ else T.takeWhileEnd (/= ':') url
-- Parse link or image
parseLink :: PandocMonad m
- => (String -> Maybe B.Inlines -> B.Inlines)
- -> String
- -> String
+ => (Text -> Maybe B.Inlines -> B.Inlines)
+ -> Text
+ -> Text
-> DWParser m B.Inlines
parseLink f l r = f
- <$ string l
- <*> many1Till anyChar (lookAhead (void (char '|') <|> try (void $ string r)))
- <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ string r)))
- <* string r
+ <$ textStr l
+ <*> many1TillChar anyChar (lookAhead (void (char '|') <|> try (void $ textStr r)))
+ <*> optionMaybe (B.trimInlines . mconcat <$> (char '|' *> manyTill inline (try $ lookAhead $ textStr r)))
+ <* textStr r
-- | Split Interwiki link into left and right part
-- | Return Nothing if it is not Interwiki link
-splitInterwiki :: String -> Maybe (String, String)
+splitInterwiki :: Text -> Maybe (Text, Text)
splitInterwiki path =
- case span (\c -> isAlphaNum c || c == '.') path of
- (l, '>':r) -> Just (l, r)
+ case T.span (\c -> isAlphaNum c || c == '.') path of
+ (l, T.uncons -> Just ('>', r)) -> Just (l, r)
_ -> Nothing
-interwikiToUrl :: String -> String -> String
-interwikiToUrl "callto" page = "callto://" ++ page
-interwikiToUrl "doku" page = "https://www.dokuwiki.org/" ++ page
-interwikiToUrl "phpfn" page = "https://secure.php.net/" ++ page
-interwikiToUrl "tel" page = "tel:" ++ page
-interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" ++ page
-interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" ++ page
-interwikiToUrl _ page = "https://www.google.com/search?q=" ++ page ++ "&btnI=lucky"
+interwikiToUrl :: Text -> Text -> Text
+interwikiToUrl "callto" page = "callto://" <> page
+interwikiToUrl "doku" page = "https://www.dokuwiki.org/" <> page
+interwikiToUrl "phpfn" page = "https://secure.php.net/" <> page
+interwikiToUrl "tel" page = "tel:" <> page
+interwikiToUrl "wp" page = "https://en.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpde" page = "https://de.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page
+interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page
+interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page
+interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky"
linkText :: PandocMonad m => DWParser m B.Inlines
linkText = parseLink fromRaw "[[" "]]"
@@ -338,23 +339,23 @@ linkText = parseLink fromRaw "[[" "]]"
Just (_, r) -> r
-- Matches strings like "100x100" (width x height) and "50" (width)
-isWidthHeightParameter :: String -> Bool
+isWidthHeightParameter :: Text -> Bool
isWidthHeightParameter s =
- case s of
- (x:xs) ->
- isDigit x && case dropWhile isDigit xs of
- ('x':ys@(_:_)) -> all isDigit ys
- "" -> True
+ case T.uncons s of
+ Just (x, xs) ->
+ isDigit x && case T.uncons $ T.dropWhile isDigit xs of
+ Just ('x', ys) | not (T.null ys) -> T.all isDigit ys
+ Nothing -> True
_ -> False
_ -> False
-parseWidthHeight :: String -> (Maybe String, Maybe String)
+parseWidthHeight :: Text -> (Maybe Text, Maybe Text)
parseWidthHeight s = (width, height)
where
- width = Just $ takeWhile isDigit s
+ width = Just $ T.takeWhile isDigit s
height =
- case dropWhile isDigit s of
- ('x':xs) -> Just xs
+ case T.uncons $ T.dropWhile isDigit s of
+ Just ('x', xs) -> Just xs
_ -> Nothing
image :: PandocMonad m => DWParser m B.Inlines
@@ -365,17 +366,17 @@ image = try $ parseLink fromRaw "{{" "}}"
then B.link normalizedPath "" (fromMaybe defaultDescription description)
else B.imageWith ("", classes, attributes) normalizedPath "" (fromMaybe defaultDescription description)
where
- (path', parameters) = span (/= '?') $ trim path
+ (path', parameters) = T.span (/= '?') $ trim path
normalizedPath = normalizePath path'
- leftPadding = " " `isPrefixOf` path
- rightPadding = " " `isSuffixOf` path
+ leftPadding = " " `T.isPrefixOf` path
+ rightPadding = " " `T.isSuffixOf` path
classes =
case (leftPadding, rightPadding) of
(False, False) -> []
(False, True) -> ["align-left"]
(True, False) -> ["align-right"]
(True, True) -> ["align-center"]
- parameterList = splitOn "&" $ drop 1 parameters
+ parameterList = T.splitOn "&" $ T.drop 1 parameters
linkOnly = "linkonly" `elem` parameterList
(width, height) = maybe (Nothing, Nothing) parseWidthHeight (F.find isWidthHeightParameter parameterList)
attributes = catMaybes [fmap ("width",) width, fmap ("height",) height]
@@ -389,7 +390,7 @@ block = do
<|> blockElements
<|> para
skipMany blankline
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
blockElements :: PandocMonad m => DWParser m B.Blocks
@@ -417,30 +418,30 @@ header = try $ do
attr <- registerHeader nullAttr contents
return $ B.headerWith attr (7 - lev) contents
-list :: PandocMonad m => String -> DWParser m B.Blocks
+list :: PandocMonad m => Text -> DWParser m B.Blocks
list prefix = bulletList prefix <|> orderedList prefix
-bulletList :: PandocMonad m => String -> DWParser m B.Blocks
+bulletList :: PandocMonad m => Text -> DWParser m B.Blocks
bulletList prefix = try $ B.bulletList <$> parseList prefix '*'
-orderedList :: PandocMonad m => String -> DWParser m B.Blocks
+orderedList :: PandocMonad m => Text -> DWParser m B.Blocks
orderedList prefix = try $ B.orderedList <$> parseList prefix '-'
parseList :: PandocMonad m
- => String
+ => Text
-> Char
-> DWParser m [B.Blocks]
parseList prefix marker =
many1 ((<>) <$> item <*> fmap mconcat (many continuation))
where
- continuation = try $ list (" " ++ prefix)
- item = try $ string prefix *> char marker *> char ' ' *> itemContents
+ continuation = try $ list (" " <> prefix)
+ item = try $ textStr prefix *> char marker *> char ' ' *> itemContents
itemContents = B.plain . mconcat <$> many1Till inline' eol
indentedCode :: PandocMonad m => DWParser m B.Blocks
-indentedCode = try $ B.codeBlock . unlines <$> many1 indentedLine
+indentedCode = try $ B.codeBlock . T.unlines <$> many1 indentedLine
where
- indentedLine = try $ string " " *> manyTill anyChar eol
+ indentedLine = try $ string " " *> manyTillChar anyChar eol
quote :: PandocMonad m => DWParser m B.Blocks
quote = try $ nestedQuote 0
@@ -456,13 +457,13 @@ blockHtml :: PandocMonad m => DWParser m B.Blocks
blockHtml = try $ B.rawBlock "html"
<$ string "<HTML>"
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</HTML>")
+ <*> manyTillChar anyChar (try $ string "</HTML>")
blockPhp :: PandocMonad m => DWParser m B.Blocks
blockPhp = try $ B.codeBlockWith ("", ["php"], [])
<$ string "<PHP>"
<* optional (manyTill spaceChar eol)
- <*> manyTill anyChar (try $ string "</PHP>")
+ <*> manyTillChar anyChar (try $ string "</PHP>")
table :: PandocMonad m => DWParser m B.Blocks
table = do
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 8e9746090..93ddeb9ee 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.EPUB
Copyright : Copyright (C) 2014-2019 Matthew Pickering
@@ -24,7 +25,8 @@ import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf, isPrefixOf)
+import Data.List (isInfixOf)
+import qualified Data.Text as T
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (fromMaybe, mapMaybe)
import qualified Data.Text.Lazy as TL
@@ -67,9 +69,9 @@ archiveToEPUB os archive = do
-- No need to collapse here as the image path is from the manifest file
let coverDoc = fromMaybe mempty (imageToPandoc <$> cover)
spine <- parseSpine items content
- let escapedSpine = map (escapeURI . takeFileName . fst) spine
+ let escapedSpine = map (escapeURI . T.pack . takeFileName . fst) spine
Pandoc _ bs <-
- foldM' (\a b -> ((a <>) . walk (prependHash escapedSpine))
+ foldM' (\a b -> ((a <>) . walk (prependHash $ escapedSpine))
`liftM` parseSpineElem root b) mempty spine
let ast = coverDoc <> Pandoc meta bs
fetchImages (M.elems items) root archive ast
@@ -79,7 +81,7 @@ archiveToEPUB os archive = do
parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
parseSpineElem (normalise -> r) (normalise -> path, mime) = do
doc <- mimeToReader mime r path
- let docSpan = B.doc $ B.para $ B.spanWith (takeFileName path, [], []) mempty
+ let docSpan = B.doc $ B.para $ B.spanWith (T.pack $ takeFileName path, [], []) mempty
return $ docSpan <> doc
mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
mimeToReader "application/xhtml+xml" (unEscapeString -> root)
@@ -108,18 +110,19 @@ fetchImages mimes root arc (query iq -> links) =
<$> findEntryByPath abslink arc
iq :: Inline -> [FilePath]
-iq (Image _ _ (url, _)) = [url]
+iq (Image _ _ (url, _)) = [T.unpack url]
iq _ = []
-- Remove relative paths
renameImages :: FilePath -> Inline -> Inline
renameImages root img@(Image attr a (url, b))
- | "data:" `isPrefixOf` url = img
- | otherwise = Image attr a (collapseFilePath (root </> url), b)
+ | "data:" `T.isPrefixOf` url = img
+ | otherwise = Image attr a ( T.pack $ collapseFilePath (root </> T.unpack url)
+ , b)
renameImages _ x = x
imageToPandoc :: FilePath -> Pandoc
-imageToPandoc s = B.doc . B.para $ B.image s "" mempty
+imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
imageMimes :: [MimeType]
imageMimes = ["image/gif", "image/jpeg", "image/png"]
@@ -144,7 +147,7 @@ parseManifest content coverId = do
uid <- findAttrE (emptyName "id") e
href <- findAttrE (emptyName "href") e
mime <- findAttrE (emptyName "media-type") e
- return (uid, (href, mime))
+ return (uid, (href, T.pack mime))
parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
@@ -172,11 +175,11 @@ parseMeta content = do
-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e@(stripNamespace . elName -> field) meta =
- addMetaField (renameMeta field) (B.str $ strContent e) meta
+ addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
-renameMeta :: String -> String
+renameMeta :: String -> T.Text
renameMeta "creator" = "author"
-renameMeta s = s
+renameMeta s = T.pack s
getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest archive = do
@@ -197,26 +200,26 @@ getManifest archive = do
fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences pathToFile =
walk (renameImages root)
- . walk (fixBlockIRs filename)
+ . walk (fixBlockIRs filename)
. walk (fixInlineIRs filename)
where
- (root, escapeURI -> filename) = splitFileName pathToFile
+ (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs s (Span as v) =
Span (fixAttrs s as) v
fixInlineIRs s (Code as code) =
Code (fixAttrs s as) code
-fixInlineIRs s (Link as is ('#':url, tit)) =
+fixInlineIRs s (Link as is (T.uncons -> Just ('#', url), tit)) =
Link (fixAttrs s as) is (addHash s url, tit)
fixInlineIRs s (Link as is t) =
Link (fixAttrs s as) is t
fixInlineIRs _ v = v
-prependHash :: [String] -> Inline -> Inline
+prependHash :: [T.Text] -> Inline -> Inline
prependHash ps l@(Link attr is (url, tit))
- | or [s `isPrefixOf` url | s <- ps] =
- Link attr is ('#':url, tit)
+ | or [s `T.isPrefixOf` url | s <- ps] =
+ Link attr is ("#" <> url, tit)
| otherwise = l
prependHash _ i = i
@@ -230,17 +233,17 @@ fixBlockIRs s (CodeBlock as code) =
fixBlockIRs _ b = b
fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
-addHash :: String -> String -> String
+addHash :: String -> T.Text -> T.Text
addHash _ "" = ""
-addHash s ident = takeFileName s ++ "#" ++ ident
+addHash s ident = T.pack (takeFileName s) <> "#" <> ident
-removeEPUBAttrs :: [(String, String)] -> [(String, String)]
+removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
-isEPUBAttr :: (String, String) -> Bool
-isEPUBAttr (k, _) = "epub:" `isPrefixOf` k
+isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
-- Library
@@ -291,4 +294,4 @@ findElementE :: PandocMonad m => QName -> Element -> m Element
findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ s) return
+mkE s = maybe (throwError . PandocParseError $ T.pack $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index 0b25b9fed..6eed3c104 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.FB2
Copyright : Copyright (C) 2018-2019 Alexander Krotov
@@ -27,12 +28,11 @@ import Control.Monad.Except (throwError)
import Control.Monad.State.Strict
import Data.ByteString.Lazy.Char8 ( pack )
import Data.ByteString.Base64.Lazy
-import Data.Char (isSpace, toUpper)
import Data.Functor
-import Data.List (dropWhileEnd, intersperse)
-import Data.List.Split (splitOn)
+import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text)
+import qualified Data.Text as T
import Data.Default
import Data.Maybe
import Text.HTML.TagSoup.Entity (lookupEntity)
@@ -48,8 +48,8 @@ type FB2 m = StateT FB2State m
data FB2State = FB2State{ fb2SectionLevel :: Int
, fb2Meta :: Meta
- , fb2Authors :: [String]
- , fb2Notes :: M.Map String Blocks
+ , fb2Authors :: [Text]
+ , fb2Notes :: M.Map Text Blocks
} deriving Show
instance Default FB2State where
@@ -76,19 +76,20 @@ readFB2 _ inp =
-- * Utility functions
-trim :: String -> String
-trim = dropWhileEnd isSpace . dropWhile isSpace
+trim :: Text -> Text
+trim = T.strip
-removeHash :: String -> String
-removeHash ('#':xs) = xs
-removeHash xs = xs
+removeHash :: Text -> Text
+removeHash t = case T.uncons t of
+ Just ('#', xs) -> xs
+ _ -> t
-convertEntity :: String -> String
-convertEntity e = fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: String -> Text
+convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
parseInline :: PandocMonad m => Content -> FB2 m Inlines
parseInline (Elem e) =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -96,12 +97,12 @@ parseInline (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ strContent e
+ "code" -> pure $ code $ T.pack $ strContent e
"image" -> parseInlineImageElement e
name -> do
report $ IgnoredElement name
pure mempty
-parseInline (Text x) = pure $ text $ cdData x
+parseInline (Text x) = pure $ text $ T.pack $ cdData x
parseInline (CRef r) = pure $ str $ convertEntity r
parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -111,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
parseRootElement e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"FictionBook" -> do
-- Parse notes before parsing the rest of the content.
case filterChild isNotesBody e of
@@ -144,7 +145,7 @@ parseNote e =
Just sectionId -> do
content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
oldNotes <- gets fb2Notes
- modify $ \s -> s { fb2Notes = M.insert ("#" ++ sectionId) content oldNotes }
+ modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
pure ()
where
isTitle x = qName (elName x) == "title"
@@ -156,7 +157,7 @@ parseNote e =
-- | Parse a child of @\<FictionBook>@ element.
parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
parseFictionBookChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"stylesheet" -> pure mempty -- stylesheet is ignored
"description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
"body" -> if isNotesBody e
@@ -168,7 +169,7 @@ parseFictionBookChild e =
-- | Parse a child of @\<description>@ element.
parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
parseDescriptionChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title-info" -> mapM_ parseTitleInfoChild (elChildren e)
"src-title-info" -> pure () -- ignore
"document-info" -> pure ()
@@ -176,13 +177,13 @@ parseDescriptionChild e =
"custom-info" -> pure ()
"output" -> pure ()
name -> do
- report $ IgnoredElement $ name ++ " in description"
+ report $ IgnoredElement $ name <> " in description"
pure mempty
-- | Parse a child of @\<body>@ element.
parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
parseBodyChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"image" -> parseImageElement e
"title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
"epigraph" -> parseEpigraph e
@@ -196,25 +197,25 @@ parseBinaryElement e =
(Nothing, _) -> report $ IgnoredElement "binary without id attribute"
(Just _, Nothing) ->
report $ IgnoredElement "binary without content-type attribute"
- (Just filename, contentType) -> insertMedia filename contentType (decodeLenient (pack (strContent e)))
+ (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
-- * Type parsers
-- | Parse @authorType@
-parseAuthor :: PandocMonad m => Element -> FB2 m String
-parseAuthor e = unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
+parseAuthor :: PandocMonad m => Element -> FB2 m Text
+parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
-parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe String)
+parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
parseAuthorChild e =
- case qName $ elName e of
- "first-name" -> pure $ Just $ strContent e
- "middle-name" -> pure $ Just $ strContent e
- "last-name" -> pure $ Just $ strContent e
- "nickname" -> pure $ Just $ strContent e
- "home-page" -> pure $ Just $ strContent e
- "email" -> pure $ Just $ strContent e
+ case T.pack $ qName $ elName e of
+ "first-name" -> pure $ Just $ T.pack $ strContent e
+ "middle-name" -> pure $ Just $ T.pack $ strContent e
+ "last-name" -> pure $ Just $ T.pack $ strContent e
+ "nickname" -> pure $ Just $ T.pack $ strContent e
+ "home-page" -> pure $ Just $ T.pack $ strContent e
+ "email" -> pure $ Just $ T.pack $ strContent e
name -> do
- report $ IgnoredElement $ name ++ " in author"
+ report $ IgnoredElement $ name <> " in author"
pure Nothing
-- | Parse @titleType@
@@ -236,13 +237,13 @@ parseTitleContent _ = pure Nothing
parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
parseImageElement e =
case href of
- Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
+ Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
Nothing -> do
report $ IgnoredElement " image without href"
pure mempty
- where alt = maybe mempty str $ findAttr (unqual "alt") e
- title = fromMaybe "" $ findAttr (unqual "title") e
- imgId = fromMaybe "" $ findAttr (unqual "id") e
+ where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
+ title = maybe "" T.pack $ findAttr (unqual "title") e
+ imgId = maybe "" T.pack $ findAttr (unqual "id") e
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
-- | Parse @pType@
@@ -256,7 +257,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
-- | Parse @citeType@ child
parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
parseCiteChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"empty-line" -> pure horizontalRule
@@ -271,13 +272,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
parsePoemChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"epigraph" -> parseEpigraph e
"stanza" -> parseStanza e
"text-author" -> para <$> parsePType e
- "date" -> pure $ para $ text $ strContent e
+ "date" -> pure $ para $ text $ T.pack $ strContent e
name -> report (UnexpectedXmlElement name "poem") $> mempty
parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -290,7 +291,7 @@ joinLineBlocks [] = []
parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
parseStanzaChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"v" -> lineBlock . (:[]) <$> parsePType e
@@ -300,11 +301,11 @@ parseStanzaChild e =
parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraph e =
divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
- where divId = fromMaybe "" $ findAttr (unqual "id") e
+ where divId = maybe "" T.pack $ findAttr (unqual "id") e
parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraphChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -318,7 +319,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
parseAnnotationChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -332,14 +333,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
parseSection e = do
n <- gets fb2SectionLevel
modify $ \st -> st{ fb2SectionLevel = n + 1 }
- let sectionId = fromMaybe "" $ findAttr (unqual "id") e
+ let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
modify $ \st -> st{ fb2SectionLevel = n }
pure bs
parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
parseSectionChild e =
- case qName $ elName e of
+ case T.pack $ qName $ elName e of
"title" -> parseBodyChild e
"epigraph" -> parseEpigraph e
"image" -> parseImageElement e
@@ -361,16 +362,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
parseNamedStyle e = do
content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
- let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
+ let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
case findAttr (unqual "name") e of
- Just name -> pure $ spanWith ("", [name], lang) content
+ Just name -> pure $ spanWith ("", [T.pack name], lang) content
Nothing -> do
report $ IgnoredElement "link without required name"
pure mempty
parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
parseNamedStyleChild (Elem e) =
- case qName (elName e) of
+ case T.pack $ qName (elName e) of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -378,10 +379,10 @@ parseNamedStyleChild (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ strContent e
+ "code" -> pure $ code $ T.pack $ strContent e
"image" -> parseInlineImageElement e
name -> do
- report $ IgnoredElement $ name ++ " in style"
+ report $ IgnoredElement $ name <> " in style"
pure mempty
parseNamedStyleChild x = parseInline x
@@ -390,7 +391,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
parseLinkType e = do
content <- mconcat <$> mapM parseStyleLinkType (elContent e)
notes <- gets fb2Notes
- case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just href -> case findAttr (QName "type" Nothing Nothing) e of
Just "note" -> case M.lookup href notes of
Nothing -> pure $ link href "" content
@@ -417,19 +418,21 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
-- | Parse @title-infoType@
parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
parseTitleInfoChild e =
- case qName (elName e) of
+ case T.pack $ qName (elName e) of
"genre" -> pure ()
"author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
- "book-title" -> modify (setMeta "title" (text $ strContent e))
+ "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
"annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
- "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ splitOn "," $ strContent e))
- "date" -> modify (setMeta "date" (text $ strContent e))
+ "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
+ $ T.pack
+ $ strContent e))
+ "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
"coverpage" -> parseCoverPage e
"lang" -> pure ()
"src-lang" -> pure ()
"translator" -> pure ()
"sequence" -> pure ()
- name -> report $ IgnoredElement $ name ++ " in title-info"
+ name -> report $ IgnoredElement $ name <> " in title-info"
parseCoverPage :: PandocMonad m => Element -> FB2 m ()
parseCoverPage e =
@@ -437,7 +440,7 @@ parseCoverPage e =
Just img -> case href of
Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
Nothing -> pure ()
- where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+ where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
Nothing -> pure ()
-- | Parse @inlineImageType@ element
@@ -450,5 +453,5 @@ parseInlineImageElement e =
Nothing -> do
report $ IgnoredElement "inline image without href"
pure mempty
- where alt = maybe mempty str $ findAttr (unqual "alt") e
- href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
+ href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index e03ac6a97..1c2892d6a 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -1,5 +1,5 @@
-{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
@@ -35,8 +35,7 @@ import Control.Monad.Reader (ReaderT, ask, asks, lift, local, runReaderT)
import Data.Char (isAlphaNum, isLetter)
import Data.Default (Default (..), def)
import Data.Foldable (for_)
-import Data.List (isPrefixOf)
-import Data.List.Split (wordsBy, splitWhen)
+import Data.List.Split (splitWhen)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, isJust, isNothing)
import Data.Monoid (First (..))
@@ -62,8 +61,8 @@ import Text.Pandoc.Options (
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI,
- extractSpaces, htmlSpanLikeElements,
- onlySimpleTableCells, safeRead, underlineSpan)
+ extractSpaces, htmlSpanLikeElements, elemText, splitTextBy,
+ onlySimpleTableCells, safeRead, underlineSpan, tshow)
import Text.Pandoc.Walk
import Text.Parsec.Error
import Text.TeXMath (readMathML, writeTeX)
@@ -93,14 +92,14 @@ readHtml opts inp = do
"source" tags
case result of
Right doc -> return doc
- Left err -> throwError $ PandocParseError $ getError err
+ Left err -> throwError $ PandocParseError $ T.pack $ getError err
replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block]
replaceNotes bs = do
st <- getState
return $ walk (replaceNotes' (noteTable st)) bs
-replaceNotes' :: [(String, Blocks)] -> Inline -> Inline
+replaceNotes' :: [(Text, Blocks)] -> Inline -> Inline
replaceNotes' noteTbl (RawInline (Format "noteref") ref) =
maybe (Str "") (Note . B.toList) $ lookup ref noteTbl
replaceNotes' _ x = x
@@ -108,9 +107,9 @@ replaceNotes' _ x = x
data HTMLState =
HTMLState
{ parserState :: ParserState,
- noteTable :: [(String, Blocks)],
+ noteTable :: [(Text, Blocks)],
baseHref :: Maybe URI,
- identifiers :: Set.Set String,
+ identifiers :: Set.Set Text,
logMessages :: [LogMessage],
macros :: M.Map Text Macro
}
@@ -134,7 +133,7 @@ pHtml :: PandocMonad m => TagParser m Blocks
pHtml = try $ do
(TagOpen "html" attr) <- lookAhead pAny
for_ (lookup "lang" attr) $
- updateState . B.setMeta "lang" . B.text . T.unpack
+ updateState . B.setMeta "lang" . B.text
pInTags "html" block
pBody :: PandocMonad m => TagParser m Blocks
@@ -146,11 +145,11 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny)
setTitle t = mempty <$ updateState (B.setMeta "title" t)
pMetaTag = do
mt <- pSatisfy (matchTagOpen "meta" [])
- let name = T.unpack $ fromAttrib "name" mt
- if null name
+ let name = fromAttrib "name" mt
+ if T.null name
then return mempty
else do
- let content = T.unpack $ fromAttrib "content" mt
+ let content = fromAttrib "content" mt
updateState $ \s ->
let ps = parserState s in
s{ parserState = ps{
@@ -187,13 +186,13 @@ block = do
, pFigure
, pRawHtmlBlock
]
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
-namespaces :: PandocMonad m => [(String, TagParser m Inlines)]
+namespaces :: PandocMonad m => [(Text, TagParser m Inlines)]
namespaces = [(mathMLNamespace, pMath True)]
-mathMLNamespace :: String
+mathMLNamespace :: Text
mathMLNamespace = "http://www.w3.org/1998/Math/MathML"
eSwitch :: (PandocMonad m, Monoid a)
@@ -233,7 +232,7 @@ eFootnote = try $ do
content <- pInTags tag block
addNote ident content
-addNote :: PandocMonad m => String -> Blocks -> TagParser m ()
+addNote :: PandocMonad m => Text -> Blocks -> TagParser m ()
addNote uid cont = updateState (\s -> s {noteTable = (uid, cont) : noteTable s})
eNoteref :: PandocMonad m => TagParser m Inlines
@@ -245,8 +244,8 @@ eNoteref = try $ do
-> (lookup "type" as <|> lookup "epub:type" as)
== Just "noteref"
_ -> False)
- ident <- case T.unpack <$> lookup "href" attr of
- Just ('#':rest) -> return rest
+ ident <- case lookup "href" attr >>= T.uncons of
+ Just ('#', rest) -> return rest
_ -> mzero
_ <- manyTill pAny (pSatisfy (\case
TagClose t -> t == tag
@@ -287,7 +286,7 @@ pListItem nonItem = do
maybe id addId (lookup "id" attr) <$>
pInTags "li" block <* skipMany nonItem
-parseListStyleType :: String -> ListNumberStyle
+parseListStyleType :: Text -> ListNumberStyle
parseListStyleType "lower-roman" = LowerRoman
parseListStyleType "upper-roman" = UpperRoman
parseListStyleType "lower-alpha" = LowerAlpha
@@ -295,7 +294,7 @@ parseListStyleType "upper-alpha" = UpperAlpha
parseListStyleType "decimal" = Decimal
parseListStyleType _ = DefaultStyle
-parseTypeAttr :: String -> ListNumberStyle
+parseTypeAttr :: Text -> ListNumberStyle
parseTypeAttr "i" = LowerRoman
parseTypeAttr "I" = UpperRoman
parseTypeAttr "a" = LowerAlpha
@@ -404,20 +403,19 @@ pDiv = try $ do
pRawHtmlBlock :: PandocMonad m => TagParser m Blocks
pRawHtmlBlock = do
- raw <- T.unpack <$>
- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea"
- <|> pRawTag)
+ raw <- (pHtmlBlock "script" <|> pHtmlBlock "style" <|> pHtmlBlock "textarea"
+ <|> pRawTag)
exts <- getOption readerExtensions
- if extensionEnabled Ext_raw_html exts && not (null raw)
+ if extensionEnabled Ext_raw_html exts && not (T.null raw)
then return $ B.rawBlock "html" raw
else ignore raw
-ignore :: (Monoid a, PandocMonad m) => String -> TagParser m a
+ignore :: (Monoid a, PandocMonad m) => Text -> TagParser m a
ignore raw = do
pos <- getPosition
-- raw can be null for tags like <!DOCTYPE>; see paRawTag
-- in this case we don't want a warning:
- unless (null raw) $
+ unless (T.null raw) $
logMessage $ SkippedContent raw pos
return mempty
@@ -438,7 +436,7 @@ eSection = try $ do
headerLevel :: Text -> TagParser m Int
headerLevel tagtype =
- case safeRead (T.unpack (T.drop 1 tagtype)) of
+ case safeRead (T.drop 1 tagtype) of
Just level ->
-- try (do
-- guardEnabled Ext_epub_html_exts
@@ -468,7 +466,7 @@ pHeader = try $ do
level <- headerLevel tagtype
contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof)
let ident = fromMaybe "" $ lookup "id" attr
- let classes = maybe [] words $ lookup "class" attr
+ let classes = maybe [] T.words $ lookup "class" attr
let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"]
attr'' <- registerHeader (ident, classes, keyvals) contents
return $ if bodyTitle
@@ -529,14 +527,14 @@ pCol = try $ do
optional $ pSatisfy (matchTagClose "col")
skipMany pBlank
let width = case lookup "width" attribs of
- Nothing -> case lookup "style" attribs of
- Just ('w':'i':'d':'t':'h':':':xs) | '%' `elem` xs ->
- fromMaybe 0.0 $ safeRead (filter
- (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
- _ -> 0.0
- Just x | not (null x) && last x == '%' ->
- fromMaybe 0.0 $ safeRead (init x)
- _ -> 0.0
+ Nothing -> case lookup "style" attribs of
+ Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs ->
+ fromMaybe 0.0 $ safeRead (T.filter
+ (`notElem` (" \t\r\n%'\";" :: [Char])) xs)
+ _ -> 0.0
+ Just (T.unsnoc -> Just (xs, '%')) ->
+ fromMaybe 0.0 $ safeRead xs
+ _ -> 0.0
if width > 0.0
then return $ width / 100.0
else return 0.0
@@ -562,7 +560,7 @@ pCell celltype = try $ do
let extractAlign' [] = ""
extractAlign' ("text-align":x:_) = x
extractAlign' (_:xs) = extractAlign' xs
- let extractAlign = extractAlign' . wordsBy (`elem` [' ','\t',';',':'])
+ let extractAlign = extractAlign' . splitTextBy (`elemText` " \t;:")
let align = case maybeFromAttrib "align" tag `mplus`
(extractAlign <$> maybeFromAttrib "style" tag) of
Just "left" -> AlignLeft
@@ -610,7 +608,7 @@ pFigure = try $ do
let caption = fromMaybe mempty mbcap
case B.toList <$> mbimg of
Just [Image attr _ (url, tit)] ->
- return $ B.para $ B.imageWith attr url ("fig:" ++ tit) caption
+ return $ B.para $ B.imageWith attr url ("fig:" <> tit) caption
_ -> mzero
pCodeBlock :: PandocMonad m => TagParser m Blocks
@@ -618,21 +616,21 @@ pCodeBlock = try $ do
TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" [])
let attr = toStringAttr attr'
contents <- manyTill pAny (pCloses "pre" <|> eof)
- let rawText = concatMap tagToString contents
+ let rawText = T.concat $ map tagToText contents
-- drop leading newline if any
- let result' = case rawText of
- '\n':xs -> xs
- _ -> rawText
+ let result' = case T.uncons rawText of
+ Just ('\n', xs) -> xs
+ _ -> rawText
-- drop trailing newline if any
- let result = case reverse result' of
- '\n':_ -> init result'
- _ -> result'
+ let result = case T.unsnoc result' of
+ Just (result'', '\n') -> result''
+ _ -> result'
return $ B.codeBlockWith (mkAttr attr) result
-tagToString :: Tag Text -> String
-tagToString (TagText s) = T.unpack s
-tagToString (TagOpen "br" _) = "\n"
-tagToString _ = ""
+tagToText :: Tag Text -> Text
+tagToText (TagText s) = s
+tagToText (TagOpen "br" _) = "\n"
+tagToText _ = ""
inline :: PandocMonad m => TagParser m Inlines
inline = choice
@@ -667,7 +665,7 @@ pLocation = do
pSat :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSat f = do
pos <- getPosition
- token show (const pos) (\x -> if f x then Just x else Nothing)
+ token tshow (const pos) (\x -> if f x then Just x else Nothing)
pSatisfy :: PandocMonad m => (Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy f = try $ optional pLocation >> pSat f
@@ -688,10 +686,10 @@ pQ = choice $ map try [citedQuote, normalQuote]
where citedQuote = do
tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst))
- url <- canonicalizeUrl $ T.unpack $ fromAttrib "cite" tag
- let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+ url <- canonicalizeUrl $ fromAttrib "cite" tag
+ let uid = fromMaybe (fromAttrib "name" tag) $
maybeFromAttrib "id" tag
- let cls = words $ T.unpack $ fromAttrib "class" tag
+ let cls = T.words $ fromAttrib "class" tag
makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)])
normalQuote = do
@@ -729,7 +727,7 @@ pSpanLike =
TagOpen _ attrs <- pSatisfy $ tagOpenLit tagName (const True)
let (ids, cs, kvs) = mkAttr . toStringAttr $ attrs
content <- mconcat <$> manyTill inline (pCloses tagName <|> eof)
- return $ B.spanWith (ids, T.unpack tagName : cs, kvs) content
+ return $ B.spanWith (ids, tagName : cs, kvs) content
pSmall :: PandocMonad m => TagParser m Inlines
pSmall = pInlinesInTags "small" (B.spanWith ("",["small"],[]))
@@ -753,19 +751,18 @@ pLineBreak = do
-- Unlike fromAttrib from tagsoup, this distinguishes
-- between a missing attribute and an attribute with empty content.
-maybeFromAttrib :: String -> Tag Text -> Maybe String
-maybeFromAttrib name (TagOpen _ attrs) =
- T.unpack <$> lookup (T.pack name) attrs
+maybeFromAttrib :: Text -> Tag Text -> Maybe Text
+maybeFromAttrib name (TagOpen _ attrs) = lookup name attrs
maybeFromAttrib _ _ = Nothing
pLink :: PandocMonad m => TagParser m Inlines
pLink = try $ do
tag <- pSatisfy $ tagOpenLit "a" (const True)
- let title = T.unpack $ fromAttrib "title" tag
+ let title = fromAttrib "title" tag
-- take id from id attribute if present, otherwise name
- let uid = fromMaybe (T.unpack $ fromAttrib "name" tag) $
+ let uid = fromMaybe (fromAttrib "name" tag) $
maybeFromAttrib "id" tag
- let cls = words $ T.unpack $ fromAttrib "class" tag
+ let cls = T.words $ fromAttrib "class" tag
lab <- mconcat <$> manyTill inline (pCloses "a")
-- check for href; if href, then a link, otherwise a span
case maybeFromAttrib "href" tag of
@@ -778,34 +775,33 @@ pLink = try $ do
pImage :: PandocMonad m => TagParser m Inlines
pImage = do
tag <- pSelfClosing (=="img") (isJust . lookup "src")
- url <- canonicalizeUrl $ T.unpack $ fromAttrib "src" tag
- let title = T.unpack $ fromAttrib "title" tag
- let alt = T.unpack $ fromAttrib "alt" tag
- let uid = T.unpack $ fromAttrib "id" tag
- let cls = words $ T.unpack $ fromAttrib "class" tag
+ url <- canonicalizeUrl $ fromAttrib "src" tag
+ let title = fromAttrib "title" tag
+ let alt = fromAttrib "alt" tag
+ let uid = fromAttrib "id" tag
+ let cls = T.words $ fromAttrib "class" tag
let getAtt k = case fromAttrib k tag of
"" -> []
- v -> [(T.unpack k, T.unpack v)]
+ v -> [(k, v)]
let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"]
return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt)
-pCodeWithClass :: PandocMonad m => [(T.Text,String)] -> TagParser m Inlines
-pCodeWithClass elemToClass = try $ do
+pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines
+pCodeWithClass elemToClass = try $ do
let tagTest = flip elem . fmap fst $ elemToClass
TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True)
result <- manyTill pAny (pCloses open)
let (ids,cs,kvs) = mkAttr . toStringAttr $ attr'
cs' = maybe cs (:cs) . lookup open $ elemToClass
return . B.codeWith (ids,cs',kvs) .
- unwords . lines . T.unpack . innerText $ result
+ T.unwords . T.lines . innerText $ result
pCode :: PandocMonad m => TagParser m Inlines
pCode = try $ do
(TagOpen open attr') <- pSatisfy $ tagOpen (`elem` ["code","tt"]) (const True)
let attr = toStringAttr attr'
result <- manyTill pAny (pCloses open)
- return $ B.codeWith (mkAttr attr) $ unwords $ lines $ T.unpack $
- innerText result
+ return $ B.codeWith (mkAttr attr) $ T.unwords $ T.lines $ innerText result
pSpan :: PandocMonad m => TagParser m Inlines
pSpan = try $ do
@@ -817,7 +813,7 @@ pSpan = try $ do
where styleAttr = fromMaybe "" $ lookup "style" attr
fontVariant = fromMaybe "" $ pickStyleAttrProps ["font-variant"] styleAttr
classes = fromMaybe [] $
- words <$> lookup "class" attr
+ T.words <$> lookup "class" attr
let tag = if isSmallCaps then B.smallcaps else B.spanWith (mkAttr attr)
return $ tag contents
@@ -829,18 +825,17 @@ pRawHtmlInline = do
then pSatisfy (not . isBlockTag)
else pSatisfy isInlineTag
exts <- getOption readerExtensions
- let raw = T.unpack $ renderTags' [result]
+ let raw = renderTags' [result]
if extensionEnabled Ext_raw_html exts
then return $ B.rawInline "html" raw
else ignore raw
-mathMLToTeXMath :: String -> Either String String
+mathMLToTeXMath :: Text -> Either Text Text
mathMLToTeXMath s = writeTeX <$> readMathML s
-toStringAttr :: [(Text, Text)] -> [(String, String)]
+toStringAttr :: [(Text, Text)] -> [(Text, Text)]
toStringAttr = map go
- where go (x,y) = (T.unpack (fromMaybe x $ T.stripPrefix "data-" x),
- T.unpack y)
+ where go (x,y) = (fromMaybe x $ T.stripPrefix "data-" x, y)
pScriptMath :: PandocMonad m => TagParser m Inlines
pScriptMath = try $ do
@@ -849,8 +844,7 @@ pScriptMath = try $ do
Just x | "math/tex" `T.isPrefixOf` x
-> return $ "display" `T.isSuffixOf` x
_ -> mzero
- contents <- T.unpack . innerText <$>
- manyTill pAny (pSatisfy (matchTagClose "script"))
+ contents <- innerText <$> manyTill pAny (pSatisfy (matchTagClose "script"))
return $ (if isdisplay then B.displayMath else B.math) contents
pMath :: PandocMonad m => Bool -> TagParser m Inlines
@@ -862,11 +856,11 @@ pMath inCase = try $ do
unless inCase $
guard (maybe True (== mathMLNamespace) (lookup "xmlns" attr))
contents <- manyTill pAny (pSatisfy (matchTagClose "math"))
- case mathMLToTeXMath (T.unpack $ renderTags $
+ case mathMLToTeXMath (renderTags $
[open] <> contents <> [TagClose "math"]) of
Left _ -> return $ B.spanWith ("",["math"],attr) $ B.text $
- T.unpack $ innerText contents
- Right [] -> return mempty
+ innerText contents
+ Right "" -> return mempty
Right x -> return $ case lookup "display" attr of
Just "block" -> B.displayMath x
_ -> B.math x
@@ -925,7 +919,7 @@ pTagText = try $ do
parsed <- lift $ lift $
flip runReaderT qu $ runParserT (many pTagContents) st "text" str
case parsed of
- Left _ -> throwError $ PandocParseError $ "Could not parse `" <> T.unpack str <> "'"
+ Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'"
Right result -> return $ mconcat result
pBlank :: PandocMonad m => TagParser m ()
@@ -954,11 +948,11 @@ pRawTeX = do
guardEnabled Ext_raw_tex
inp <- getInput
st <- getState
- res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
+ res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" inp
case res of
Left _ -> mzero
Right (contents, raw) -> do
- _ <- count (length raw) anyChar
+ _ <- count (T.length raw) anyChar
return $ B.rawInline "tex" contents
pStr :: PandocMonad m => InlinesParser m Inlines
@@ -966,7 +960,7 @@ pStr = do
result <- many1 $ satisfy $ \c ->
not (isSpace c) && not (isSpecial c) && not (isBad c)
updateLastStrPos
- return $ B.str result
+ return $ B.str $ T.pack result
isSpecial :: Char -> Bool
isSpecial '"' = True
@@ -982,7 +976,7 @@ isSpecial '\8221' = True
isSpecial _ = False
pSymbol :: PandocMonad m => InlinesParser m Inlines
-pSymbol = satisfy isSpecial >>= return . B.str . (:[])
+pSymbol = satisfy isSpecial >>= return . B.str . T.singleton
isBad :: Char -> Bool
isBad c = c >= '\128' && c <= '\159' -- not allowed in HTML
@@ -1019,7 +1013,7 @@ pBad = do
'\158' -> '\382'
'\159' -> '\376'
_ -> '?'
- return $ B.str [c']
+ return $ B.str $ T.singleton c'
pSpace :: PandocMonad m => InlinesParser m Inlines
pSpace = many1 (satisfy isSpace) >>= \xs ->
@@ -1156,8 +1150,8 @@ _ `closes` _ = False
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: Monad m
- => (Tag String -> Bool)
- -> ParserT String st m String
+ => (Tag Text -> Bool)
+ -> ParserT Text st m Text
htmlInBalanced f = try $ do
lookAhead (char '<')
inp <- getInput
@@ -1174,21 +1168,21 @@ htmlInBalanced f = try $ do
(TagClose _ : TagPosition er ec : _) -> do
let ls = er - sr
let cs = ec - sc
- lscontents <- unlines <$> count ls anyLine
+ lscontents <- T.unlines <$> count ls anyLine
cscontents <- count cs anyChar
closetag <- do
x <- many (satisfy (/='>'))
char '>'
return (x <> ">")
- return (lscontents <> cscontents <> closetag)
+ return $ lscontents <> T.pack cscontents <> T.pack closetag
_ -> mzero
_ -> mzero
-htmlInBalanced' :: String
- -> [Tag String]
- -> [Tag String]
+htmlInBalanced' :: Text
+ -> [Tag Text]
+ -> [Tag Text]
htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts
- where go :: Int -> [Tag String] -> Maybe [Tag String]
+ where go :: Int -> [Tag Text] -> Maybe [Tag Text]
go n (t@(TagOpen tn' _):rest) | tn' == tagname =
(t :) <$> go (n + 1) rest
go 1 (t@(TagClose tn'):_) | tn' == tagname =
@@ -1204,8 +1198,8 @@ hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: (HasReaderOptions st, Monad m)
- => (Tag String -> Bool)
- -> ParserT [Char] st m (Tag String, String)
+ => (Tag Text -> Bool)
+ -> ParserT Text st m (Tag Text, Text)
htmlTag f = try $ do
lookAhead (char '<')
startpos <- getPosition
@@ -1213,7 +1207,7 @@ htmlTag f = try $ do
let ts = canonicalizeTags $ parseTagsOptions
parseOptions{ optTagWarning = False
, optTagPosition = True }
- (inp ++ " ") -- add space to ensure that
+ (inp <> " ") -- add space to ensure that
-- we get a TagPosition after the tag
(next, ln, col) <- case ts of
(TagPosition{} : next : TagPosition ln col : _)
@@ -1225,13 +1219,12 @@ htmlTag f = try $ do
-- so we exclude . even though it's a valid character
-- in XML element names
let isNameChar c = isAlphaNum c || c == ':' || c == '-' || c == '_'
- let isName s = case s of
- [] -> False
- (c:cs) -> isLetter c && all isNameChar cs
- let isPI s = case s of
- ('?':_) -> True -- processing instruction
- _ -> False
-
+ let isName s = case T.uncons s of
+ Nothing -> False
+ Just (c, cs) -> isLetter c && T.all isNameChar cs
+ let isPI s = case T.uncons s of
+ Just ('?', _) -> True -- processing instruction
+ _ -> False
let endpos = if ln == 1
then setSourceColumn startpos
(sourceColumn startpos + (col - 1))
@@ -1247,18 +1240,18 @@ htmlTag f = try $ do
-- basic sanity check, since the parser is very forgiving
-- and finds tags in stuff like x<y)
guard $ isName tagname || isPI tagname
- guard $ not $ null tagname
+ guard $ not $ T.null tagname
-- <https://example.org> should NOT be a tag either.
-- tagsoup will parse it as TagOpen "https:" [("example.org","")]
- guard $ last tagname /= ':'
+ guard $ T.last tagname /= ':'
char '<'
rendered <- manyTill anyChar endAngle
- return (next, "<" ++ rendered ++ ">")
+ return (next, T.pack $ "<" ++ rendered ++ ">")
case next of
TagComment s
- | "<!--" `isPrefixOf` inp -> do
+ | "<!--" `T.isPrefixOf` inp -> do
string "<!--"
- count (length s) anyChar
+ count (T.length s) anyChar
string "-->"
stripComments <- getOption readerStripComments
if stripComments
@@ -1272,12 +1265,12 @@ htmlTag f = try $ do
handleTag tagname
_ -> mzero
-mkAttr :: [(String, String)] -> Attr
+mkAttr :: [(Text, Text)] -> Attr
mkAttr attr = (attribsId, attribsClasses, attribsKV)
where attribsId = fromMaybe "" $ lookup "id" attr
- attribsClasses = words (fromMaybe "" $ lookup "class" attr) <> epubTypes
+ attribsClasses = T.words (fromMaybe "" $ lookup "class" attr) <> epubTypes
attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
- epubTypes = words $ fromMaybe "" $ lookup "epub:type" attr
+ epubTypes = T.words $ fromMaybe "" $ lookup "epub:type" attr
-- Strip namespace prefixes
stripPrefixes :: [Tag Text] -> [Tag Text]
@@ -1304,11 +1297,11 @@ isSpace _ = False
-- Utilities
-- | Adjusts a url according to the document's base URL.
-canonicalizeUrl :: PandocMonad m => String -> TagParser m String
+canonicalizeUrl :: PandocMonad m => Text -> TagParser m Text
canonicalizeUrl url = do
mbBaseHref <- baseHref <$> getState
- return $ case (parseURIReference url, mbBaseHref) of
- (Just rel, Just bs) -> show (rel `nonStrictRelativeTo` bs)
+ return $ case (parseURIReference (T.unpack url), mbBaseHref) of
+ (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs)
_ -> url
diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs
index 0a048b6e6..3fc2f9715 100644
--- a/src/Text/Pandoc/Readers/Haddock.hs
+++ b/src/Text/Pandoc/Readers/Haddock.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE CPP #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Haddock
Copyright : Copyright (C) 2013 David Lazar
@@ -17,9 +18,10 @@ module Text.Pandoc.Readers.Haddock
import Prelude
import Control.Monad.Except (throwError)
-import Data.List (intersperse, stripPrefix)
+import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import Data.Text (Text, unpack)
+import qualified Data.Text as T
import Documentation.Haddock.Parser
import Documentation.Haddock.Types as H
import Text.Pandoc.Builder (Blocks, Inlines)
@@ -28,7 +30,7 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
-import Text.Pandoc.Shared (crFilter, splitBy, trim)
+import Text.Pandoc.Shared (crFilter, splitTextBy, trim)
-- | Parse Haddock markup and return a 'Pandoc' document.
@@ -51,7 +53,7 @@ docHToBlocks d' =
case d' of
DocEmpty -> mempty
DocAppend (DocParagraph (DocHeader h)) (DocParagraph (DocAName ident)) ->
- B.headerWith (ident,[],[]) (headerLevel h)
+ B.headerWith (T.pack ident,[],[]) (headerLevel h)
(docHToInlines False $ headerTitle h)
DocAppend d1 d2 -> mappend (docHToBlocks d1) (docHToBlocks d2)
DocString _ -> inlineFallback
@@ -73,12 +75,12 @@ docHToBlocks d' =
DocDefList items -> B.definitionList (map (\(d,t) ->
(docHToInlines False d,
[consolidatePlains $ docHToBlocks t])) items)
- DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) s
+ DocCodeBlock (DocString s) -> B.codeBlockWith ("",[],[]) $ T.pack s
DocCodeBlock d -> B.para $ docHToInlines True d
DocHyperlink _ -> inlineFallback
DocPic _ -> inlineFallback
DocAName _ -> inlineFallback
- DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim s)
+ DocProperty s -> B.codeBlockWith ("",["property","haskell"],[]) (trim $ T.pack s)
DocExamples es -> mconcat $ map (\e ->
makeExample ">>>" (exampleExpression e) (exampleResult e)) es
DocTable H.Table{ tableHeaderRows = headerRows
@@ -114,58 +116,58 @@ docHToInlines isCode d' =
(docHToInlines isCode d2)
DocString s
| isCode -> mconcat $ intersperse B.linebreak
- $ map B.code $ splitBy (=='\n') s
- | otherwise -> B.text s
+ $ map B.code $ splitTextBy (=='\n') $ T.pack s
+ | otherwise -> B.text $ T.pack s
DocParagraph _ -> mempty
DocIdentifier ident ->
case toRegular (DocIdentifier ident) of
- DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) s
+ DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
_ -> mempty
- DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) s
- DocModule s -> B.codeWith ("",["haskell","module"],[]) s
+ DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s
+ DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s
DocWarning _ -> mempty -- TODO
DocEmphasis d -> B.emph (docHToInlines isCode d)
- DocMonospaced (DocString s) -> B.code s
+ DocMonospaced (DocString s) -> B.code $ T.pack s
DocMonospaced d -> docHToInlines True d
DocBold d -> B.strong (docHToInlines isCode d)
- DocMathInline s -> B.math s
- DocMathDisplay s -> B.displayMath s
+ DocMathInline s -> B.math $ T.pack s
+ DocMathDisplay s -> B.displayMath $ T.pack s
DocHeader _ -> mempty
DocUnorderedList _ -> mempty
DocOrderedList _ -> mempty
DocDefList _ -> mempty
DocCodeBlock _ -> mempty
- DocHyperlink h -> B.link (hyperlinkUrl h) (hyperlinkUrl h)
- (maybe (B.text $ hyperlinkUrl h) (docHToInlines isCode)
+ DocHyperlink h -> B.link (T.pack $ hyperlinkUrl h) (T.pack $ hyperlinkUrl h)
+ (maybe (B.text $ T.pack $ hyperlinkUrl h) (docHToInlines isCode)
(hyperlinkLabel h))
- DocPic p -> B.image (pictureUri p) (fromMaybe (pictureUri p) $ pictureTitle p)
- (maybe mempty B.text $ pictureTitle p)
- DocAName s -> B.spanWith (s,["anchor"],[]) mempty
+ DocPic p -> B.image (T.pack $ pictureUri p) (T.pack $ fromMaybe (pictureUri p) $ pictureTitle p)
+ (maybe mempty (B.text . T.pack) $ pictureTitle p)
+ DocAName s -> B.spanWith (T.pack s,["anchor"],[]) mempty
DocProperty _ -> mempty
DocExamples _ -> mempty
DocTable _ -> mempty
-- | Create an 'Example', stripping superfluous characters as appropriate
-makeExample :: String -> String -> [String] -> Blocks
+makeExample :: T.Text -> String -> [String] -> Blocks
makeExample prompt expression result =
B.para $ B.codeWith ("",["prompt"],[]) prompt
<> B.space
- <> B.codeWith ([], ["haskell","expr"], []) (trim expression)
+ <> B.codeWith ("", ["haskell","expr"], []) (trim $ T.pack expression)
<> B.linebreak
<> mconcat (intersperse B.linebreak $ map coder result')
where
-- 1. drop trailing whitespace from the prompt, remember the prefix
- prefix = takeWhile (`elem` " \t") prompt
+ prefix = T.takeWhile (`elem` (" \t" :: String)) prompt
-- 2. drop, if possible, the exact same sequence of whitespace
-- characters from each result line
--
-- 3. interpret lines that only contain the string "<BLANKLINE>" as an
-- empty line
- result' = map (substituteBlankLine . tryStripPrefix prefix) result
+ result' = map (substituteBlankLine . tryStripPrefix prefix . T.pack) result
where
- tryStripPrefix xs ys = fromMaybe ys $ stripPrefix xs ys
+ tryStripPrefix xs ys = fromMaybe ys $ T.stripPrefix xs ys
substituteBlankLine "<BLANKLINE>" = ""
substituteBlankLine line = line
- coder = B.codeWith ([], ["result"], [])
+ coder = B.codeWith ("", ["result"], [])
diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs
index dbca5a59f..8efc230cc 100644
--- a/src/Text/Pandoc/Readers/Ipynb.hs
+++ b/src/Text/Pandoc/Readers/Ipynb.hs
@@ -4,6 +4,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Ipynb
Copyright : Copyright (C) 2019 John MacFarlane
@@ -19,7 +20,6 @@ module Text.Pandoc.Readers.Ipynb ( readIpynb )
where
import Prelude
import Data.Char (isDigit)
-import Data.List (isPrefixOf)
import Data.Maybe (fromMaybe)
import Data.Digest.Pure.SHA (sha1, showDigest)
import Text.Pandoc.Options
@@ -30,6 +30,7 @@ import Text.Pandoc.Definition
import Data.Ipynb as Ipynb
import Text.Pandoc.Class
import Text.Pandoc.MIME (extensionFromMimeType)
+import Text.Pandoc.Shared (tshow)
import Text.Pandoc.UTF8
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Error
@@ -51,15 +52,15 @@ readIpynb opts t = do
Left _ ->
case eitherDecode src of
Right (notebook3 :: Notebook NbV3) -> notebookToPandoc opts notebook3
- Left err -> throwError $ PandocIpynbDecodingError err
+ Left err -> throwError $ PandocIpynbDecodingError $ T.pack err
notebookToPandoc :: PandocMonad m
=> ReaderOptions -> Notebook a -> m Pandoc
notebookToPandoc opts notebook = do
let cells = notebookCells notebook
let (fmt,fmtminor) = notebookFormat notebook
- let m = M.insert "nbformat" (MetaString $ show fmt) $
- M.insert "nbformat_minor" (MetaString $ show fmtminor) $
+ let m = M.insert "nbformat" (MetaString $ tshow fmt) $
+ M.insert "nbformat_minor" (MetaString $ tshow fmtminor) $
jsonMetaToMeta (notebookMetadata notebook)
let lang = case M.lookup "kernelspec" m of
Just (MetaMap ks) ->
@@ -72,7 +73,7 @@ notebookToPandoc opts notebook = do
return $ Pandoc (Meta $ M.insert "jupyter" (MetaMap m) mempty) blocks
cellToBlocks :: PandocMonad m
- => ReaderOptions -> String -> Cell a -> m B.Blocks
+ => ReaderOptions -> Text -> Cell a -> m B.Blocks
cellToBlocks opts lang c = do
let Source ts = cellSource c
let source = mconcat ts
@@ -100,19 +101,18 @@ cellToBlocks opts lang c = do
"text/markdown" -> "markdown"
"text/x-rsrt" -> "rst"
_ -> format
- return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format'
- $ T.unpack source
+ return $ B.divWith ("",["cell","raw"],kvs) $ B.rawBlock format' source
Ipynb.Code{ codeOutputs = outputs, codeExecutionCount = ec } -> do
outputBlocks <- mconcat <$> mapM outputToBlock outputs
- let kvs' = maybe kvs (\x -> ("execution_count", show x):kvs) ec
+ let kvs' = maybe kvs (\x -> ("execution_count", tshow x):kvs) ec
return $ B.divWith ("",["cell","code"],kvs') $
- B.codeBlockWith ("",[lang],[]) (T.unpack source)
+ B.codeBlockWith ("",[lang],[]) source
<> outputBlocks
-- Remove attachment: prefix from images...
fixImage :: Inline -> Inline
fixImage (Image attr lab (src,tit))
- | "attachment:" `isPrefixOf` src = Image attr lab (drop 11 src, tit)
+ | "attachment:" `T.isPrefixOf` src = Image attr lab (T.drop 11 src, tit)
fixImage x = x
addAttachment :: PandocMonad m => (Text, MimeBundle) -> m ()
@@ -120,19 +120,19 @@ addAttachment (fname, mimeBundle) = do
let fp = T.unpack fname
case M.toList (unMimeBundle mimeBundle) of
(mimeType, BinaryData bs):_ ->
- insertMedia fp (Just $ T.unpack mimeType) (BL.fromStrict bs)
+ insertMedia fp (Just mimeType) (BL.fromStrict bs)
(mimeType, TextualData t):_ ->
- insertMedia fp (Just $ T.unpack mimeType)
+ insertMedia fp (Just mimeType)
(BL.fromStrict $ TE.encodeUtf8 t)
(mimeType, JsonData v):_ ->
- insertMedia fp (Just $ T.unpack mimeType) (encode v)
- [] -> report $ CouldNotFetchResource fp "no attachment"
+ insertMedia fp (Just mimeType) (encode v)
+ [] -> report $ CouldNotFetchResource fname "no attachment"
outputToBlock :: PandocMonad m => Output a -> m B.Blocks
outputToBlock Stream{ streamName = sName,
streamText = Source text } = do
- return $ B.divWith ("",["output","stream",T.unpack sName],[])
- $ B.codeBlock $ T.unpack . mconcat $ text
+ return $ B.divWith ("",["output","stream",sName],[])
+ $ B.codeBlock $ T.concat $ text
outputToBlock DisplayData{ displayData = data',
displayMetadata = metadata' } =
B.divWith ("",["output", "display_data"],[]) <$>
@@ -140,15 +140,15 @@ outputToBlock DisplayData{ displayData = data',
outputToBlock ExecuteResult{ executeCount = ec,
executeData = data',
executeMetadata = metadata' } =
- B.divWith ("",["output", "execute_result"],[("execution_count",show ec)])
+ B.divWith ("",["output", "execute_result"],[("execution_count",tshow ec)])
<$> handleData metadata' data'
outputToBlock Err{ errName = ename,
errValue = evalue,
errTraceback = traceback } = do
return $ B.divWith ("",["output","error"],
- [("ename",T.unpack ename),
- ("evalue",T.unpack evalue)])
- $ B.codeBlock $ T.unpack . T.unlines $ traceback
+ [("ename",ename),
+ ("evalue",evalue)])
+ $ B.codeBlock $ T.unlines $ traceback
-- We want to display the richest output possible given
-- the output format.
@@ -174,54 +174,53 @@ handleData metadata (MimeBundle mb) =
let metaPairs = jsonMetaToPairs meta
let bl = BL.fromStrict bs
-- SHA1 hash for filename
- let mt' = T.unpack mt
- let fname = showDigest (sha1 bl) ++
- case extensionFromMimeType mt' of
+ let fname = T.pack (showDigest (sha1 bl)) <>
+ case extensionFromMimeType mt of
Nothing -> ""
- Just ext -> '.':ext
- insertMedia fname (Just mt') bl
+ Just ext -> "." <> ext
+ insertMedia (T.unpack fname) (Just mt) bl
return $ B.para $ B.imageWith ("",[],metaPairs) fname "" mempty
| otherwise = return mempty
dataBlock ("text/html", TextualData t)
- = return $ B.rawBlock "html" $ T.unpack t
+ = return $ B.rawBlock "html" $ t
dataBlock ("text/latex", TextualData t)
- = return $ B.rawBlock "latex" $ T.unpack t
+ = return $ B.rawBlock "latex" $ t
dataBlock ("text/plain", TextualData t) =
- return $ B.codeBlock $ T.unpack t
+ return $ B.codeBlock $ t
dataBlock (_, JsonData v) =
- return $ B.codeBlockWith ("",["json"],[]) $ toStringLazy $ encode v
+ return $ B.codeBlockWith ("",["json"],[]) $ T.pack $ toStringLazy $ encode v
dataBlock _ = return mempty
-jsonMetaToMeta :: JSONMeta -> M.Map String MetaValue
-jsonMetaToMeta = M.mapKeys T.unpack . M.map valueToMetaValue
+jsonMetaToMeta :: JSONMeta -> M.Map Text MetaValue
+jsonMetaToMeta = M.map valueToMetaValue
where
valueToMetaValue :: Value -> MetaValue
valueToMetaValue x@(Object{}) =
case fromJSON x of
- Error s -> MetaString s
+ Error s -> MetaString $ T.pack s
Success jm' -> MetaMap $ jsonMetaToMeta jm'
valueToMetaValue x@(Array{}) =
case fromJSON x of
- Error s -> MetaString s
+ Error s -> MetaString $ T.pack s
Success xs -> MetaList $ map valueToMetaValue xs
valueToMetaValue (Bool b) = MetaBool b
- valueToMetaValue (String t) = MetaString (T.unpack t)
+ valueToMetaValue (String t) = MetaString t
valueToMetaValue (Number n)
- | Scientific.isInteger n = MetaString (show (floor n :: Integer))
- | otherwise = MetaString (show n)
+ | Scientific.isInteger n = MetaString (tshow (floor n :: Integer))
+ | otherwise = MetaString (tshow n)
valueToMetaValue Aeson.Null = MetaString ""
-jsonMetaToPairs :: JSONMeta -> [(String, String)]
-jsonMetaToPairs = M.toList . M.mapKeys T.unpack . M.map
+jsonMetaToPairs :: JSONMeta -> [(Text, Text)]
+jsonMetaToPairs = M.toList . M.map
(\case
String t
| not (T.all isDigit t)
, t /= "true"
, t /= "false"
- -> T.unpack t
- x -> UTF8.toStringLazy $ Aeson.encode x)
+ -> t
+ x -> T.pack $ UTF8.toStringLazy $ Aeson.encode x)
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index e074599eb..320b9c1dd 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.JATS
Copyright : Copyright (C) 2017-2019 Hamish Mackenzie
@@ -76,13 +77,13 @@ convertEntity :: String -> String
convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
+attrValue :: String -> Element -> Text
attrValue attr =
fromMaybe "" . maybeAttrValue attr
-maybeAttrValue :: String -> Element -> Maybe String
+maybeAttrValue :: String -> Element -> Maybe Text
maybeAttrValue attr elt =
- lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+ T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
-- convenience function
named :: String -> Element -> Bool
@@ -90,7 +91,7 @@ named s e = qName (elName e) == s
--
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m ()
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> JATS m ()
addMeta field val = modify (setMeta field val)
instance HasMeta JATSState where
@@ -126,15 +127,13 @@ isBlockElement (Elem e) = qName (elName e) `S.member` blocktags
isBlockElement _ = False
-- Trim leading and trailing newline characters
-trimNl :: String -> String
-trimNl = reverse . go . reverse . go
- where go ('\n':xs) = xs
- go xs = xs
+trimNl :: Text -> Text
+trimNl = T.dropAround (== '\n')
-- function that is used by both graphic (in parseBlock)
-- and inline-graphic (in parseInline)
getGraphic :: PandocMonad m
- => Maybe (Inlines, String) -> Element -> JATS m Inlines
+ => Maybe (Inlines, Text) -> Element -> JATS m Inlines
getGraphic mbfigdata e = do
let atVal a = attrValue a e
(ident, title, caption) =
@@ -142,7 +141,7 @@ getGraphic mbfigdata e = do
Just (capt, i) -> (i, "fig:" <> atVal "title", capt)
Nothing -> (atVal "id", atVal "title",
text (atVal "alt-text"))
- attr = (ident, words $ atVal "role", [])
+ attr = (ident, T.words $ atVal "role", [])
imageUrl = atVal "href"
return $ imageWith attr imageUrl title caption
@@ -155,8 +154,8 @@ parseBlock :: PandocMonad m => Content -> JATS m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text s
-parseBlock (CRef x) = return $ plain $ str $ map toUpper x
+ else return $ plain $ trimInlines $ text $ T.pack s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
parseBlock (Elem e) =
case qName (elName e) of
"p" -> parseMixed para (elContent e)
@@ -167,7 +166,7 @@ parseBlock (Elem e) =
"bullet" -> bulletList <$> listitems
listType -> do
let start = fromMaybe 1 $
- (strContent <$> (filterElement (named "list-item") e
+ (textContent <$> (filterElement (named "list-item") e
>>= filterElement (named "label")))
>>= safeRead
orderedListWith (start, parseListStyleType listType, DefaultDelim)
@@ -204,7 +203,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ strContentRecursive e
+ $ trimNl $ textContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -267,9 +266,9 @@ parseBlock (Elem e) =
Just "right" -> AlignRight
Just "center" -> AlignCenter
_ -> AlignDefault
- let toWidth c = case findAttr (unqual "colwidth") c of
+ let toWidth c = case findAttrText (unqual "colwidth") c of
Just w -> fromMaybe 0
- $ safeRead $ '0': filter (\x ->
+ $ safeRead $ "0" <> T.filter (\x ->
isDigit x || x == '.') w
Nothing -> 0 :: Double
let numrows = foldl' max 0 $ map length bodyrows
@@ -363,7 +362,7 @@ parseRefList e = do
return mempty
parseRef :: PandocMonad m
- => Element -> JATS m (Map.Map String MetaValue)
+ => Element -> JATS m (Map.Map Text MetaValue)
parseRef e = do
let refId = text $ attrValue "id" e
let getInlineText n = maybe (return mempty) getInlines . filterChild (named n)
@@ -396,7 +395,7 @@ parseRef e = do
family <- maybe (return mempty) getInlines
$ filterChild (named "surname") nm
return $ toMetaValue $ Map.fromList [
- ("given", given)
+ ("given" :: Text, given)
, ("family", family)
]
personGroups <- mapM (\pg ->
@@ -406,7 +405,7 @@ parseRef e = do
toMetaValue names))
personGroups'
return $ Map.fromList $
- [ ("id", toMetaValue refId)
+ [ ("id" :: Text, toMetaValue refId)
, ("type", toMetaValue refType)
, ("title", toMetaValue refTitle)
, ("container-title", toMetaValue refContainerTitle)
@@ -415,7 +414,7 @@ parseRef e = do
, ("title", toMetaValue refTitle)
, ("issued", toMetaValue
$ Map.fromList [
- ("year", refYear)
+ ("year" :: Text, refYear)
])
, ("volume", toMetaValue refVolume)
, ("page", toMetaValue refPages)
@@ -424,6 +423,15 @@ parseRef e = do
Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
-- TODO handle mixed-citation
+findAttrText :: QName -> Element -> Maybe Text
+findAttrText x = fmap T.pack . findAttr x
+
+textContent :: Element -> Text
+textContent = T.pack . strContent
+
+textContentRecursive :: Element -> Text
+textContentRecursive = T.pack . strContentRecursive
+
strContentRecursive :: Element -> String
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -433,9 +441,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text s
+parseInline (Text (CData _ s _)) = return $ text $ T.pack s
parseInline (CRef ref) =
- return $ maybe (text $ map toUpper ref) text $ lookupEntity ref
+ return $ maybe (text $ T.toUpper $ T.pack ref) text $ T.pack <$> lookupEntity ref
parseInline (Elem e) =
case qName (elName e) of
"italic" -> emph <$> innerInlines
@@ -464,7 +472,7 @@ parseInline (Elem e) =
"xref" -> do
ils <- innerInlines
let rid = attrValue "rid" e
- let rids = words rid
+ let rids = T.words rid
let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e
let attr = (attrValue "id" e, [], maybeToList refType)
return $ if refType == Just ("ref-type","bibr")
@@ -477,13 +485,13 @@ parseInline (Elem e) =
, citationNoteNum = 0
, citationHash = 0}) rids)
ils
- else linkWith attr ('#' : rid) "" ils
+ else linkWith attr ("#" <> rid) "" ils
"ext-link" -> do
ils <- innerInlines
- let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> h
- _ -> '#' : attrValue "rid" e
+ Just h -> T.pack h
+ _ -> "#" <> attrValue "rid" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, [], [])
return $ linkWith attr href title ils'
@@ -491,23 +499,23 @@ parseInline (Elem e) =
"disp-formula" -> formula displayMath
"inline-formula" -> formula math
"math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e
- "tex-math" -> return . math $ strContent e
+ "tex-math" -> return . math $ textContent e
- "email" -> return $ link ("mailto:" ++ strContent e) ""
- $ str $ strContent e
- "uri" -> return $ link (strContent e) "" $ str $ strContent e
+ "email" -> return $ link ("mailto:" <> textContent e) ""
+ $ str $ textContent e
+ "uri" -> return $ link (textContent e) "" $ str $ textContent e
"fn" -> (note . mconcat) <$>
mapM parseBlock (elContent e)
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
mapM parseInline (elContent e)
mathML x =
- case readMathML . showElement $ everywhere (mkT removePrefix) x of
+ case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
Left _ -> mempty
Right m -> writeTeX m
formula constructor = do
let whereToLook = fromMaybe e $ filterElement (named "alternatives") e
- texMaths = map strContent $
+ texMaths = map textContent $
filterChildren (named "tex-math") whereToLook
mathMLs = map mathML $
filterChildren isMathML whereToLook
@@ -520,4 +528,4 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index be19964a4..5c9a3e69c 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -1,9 +1,10 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.LaTeX
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -29,9 +30,9 @@ import Prelude
import Control.Applicative (many, optional, (<|>))
import Control.Monad
import Control.Monad.Except (throwError)
-import Data.Char (isDigit, isLetter, toLower, toUpper, chr)
+import Data.Char (isDigit, isLetter, toUpper, chr)
import Data.Default
-import Data.List (intercalate, isPrefixOf)
+import Data.List (intercalate)
import qualified Data.Map as M
import Data.Maybe (fromMaybe, maybeToList)
import qualified Data.Set as Set
@@ -44,7 +45,7 @@ import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad, PandocPure, getResourcePath, lookupEnv,
readFileFromDirs, report, setResourcePath,
setTranslations, translateTerm, trace, fileExists)
-import Text.Pandoc.Error (PandocError ( PandocParseError, PandocParsecError))
+import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError))
import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension)
import Text.Pandoc.ImageSize (numUnit, showFl)
import Text.Pandoc.Logging
@@ -77,7 +78,7 @@ readLaTeX opts ltx = do
(tokenize "source" (crFilter ltx))
case parsed of
Right result -> return result
- Left e -> throwError $ PandocParsecError (T.unpack ltx) e
+ Left e -> throwError $ PandocParsecError ltx e
parseLaTeX :: PandocMonad m => LP m Pandoc
parseLaTeX = do
@@ -99,13 +100,13 @@ parseLaTeX = do
walk (resolveRefs (sLabels st)) doc'
return $ Pandoc meta bs'
-resolveRefs :: M.Map String [Inline] -> Inline -> Inline
+resolveRefs :: M.Map Text [Inline] -> Inline -> Inline
resolveRefs labels x@(Link (ident,classes,kvs) _ _) =
case (lookup "reference-type" kvs,
lookup "reference" kvs) of
(Just "ref", Just lab) ->
case M.lookup lab labels of
- Just txt -> Link (ident,classes,kvs) txt ('#':lab, "")
+ Just txt -> Link (ident,classes,kvs) txt ("#" <> lab, "")
Nothing -> x
_ -> x
resolveRefs _ x = x
@@ -123,11 +124,11 @@ resolveRefs _ x = x
rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT String s m String
+ => ParserT Text s m Text
rawLaTeXBlock = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" $ T.pack inp
+ let toks = tokenize "source" inp
snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks
<|> (rawLaTeXParser toks True
(do choice (map controlSeq
@@ -151,14 +152,14 @@ beginOrEndCommand = try $ do
(inlineEnvironments :: M.Map Text (LP PandocPure Inlines))
then mzero
else return $ rawBlock "latex"
- (T.unpack (txt <> untokenize rawargs))
+ (txt <> untokenize rawargs)
rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => ParserT String s m String
+ => ParserT Text s m Text
rawLaTeXInline = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" $ T.pack inp
+ let toks = tokenize "source" inp
raw <- snd <$>
( rawLaTeXParser toks True
(mempty <$ (controlSeq "input" >> skipMany opt >> braced))
@@ -167,23 +168,23 @@ rawLaTeXInline = do
inlines
)
finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439
- return $ raw <> finalbraces
+ return $ raw <> T.pack finalbraces
-inlineCommand :: PandocMonad m => ParserT String ParserState m Inlines
+inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines
inlineCommand = do
lookAhead (try (char '\\' >> letter))
inp <- getInput
- let toks = tokenize "source" $ T.pack inp
+ let toks = tokenize "source" inp
fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand')
inlines
-- inline elements:
word :: PandocMonad m => LP m Inlines
-word = (str . T.unpack . untoken) <$> satisfyTok isWordTok
+word = (str . untoken) <$> satisfyTok isWordTok
regularSymbol :: PandocMonad m => LP m Inlines
-regularSymbol = (str . T.unpack . untoken) <$> satisfyTok isRegularSymbol
+regularSymbol = (str . untoken) <$> satisfyTok isRegularSymbol
where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t
isRegularSymbol _ = False
isSpecial c = c `Set.member` specialChars
@@ -199,14 +200,14 @@ inlineGroup = do
doLHSverb :: PandocMonad m => LP m Inlines
doLHSverb =
- (codeWith ("",["haskell"],[]) . T.unpack . untokenize)
+ (codeWith ("",["haskell"],[]) . untokenize)
<$> manyTill (satisfyTok (not . isNewlineTok)) (symbol '|')
-mkImage :: PandocMonad m => [(String, String)] -> String -> LP m Inlines
-mkImage options src = do
+mkImage :: PandocMonad m => [(Text, Text)] -> Text -> LP m Inlines
+mkImage options (T.unpack -> src) = do
let replaceTextwidth (k,v) =
case numUnit v of
- Just (num, "\\textwidth") -> (k, showFl (num * 100) ++ "%")
+ Just (num, "\\textwidth") -> (k, showFl (num * 100) <> "%")
_ -> (k, v)
let kvs = map replaceTextwidth
$ filter (\(k,_) -> k `elem` ["width", "height"]) options
@@ -223,10 +224,10 @@ mkImage options src = do
then return s'
else findFile s es
src' <- case takeExtension src of
- "" | not (null defaultExt) -> return $ addExtension src defaultExt
+ "" | not (T.null defaultExt) -> return $ addExtension src $ T.unpack defaultExt
| otherwise -> findFile src exts
_ -> return src
- return $ imageWith attr src' "" alt
+ return $ imageWith attr (T.pack src') "" alt
doxspace :: PandocMonad m => LP m Inlines
doxspace =
@@ -435,7 +436,7 @@ siUnitMap = M.fromList
, ("zetta", str "Z")
]
-lit :: String -> LP m Inlines
+lit :: Text -> LP m Inlines
lit = pure . str
removeDoubleQuotes :: Text -> Text
@@ -471,7 +472,7 @@ quoted' :: PandocMonad m
-> LP m ()
-> LP m Inlines
quoted' f starter ender = do
- startchs <- (T.unpack . untokenize) <$> starter
+ startchs <- untokenize <$> starter
smart <- extensionEnabled Ext_smart <$> getOption readerExtensions
if smart
then do
@@ -487,7 +488,7 @@ quoted' f starter ender = do
enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
enquote starred mblang = do
skipopts
- let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let lang = mblang >>= babelLangToBCP47
let langspan = case lang of
Nothing -> id
Just l -> spanWith ("",[],[("lang", renderLang l)])
@@ -503,27 +504,27 @@ blockquote citations mblang = do
cs <- cites NormalCitation False
return $ para (cite cs mempty)
else return mempty
- let lang = (T.unpack <$> mblang) >>= babelLangToBCP47
+ let lang = mblang >>= babelLangToBCP47
let langdiv = case lang of
Nothing -> id
Just l -> divWith ("",[],[("lang", renderLang l)])
bs <- grouped block
return $ blockQuote . langdiv $ (bs <> citePar)
-doAcronym :: PandocMonad m => String -> LP m Inlines
+doAcronym :: PandocMonad m => Text -> LP m Inlines
doAcronym form = do
acro <- braced
- return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro),
- ("acronym-form", "singular+" ++ form)])
- $ str $ toksToString acro]
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "singular+" <> form)])
+ $ str $ untokenize acro]
-doAcronymPlural :: PandocMonad m => String -> LP m Inlines
+doAcronymPlural :: PandocMonad m => Text -> LP m Inlines
doAcronymPlural form = do
acro <- braced
plural <- lit "s"
- return . mconcat $ [spanWith ("",[],[("acronym-label", toksToString acro),
- ("acronym-form", "plural+" ++ form)]) $
- mconcat [str $ toksToString acro, plural]]
+ return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro),
+ ("acronym-form", "plural+" <> form)]) $
+ mconcat [str $ untokenize acro, plural]]
doverb :: PandocMonad m => LP m Inlines
doverb = do
@@ -532,7 +533,7 @@ doverb = do
Just (c, ts) | T.null ts -> return c
_ -> mzero
withVerbatimMode $
- (code . T.unpack . untokenize) <$>
+ (code . untokenize) <$>
manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker)
verbTok :: PandocMonad m => Char -> LP m Tok
@@ -547,7 +548,7 @@ verbTok stopchar = do
: totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp
return $ Tok pos toktype t1
-listingsLanguage :: [(String, String)] -> Maybe String
+listingsLanguage :: [(Text, Text)] -> Maybe Text
listingsLanguage opts =
case lookup "language" opts of
Nothing -> Nothing
@@ -562,10 +563,10 @@ dolstinline = do
domintinline :: PandocMonad m => LP m Inlines
domintinline = do
skipopts
- cls <- toksToString <$> braced
+ cls <- untokenize <$> braced
doinlinecode [cls]
-doinlinecode :: PandocMonad m => [String] -> LP m Inlines
+doinlinecode :: PandocMonad m => [Text] -> LP m Inlines
doinlinecode classes = do
Tok _ Symbol t <- anySymbol
marker <- case T.uncons t of
@@ -573,14 +574,14 @@ doinlinecode classes = do
_ -> mzero
let stopchar = if marker == '{' then '}' else marker
withVerbatimMode $
- (codeWith ("",classes,[]) . map nlToSpace . T.unpack . untokenize) <$>
+ (codeWith ("",classes,[]) . T.map nlToSpace . untokenize) <$>
manyTill (verbTok stopchar) (symbol stopchar)
nlToSpace :: Char -> Char
nlToSpace '\n' = ' '
nlToSpace x = x
-keyval :: PandocMonad m => LP m (String, String)
+keyval :: PandocMonad m => LP m (Text, Text)
keyval = try $ do
Tok _ Word key <- satisfyTok isWordTok
optional sp
@@ -601,35 +602,34 @@ keyval = try $ do
_ -> True))))))
optional (symbol ',')
optional sp
- return (T.unpack key, T.unpack $ T.strip val)
+ return (key, T.strip val)
-keyvals :: PandocMonad m => LP m [(String, String)]
+keyvals :: PandocMonad m => LP m [(Text, Text)]
keyvals = try $ symbol '[' >> manyTill keyval (symbol ']')
accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
accent combiningAccent fallBack = try $ do
ils <- tok
case toList ils of
- (Str (x:xs) : ys) -> return $ fromList $
+ (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
-- try to normalize to the combined character:
- Str (T.unpack
- (Normalize.normalize Normalize.NFC
- (T.pack [x, combiningAccent])) ++ xs) : ys
- [Space] -> return $ str [fromMaybe combiningAccent fallBack]
- [] -> return $ str [fromMaybe combiningAccent fallBack]
+ Str (Normalize.normalize Normalize.NFC
+ (T.pack [x, combiningAccent]) <> xs) : ys
+ [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+ [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
_ -> return ils
-mathDisplay :: String -> Inlines
+mathDisplay :: Text -> Inlines
mathDisplay = displayMath . trimMath
-mathInline :: String -> Inlines
+mathInline :: Text -> Inlines
mathInline = math . trimMath
dollarsMath :: PandocMonad m => LP m Inlines
dollarsMath = do
symbol '$'
display <- option False (True <$ symbol '$')
- (do contents <- try $ T.unpack . untokenize <$> pDollarsMath 0
+ (do contents <- try $ untokenize <$> pDollarsMath 0
if display
then (mathDisplay contents <$ symbol '$')
else return $ mathInline contents)
@@ -682,10 +682,10 @@ simpleCiteArgs = try $ do
}
return $ addPrefix pre $ addSuffix suf $ map conv keys
-citationLabel :: PandocMonad m => LP m String
+citationLabel :: PandocMonad m => LP m Text
citationLabel = do
optional spaces
- toksToString <$>
+ untokenize <$>
(many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar)
<* optional spaces
<* optional (symbol ',')
@@ -729,10 +729,10 @@ cites mode multi = try $ do
addMprenote _ _ = []
addMpostnote = addSuffix . mpostnote
-citation :: PandocMonad m => String -> CitationMode -> Bool -> LP m Inlines
+citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines
citation name mode multi = do
(c,raw) <- withRaw $ cites mode multi
- return $ cite c (rawInline "latex" $ "\\" ++ name ++ toksToString raw)
+ return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw)
handleCitationPart :: Inlines -> [Citation]
handleCitationPart ils =
@@ -756,7 +756,7 @@ complexNatbibCitation mode = try $ do
case cs of
[] -> mzero
(c:cits) -> return $ cite (c{ citationMode = mode }:cits)
- (rawInline "latex" $ "\\citetext" ++ toksToString raw)
+ (rawInline "latex" $ "\\citetext" <> untokenize raw)
inNote :: Inlines -> Inlines
inNote ils =
@@ -780,10 +780,10 @@ tok :: PandocMonad m => LP m Inlines
tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar'
where singleChar' = do
Tok _ _ t <- singleChar
- return (str (T.unpack t))
+ return $ str t
opt :: PandocMonad m => LP m Inlines
-opt = bracketed inline <|> (str . T.unpack <$> rawopt)
+opt = bracketed inline <|> (str <$> rawopt)
paropt :: PandocMonad m => LP m Inlines
paropt = parenWrapped inline
@@ -822,26 +822,31 @@ overlayTok =
inBrackets :: Inlines -> Inlines
inBrackets x = str "[" <> x <> str "]"
-unescapeURL :: String -> String
-unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
- where isEscapable c = c `elem` ("#$%&~_^\\{}" :: String)
-unescapeURL (x:xs) = x:unescapeURL xs
-unescapeURL [] = ""
+unescapeURL :: Text -> Text
+unescapeURL = T.concat . go . T.splitOn "\\"
+ where
+ isEscapable c = c `elemText` "#$%&~_^\\{}"
+ go (x:xs) = x : map unescapeInterior xs
+ go [] = []
+ unescapeInterior t
+ | Just (c, _) <- T.uncons t
+ , isEscapable c = t
+ | otherwise = "\\" <> t
mathEnvWith :: PandocMonad m
=> (Inlines -> a) -> Maybe Text -> Text -> LP m a
mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
where inner x = case innerEnv of
Nothing -> x
- Just y -> "\\begin{" ++ T.unpack y ++ "}\n" ++ x ++
- "\\end{" ++ T.unpack y ++ "}"
+ Just y -> "\\begin{" <> y <> "}\n" <> x <>
+ "\\end{" <> y <> "}"
-mathEnv :: PandocMonad m => Text -> LP m String
+mathEnv :: PandocMonad m => Text -> LP m Text
mathEnv name = do
skipopts
optional blankline
res <- manyTill anyTok (end_ name)
- return $ stripTrailingNewlines $ T.unpack $ untokenize res
+ return $ stripTrailingNewlines $ untokenize res
inlineEnvironment :: PandocMonad m => LP m Inlines
inlineEnvironment = try $ do
@@ -914,9 +919,9 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("mbox", rawInlineOr "mbox" $ processHBox <$> tok)
, ("hbox", rawInlineOr "hbox" $ processHBox <$> tok)
, ("lettrine", optional opt >> extractSpaces (spanWith ("",["lettrine"],[])) <$> tok)
- , ("(", mathInline . toksToString <$> manyTill anyTok (controlSeq ")"))
- , ("[", mathDisplay . toksToString <$> manyTill anyTok (controlSeq "]"))
- , ("ensuremath", mathInline . toksToString <$> braced)
+ , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")"))
+ , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]"))
+ , ("ensuremath", mathInline . untokenize <$> braced)
, ("texorpdfstring", const <$> tok <*> tok)
, ("P", lit "¶")
, ("S", lit "§")
@@ -1008,16 +1013,15 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("lstinline", dolstinline)
, ("mintinline", domintinline)
, ("Verb", doverb)
- , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
+ , ("url", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url ->
pure (link url "" (str url)))
- , ("nolinkurl", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
+ , ("nolinkurl", ((unescapeURL . untokenize) <$> bracedUrl) >>= \url ->
pure (code url))
- , ("href", (unescapeURL . toksToString <$>
+ , ("href", (unescapeURL . untokenize <$>
bracedUrl <* optional sp) >>= \url ->
tok >>= \lab -> pure (link url "" lab))
, ("includegraphics", do options <- option [] keyvals
- src <- unescapeURL . T.unpack .
- removeDoubleQuotes . untokenize <$> braced
+ src <- unescapeURL . removeDoubleQuotes . untokenize <$> braced
mkImage options src)
, ("enquote*", enquote True Nothing)
, ("enquote", enquote False Nothing)
@@ -1172,22 +1176,21 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
ifdim :: PandocMonad m => LP m Inlines
ifdim = do
contents <- manyTill anyTok (controlSeq "fi")
- return $ rawInline "latex" $ T.unpack $
- "\\ifdim" <> untokenize contents <> "\\fi"
+ return $ rawInline "latex" $ "\\ifdim" <> untokenize contents <> "\\fi"
makeUppercase :: Inlines -> Inlines
-makeUppercase = fromList . walk (alterStr (map toUpper)) . toList
+makeUppercase = fromList . walk (alterStr T.toUpper) . toList
makeLowercase :: Inlines -> Inlines
-makeLowercase = fromList . walk (alterStr (map toLower)) . toList
+makeLowercase = fromList . walk (alterStr T.toLower) . toList
-alterStr :: (String -> String) -> Inline -> Inline
+alterStr :: (Text -> Text) -> Inline -> Inline
alterStr f (Str xs) = Str (f xs)
alterStr _ x = x
foreignlanguage :: PandocMonad m => LP m Inlines
foreignlanguage = do
- babelLang <- T.unpack . untokenize <$> braced
+ babelLang <- untokenize <$> braced
case babelLangToBCP47 babelLang of
Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok
_ -> tok
@@ -1196,24 +1199,24 @@ inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines)
inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47
where
mk (polyglossia, bcp47Func) =
- ("text" <> T.pack polyglossia, inlineLanguage bcp47Func)
+ ("text" <> polyglossia, inlineLanguage bcp47Func)
-inlineLanguage :: PandocMonad m => (String -> Lang) -> LP m Inlines
+inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines
inlineLanguage bcp47Func = do
- o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']'))
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
<$> rawopt
let lang = renderLang $ bcp47Func o
extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok
hyperlink :: PandocMonad m => LP m Inlines
hyperlink = try $ do
- src <- toksToString <$> braced
+ src <- untokenize <$> braced
lab <- tok
- return $ link ('#':src) "" lab
+ return $ link ("#" <> src) "" lab
hypertargetBlock :: PandocMonad m => LP m Blocks
hypertargetBlock = try $ do
- ref <- toksToString <$> braced
+ ref <- untokenize <$> braced
bs <- grouped block
case toList bs of
[Header 1 (ident,_,_) _] | ident == ref -> return bs
@@ -1221,7 +1224,7 @@ hypertargetBlock = try $ do
hypertargetInline :: PandocMonad m => LP m Inlines
hypertargetInline = try $ do
- ref <- toksToString <$> braced
+ ref <- untokenize <$> braced
ils <- grouped inline
return $ spanWith (ref, [], []) ils
@@ -1231,7 +1234,7 @@ romanNumeralUpper =
romanNumeralLower :: (PandocMonad m) => LP m Inlines
romanNumeralLower =
- str . map toLower . toRomanNumeral <$> romanNumeralArg
+ str . T.toLower . toRomanNumeral <$> romanNumeralArg
romanNumeralArg :: (PandocMonad m) => LP m Int
romanNumeralArg = spaces *> (parser <|> inBraces)
@@ -1248,18 +1251,18 @@ romanNumeralArg = spaces *> (parser <|> inBraces)
let (digits, rest) = T.span isDigit s
unless (T.null rest) $
Prelude.fail "Non-digits in argument to \\Rn or \\RN"
- safeRead $ T.unpack digits
+ safeRead digits
newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a
newToggle name = do
updateState $ \st ->
- st{ sToggles = M.insert (toksToString name) False (sToggles st) }
+ st{ sToggles = M.insert (untokenize name) False (sToggles st) }
return mempty
setToggle :: (Monoid a, PandocMonad m) => Bool -> [Tok] -> LP m a
setToggle on name = do
updateState $ \st ->
- st{ sToggles = M.adjust (const on) (toksToString name) (sToggles st) }
+ st{ sToggles = M.adjust (const on) (untokenize name) (sToggles st) }
return mempty
ifToggle :: PandocMonad m => LP m ()
@@ -1271,7 +1274,7 @@ ifToggle = do
no <- braced
toggles <- sToggles <$> getState
inp <- getInput
- let name' = toksToString name
+ let name' = untokenize name
case M.lookup name' toggles of
Just True -> setInput (yes ++ inp)
Just False -> setInput (no ++ inp)
@@ -1294,11 +1297,11 @@ ifstrequal = do
else getInput >>= setInput . (ifnotequal ++)
return mempty
-coloredInline :: PandocMonad m => String -> LP m Inlines
+coloredInline :: PandocMonad m => Text -> LP m Inlines
coloredInline stylename = do
skipopts
color <- braced
- spanWith ("",[],[("style",stylename ++ ": " ++ toksToString color)]) <$> tok
+ spanWith ("",[],[("style",stylename <> ": " <> untokenize color)]) <$> tok
ttfamily :: PandocMonad m => LP m Inlines
ttfamily = (code . stringify . toList) <$> tok
@@ -1313,12 +1316,12 @@ rawInlineOr name' fallback = do
processHBox :: Inlines -> Inlines
processHBox = walk convert
where
- convert Space = Str [chr 160] -- non-breakable space
- convert SoftBreak = Str [chr 160] -- non-breakable space
+ convert Space = Str $ T.singleton $ chr 160 -- non-breakable space
+ convert SoftBreak = Str $ T.singleton $ chr 160 -- non-breakable space
convert LineBreak = Str ""
convert x = x
-getRawCommand :: PandocMonad m => Text -> Text -> LP m String
+getRawCommand :: PandocMonad m => Text -> Text -> LP m Text
getRawCommand name txt = do
(_, rawargs) <- withRaw $
case name of
@@ -1336,7 +1339,7 @@ getRawCommand name txt = do
skipopts
option "" (try dimenarg)
void $ many braced
- return $ T.unpack (txt <> untokenize rawargs)
+ return $ txt <> untokenize rawargs
isFontSizeCommand :: Text -> Bool
isFontSizeCommand "tiny" = True
@@ -1396,17 +1399,17 @@ treatAsInline = Set.fromList
dolabel :: PandocMonad m => LP m Inlines
dolabel = do
v <- braced
- let refstr = toksToString v
+ let refstr = untokenize v
return $ spanWith (refstr,[],[("label", refstr)])
- $ inBrackets $ str $ toksToString v
+ $ inBrackets $ str $ untokenize v
-doref :: PandocMonad m => String -> LP m Inlines
+doref :: PandocMonad m => Text -> LP m Inlines
doref cls = do
v <- braced
- let refstr = toksToString v
+ let refstr = untokenize v
return $ linkWith ("",[],[ ("reference-type", cls)
, ("reference", refstr)])
- ('#':refstr)
+ ("#" <> refstr)
""
(inBrackets $ str refstr)
@@ -1435,11 +1438,11 @@ inline = (mempty <$ comment)
<|> (str "\160" <$ symbol '~')
<|> dollarsMath
<|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb)
- <|> (str . (:[]) <$> primEscape)
+ <|> (str . T.singleton <$> primEscape)
<|> regularSymbol
<|> (do res <- symbolIn "#^'`\"[]&"
pos <- getPosition
- let s = T.unpack (untoken res)
+ let s = untoken res
report $ ParsingUnescaped s pos
return $ str s)
@@ -1498,7 +1501,7 @@ include name = do
-- note, we can have cc_by_4.0 for example...
_ | name == "usepackage" -> addExtension f ".sty"
| otherwise -> addExtension f ".tex"
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
mapM_ (insertIncluded dirs) (map addExt fs)
return mempty
@@ -1509,19 +1512,19 @@ insertIncluded :: PandocMonad m
insertIncluded dirs f = do
pos <- getPosition
containers <- getIncludeFiles <$> getState
- when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " ++ show pos
- updateState $ addIncludeFile f
+ when (T.pack f `elem` containers) $
+ throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos
+ updateState $ addIncludeFile $ T.pack f
mbcontents <- readFileFromDirs dirs f
contents <- case mbcontents of
Just s -> return s
Nothing -> do
- report $ CouldNotLoadIncludeFile f pos
+ report $ CouldNotLoadIncludeFile (T.pack f) pos
return ""
- getInput >>= setInput . (tokenize f (T.pack contents) ++)
+ getInput >>= setInput . (tokenize f contents ++)
updateState dropLatestIncludeFile
-addMeta :: PandocMonad m => ToMetaValue a => String -> a -> LP m ()
+addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m ()
addMeta field val = updateState $ \st ->
st{ sMeta = addMetaField field val $ sMeta st }
@@ -1536,10 +1539,10 @@ authors = try $ do
egroup
addMeta "author" (map trimInlines auths)
-macroDef :: (PandocMonad m, Monoid a) => (String -> a) -> LP m a
+macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a
macroDef constructor = do
(_, s) <- withRaw (commandDef <|> environmentDef)
- (constructor (T.unpack $ untokenize s) <$
+ (constructor (untokenize s) <$
guardDisabled Ext_latex_macros)
<|> return mempty
where commandDef = do
@@ -1632,7 +1635,7 @@ newcommand = do
case M.lookup name macros of
Just macro
| mtype == "newcommand" -> do
- report $ MacroAlreadyDefined (T.unpack txt) pos
+ report $ MacroAlreadyDefined txt pos
return (name, macro)
| mtype == "providecommand" -> return (name, macro)
_ -> return (name, Macro ExpandWhenUsed argspecs optarg contents)
@@ -1658,7 +1661,7 @@ newenvironment = do
case M.lookup name macros of
Just _
| mtype == "newenvironment" -> do
- report $ MacroAlreadyDefined (T.unpack name) pos
+ report $ MacroAlreadyDefined name pos
return Nothing
| mtype == "provideenvironment" -> do
return Nothing
@@ -1669,7 +1672,7 @@ newenvironment = do
bracketedNum :: PandocMonad m => LP m Int
bracketedNum = do
ds <- untokenize <$> bracketedToks
- case safeRead (T.unpack ds) of
+ case safeRead ds of
Just i -> return i
_ -> return 0
@@ -1709,7 +1712,7 @@ section (ident, classes, kvs) lvl = do
contents <- grouped inline
lab <- option ident $
try (spaces >> controlSeq "label"
- >> spaces >> toksToString <$> braced)
+ >> spaces >> untokenize <$> braced)
when (lvl == 0) $
updateState $ \st -> st{ sHasChapters = True }
unless ("unnumbered" `elem` classes) $ do
@@ -1836,9 +1839,9 @@ blockCommands = M.fromList
, ("centerline", (para . trimInlines) <$> (skipopts *> tok))
, ("caption", skipopts *> setCaption)
, ("bibliography", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs . toksToString))
+ addMeta "bibliography" . splitBibs . untokenize))
, ("addbibresource", mempty <$ (skipopts *> braced >>=
- addMeta "bibliography" . splitBibs . toksToString))
+ addMeta "bibliography" . splitBibs . untokenize))
, ("endinput", mempty <$ skipMany tok)
-- includes
, ("lstinputlisting", inputListing)
@@ -1941,18 +1944,18 @@ rawEnv name = do
(bs, raw) <- withRaw $ env name blocks
if parseRaw
then return $ rawBlock "latex"
- $ T.unpack $ beginCommand <> untokenize raw
+ $ beginCommand <> untokenize raw
else do
- report $ SkippedContent (T.unpack beginCommand) pos1
+ report $ SkippedContent beginCommand pos1
pos2 <- getPosition
- report $ SkippedContent ("\\end{" ++ T.unpack name ++ "}") pos2
+ report $ SkippedContent ("\\end{" <> name <> "}") pos2
return bs
rawVerbEnv :: PandocMonad m => Text -> LP m Blocks
rawVerbEnv name = do
pos <- getPosition
(_, raw) <- withRaw $ verbEnv name
- let raw' = "\\begin{" ++ T.unpack name ++ "}" ++ toksToString raw
+ let raw' = "\\begin{" <> name <> "}" <> untokenize raw
exts <- getOption readerExtensions
let parseRaw = extensionEnabled Ext_raw_tex exts
if parseRaw
@@ -1961,12 +1964,11 @@ rawVerbEnv name = do
report $ SkippedContent raw' pos
return mempty
-verbEnv :: PandocMonad m => Text -> LP m String
+verbEnv :: PandocMonad m => Text -> LP m Text
verbEnv name = withVerbatimMode $ do
optional blankline
res <- manyTill anyTok (end_ name)
- return $ T.unpack
- $ stripTrailingNewline
+ return $ stripTrailingNewline
$ untokenize
$ res
@@ -2010,11 +2012,11 @@ minted = do
mintedAttr :: PandocMonad m => LP m Attr
mintedAttr = do
options <- option [] keyvals
- lang <- toksToString <$> braced
+ lang <- untokenize <$> braced
let kvs = [ (if k == "firstnumber"
then "startFrom"
else k, v) | (k,v) <- options ]
- let classes = [ lang | not (null lang) ] ++
+ let classes = [ lang | not (T.null lang) ] ++
[ "numberLines" |
lookup "linenos" options == Just "true" ]
return ("",classes,kvs)
@@ -2023,14 +2025,14 @@ inputMinted :: PandocMonad m => LP m Blocks
inputMinted = do
pos <- getPosition
attr <- mintedAttr
- f <- filter (/='"') . toksToString <$> braced
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mbCode <- readFileFromDirs dirs f
+ f <- T.filter (/='"') . untokenize <$> braced
+ dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mbCode <- readFileFromDirs dirs (T.unpack f)
rawcode <- case mbCode of
Just s -> return s
Nothing -> do
report $ CouldNotLoadIncludeFile f pos
- return []
+ return ""
return $ B.codeBlockWith attr rawcode
letterContents :: PandocMonad m => LP m Blocks
@@ -2052,10 +2054,10 @@ figure = try $ do
addImageCaption :: PandocMonad m => Blocks -> LP m Blocks
addImageCaption = walkM go
where go (Image attr@(_, cls, kvs) alt (src,tit))
- | not ("fig:" `isPrefixOf` tit) = do
+ | not ("fig:" `T.isPrefixOf` tit) = do
(mbcapt, mblab) <- sCaption <$> getState
let (alt', tit') = case mbcapt of
- Just ils -> (toList ils, "fig:" ++ tit)
+ Just ils -> (toList ils, "fig:" <> tit)
Nothing -> (alt, tit)
attr' = case mblab of
Just lab -> (lab, cls, kvs)
@@ -2090,23 +2092,23 @@ addImageCaption = walkM go
return $ Image attr' alt' (src, tit')
go x = return x
-coloredBlock :: PandocMonad m => String -> LP m Blocks
+coloredBlock :: PandocMonad m => Text -> LP m Blocks
coloredBlock stylename = try $ do
skipopts
color <- braced
notFollowedBy (grouped inline)
- let constructor = divWith ("",[],[("style",stylename ++ ": " ++ toksToString color)])
+ let constructor = divWith ("",[],[("style",stylename <> ": " <> untokenize color)])
constructor <$> grouped block
graphicsPath :: PandocMonad m => LP m Blocks
graphicsPath = do
- ps <- map toksToString <$>
+ ps <- map (T.unpack . untokenize) <$>
(bgroup *> spaces *> manyTill (braced <* spaces) egroup)
- getResourcePath >>= setResourcePath . (++ ps)
+ getResourcePath >>= setResourcePath . (<> ps)
return mempty
-splitBibs :: String -> [Inlines]
-splitBibs = map (str . flip replaceExtension "bib" . trim) . splitBy (==',')
+splitBibs :: Text -> [Inlines]
+splitBibs = map (str . T.pack . flip replaceExtension "bib" . T.unpack . trim) . splitTextBy (==',')
alltt :: Blocks -> Blocks
alltt = walk strToCode
@@ -2115,7 +2117,7 @@ alltt = walk strToCode
strToCode SoftBreak = LineBreak
strToCode x = x
-parseListingsOptions :: [(String, String)] -> Attr
+parseListingsOptions :: [(Text, Text)] -> Attr
parseListingsOptions options =
let kvs = [ (if k == "firstnumber"
then "startFrom"
@@ -2129,23 +2131,23 @@ inputListing :: PandocMonad m => LP m Blocks
inputListing = do
pos <- getPosition
options <- option [] keyvals
- f <- filter (/='"') . toksToString <$> braced
- dirs <- (splitBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
- mbCode <- readFileFromDirs dirs f
+ f <- T.filter (/='"') . untokenize <$> braced
+ dirs <- (map T.unpack . splitTextBy (==':') . fromMaybe ".") <$> lookupEnv "TEXINPUTS"
+ mbCode <- readFileFromDirs dirs (T.unpack f)
codeLines <- case mbCode of
- Just s -> return $ lines s
+ Just s -> return $ T.lines s
Nothing -> do
report $ CouldNotLoadIncludeFile f pos
return []
let (ident,classes,kvs) = parseListingsOptions options
let classes' =
(case listingsLanguage options of
- Nothing -> (take 1 (languagesByExtension (takeExtension f)) ++)
+ Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>)
Just _ -> id) classes
let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead
let lastline = fromMaybe (length codeLines) $
lookup "lastline" options >>= safeRead
- let codeContents = intercalate "\n" $ take (1 + lastline - firstline) $
+ let codeContents = T.intercalate "\n" $ take (1 + lastline - firstline) $
drop (firstline - 1) codeLines
return $ codeBlockWith (ident,classes',kvs) codeContents
@@ -2176,12 +2178,12 @@ orderedList' = try $ do
spaces
let markerSpec = do
symbol '['
- ts <- toksToString <$> manyTill anyTok (symbol ']')
+ ts <- untokenize <$> manyTill anyTok (symbol ']')
case runParser anyOrderedListMarker def "option" ts of
Right r -> return r
Left _ -> do
pos <- getPosition
- report $ SkippedContent ("[" ++ ts ++ "]") pos
+ report $ SkippedContent ("[" <> ts <> "]") pos
return (1, DefaultStyle, DefaultDelim)
(_, style, delim) <- option (1, DefaultStyle, DefaultDelim) markerSpec
spaces
@@ -2191,17 +2193,17 @@ orderedList' = try $ do
spaces
start <- option 1 $ try $ do pos <- getPosition
controlSeq "setcounter"
- ctr <- toksToString <$> braced
- guard $ "enum" `isPrefixOf` ctr
- guard $ all (`elem` ['i','v']) (drop 4 ctr)
+ ctr <- untokenize <$> braced
+ guard $ "enum" `T.isPrefixOf` ctr
+ guard $ T.all (`elem` ['i','v']) (T.drop 4 ctr)
optional sp
- num <- toksToString <$> braced
+ num <- untokenize <$> braced
case safeRead num of
Just i -> return (i + 1 :: Int)
Nothing -> do
report $ SkippedContent
- ("\\setcounter{" ++ ctr ++
- "}{" ++ num ++ "}") pos
+ ("\\setcounter{" <> ctr <>
+ "}{" <> num <> "}") pos
return 1
bs <- listenv "enumerate" (many item)
return $ orderedListWith (start, style, delim) bs
@@ -2235,7 +2237,7 @@ splitWordTok = do
inp <- getInput
case inp of
(Tok spos Word t : rest) ->
- setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) ++ rest
+ setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest
_ -> return ()
parseAligns :: PandocMonad m => LP m [(Alignment, Double, ([Tok], [Tok]))]
@@ -2256,7 +2258,7 @@ parseAligns = try $ do
let alignSuffix = symbol '<' >> braced
let colWidth = try $ do
symbol '{'
- ds <- trim . toksToString <$> manyTill anyTok (controlSeq "linewidth")
+ ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth")
spaces
symbol '}'
case safeRead ds of
@@ -2266,7 +2268,7 @@ parseAligns = try $ do
pref <- option [] alignPrefix
spaces
al <- alignChar
- width <- colWidth <|> option 0.0 (do s <- toksToString <$> braced
+ width <- colWidth <|> option 0.0 (do s <- untokenize <$> braced
pos <- getPosition
report $ SkippedContent s pos
return 0.0)
@@ -2276,13 +2278,13 @@ parseAligns = try $ do
let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro
symbol '*'
spaces
- ds <- trim . toksToString <$> braced
+ ds <- trim . untokenize <$> braced
spaces
spec <- braced
case safeRead ds of
Just n ->
getInput >>= setInput . (mconcat (replicate n spec) ++)
- Nothing -> Prelude.fail $ "Could not parse " ++ ds ++ " as number"
+ Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number"
bgroup
spaces
maybeBar
@@ -2379,7 +2381,7 @@ block = do
<|> blockCommand
<|> paragraph
<|> grouped block
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
blocks :: PandocMonad m => LP m Blocks
@@ -2387,9 +2389,9 @@ blocks = mconcat <$> many block
setDefaultLanguage :: PandocMonad m => LP m Blocks
setDefaultLanguage = do
- o <- option "" $ (T.unpack . T.filter (\c -> c /= '[' && c /= ']'))
+ o <- option "" $ T.filter (\c -> c /= '[' && c /= ']')
<$> rawopt
- polylang <- toksToString <$> braced
+ polylang <- untokenize <$> braced
case M.lookup polylang polyglossiaLangToBCP47 of
Nothing -> return mempty -- TODO mzero? warning?
Just langFunc -> do
diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
index b21398f93..7ec432a4a 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.LaTeX.Lang
Copyright : Copyright (C) 2018-2019 John MacFarlane
@@ -18,11 +19,12 @@ module Text.Pandoc.Readers.LaTeX.Lang
where
import Prelude
import qualified Data.Map as M
+import qualified Data.Text as T
import Text.Pandoc.BCP47 (Lang(..))
-polyglossiaLangToBCP47 :: M.Map String (String -> Lang)
+polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang)
polyglossiaLangToBCP47 = M.fromList
- [ ("arabic", \o -> case filter (/=' ') o of
+ [ ("arabic", \o -> case T.filter (/=' ') o of
"locale=algeria" -> Lang "ar" "" "DZ" []
"locale=mashriq" -> Lang "ar" "" "SY" []
"locale=libya" -> Lang "ar" "" "LY" []
@@ -30,7 +32,7 @@ polyglossiaLangToBCP47 = M.fromList
"locale=mauritania" -> Lang "ar" "" "MR" []
"locale=tunisia" -> Lang "ar" "" "TN" []
_ -> Lang "ar" "" "" [])
- , ("german", \o -> case filter (/=' ') o of
+ , ("german", \o -> case T.filter (/=' ') o of
"spelling=old" -> Lang "de" "" "DE" ["1901"]
"variant=austrian,spelling=old"
-> Lang "de" "" "AT" ["1901"]
@@ -40,11 +42,11 @@ polyglossiaLangToBCP47 = M.fromList
"variant=swiss" -> Lang "de" "" "CH" []
_ -> Lang "de" "" "" [])
, ("lsorbian", \_ -> Lang "dsb" "" "" [])
- , ("greek", \o -> case filter (/=' ') o of
+ , ("greek", \o -> case T.filter (/=' ') o of
"variant=poly" -> Lang "el" "" "polyton" []
"variant=ancient" -> Lang "grc" "" "" []
_ -> Lang "el" "" "" [])
- , ("english", \o -> case filter (/=' ') o of
+ , ("english", \o -> case T.filter (/=' ') o of
"variant=australian" -> Lang "en" "" "AU" []
"variant=canadian" -> Lang "en" "" "CA" []
"variant=british" -> Lang "en" "" "GB" []
@@ -52,7 +54,7 @@ polyglossiaLangToBCP47 = M.fromList
"variant=american" -> Lang "en" "" "US" []
_ -> Lang "en" "" "" [])
, ("usorbian", \_ -> Lang "hsb" "" "" [])
- , ("latin", \o -> case filter (/=' ') o of
+ , ("latin", \o -> case T.filter (/=' ') o of
"variant=classic" -> Lang "la" "" "" ["x-classic"]
_ -> Lang "la" "" "" [])
, ("slovenian", \_ -> Lang "sl" "" "" [])
@@ -133,7 +135,7 @@ polyglossiaLangToBCP47 = M.fromList
, ("vietnamese", \_ -> Lang "vi" "" "" [])
]
-babelLangToBCP47 :: String -> Maybe Lang
+babelLangToBCP47 :: T.Text -> Maybe Lang
babelLangToBCP47 s =
case s of
"austrian" -> Just $ Lang "de" "" "AT" ["1901"]
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 14cb408b0..a01abda46 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -97,8 +97,8 @@ import Text.Parsec.Pos
newtype DottedNum = DottedNum [Int]
deriving (Show)
-renderDottedNum :: DottedNum -> String
-renderDottedNum (DottedNum xs) =
+renderDottedNum :: DottedNum -> T.Text
+renderDottedNum (DottedNum xs) = T.pack $
intercalate "." (map show xs)
incrementDottedNum :: Int -> DottedNum -> DottedNum
@@ -111,18 +111,18 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions
, sMeta :: Meta
, sQuoteContext :: QuoteContext
, sMacros :: M.Map Text Macro
- , sContainers :: [String]
+ , sContainers :: [Text]
, sLogMessages :: [LogMessage]
- , sIdentifiers :: Set.Set String
+ , sIdentifiers :: Set.Set Text
, sVerbatimMode :: Bool
- , sCaption :: (Maybe Inlines, Maybe String)
+ , sCaption :: (Maybe Inlines, Maybe Text)
, sInListItem :: Bool
, sInTableCell :: Bool
, sLastHeaderNum :: DottedNum
, sLastFigureNum :: DottedNum
- , sLabels :: M.Map String [Inline]
+ , sLabels :: M.Map Text [Inline]
, sHasChapters :: Bool
- , sToggles :: M.Map String Bool
+ , sToggles :: M.Map Text Bool
, sExpanded :: Bool
}
deriving Show
@@ -202,7 +202,7 @@ withVerbatimMode parser = do
rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s)
=> [Tok] -> Bool -> LP m a -> LP m a
- -> ParserT String s m (a, String)
+ -> ParserT Text s m (a, Text)
rawLaTeXParser toks retokenize parser valParser = do
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate }
@@ -233,16 +233,16 @@ rawLaTeXParser toks retokenize parser valParser = do
, not (" " `T.isSuffixOf` result)
-> result <> " "
_ -> result
- return (val, T.unpack result')
+ return (val, result')
applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s)
- => String -> ParserT String s m String
+ => Text -> ParserT Text s m Text
applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|>
- do let retokenize = toksToString <$> many (satisfyTok (const True))
+ do let retokenize = untokenize <$> many (satisfyTok (const True))
pstate <- getState
let lstate = def{ sOptions = extractReaderOptions pstate
, sMacros = extractMacros pstate }
- res <- runParserT retokenize lstate "math" (tokenize "math" (T.pack s))
+ res <- runParserT retokenize lstate "math" (tokenize "math" s)
case res of
Left e -> Prelude.fail (show e)
Right s' -> return s'
@@ -307,7 +307,7 @@ totoks pos t =
: totoks (incSourceColumn pos 2) rest'
| c == '#' ->
let (t1, t2) = T.span (\d -> d >= '0' && d <= '9') rest
- in case safeRead (T.unpack t1) of
+ in case safeRead t1 of
Just i ->
Tok pos (Arg i) ("#" <> t1)
: totoks (incSourceColumn pos (1 + T.length t1)) t2
@@ -447,7 +447,7 @@ doMacros' n inp = do
handleMacros n' spos name ts = do
when (n' > 20) -- detect macro expansion loops
- $ throwError $ PandocMacroLoop (T.unpack name)
+ $ throwError $ PandocMacroLoop name
macros <- sMacros <$> getState
case M.lookup name macros of
Nothing -> mzero
@@ -588,7 +588,7 @@ primEscape = do
| c >= '\64' && c <= '\127' -> return (chr (ord c - 64))
| otherwise -> return (chr (ord c + 64))
Nothing -> Prelude.fail "Empty content of Esc1"
- Esc2 -> case safeRead ('0':'x':T.unpack (T.drop 2 t)) of
+ Esc2 -> case safeRead ("0x" <> T.drop 2 t) of
Just x -> return (chr x)
Nothing -> Prelude.fail $ "Could not read: " ++ T.unpack t
_ -> Prelude.fail "Expected an Esc1 or Esc2 token" -- should not happen
@@ -677,7 +677,7 @@ dimenarg = try $ do
guard $ rest `elem` ["", "pt","pc","in","bp","cm","mm","dd","cc","sp"]
return $ T.pack ['=' | ch] <> minus <> s
-ignore :: (Monoid a, PandocMonad m) => String -> ParserT s u m a
+ignore :: (Monoid a, PandocMonad m) => Text -> ParserT s u m a
ignore raw = do
pos <- getPosition
report $ SkippedContent raw pos
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index ddf469222..feacb8450 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -1,4 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Man
Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane
@@ -63,7 +65,7 @@ readWithMTokens :: PandocMonad m
-> [RoffToken] -- ^ input
-> m (Either PandocError a)
readWithMTokens parser state input =
- let leftF = PandocParsecError . intercalate "\n" $ show <$> input
+ let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input
in mapLeft leftF `liftM` runParserT parser state "source" input
parseMan :: PandocMonad m => ManParser m Pandoc
@@ -141,7 +143,7 @@ parseTable = do
isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
isHrule (_, [RoffTokens ss]) =
case Foldable.toList ss of
- [TextLine [RoffStr [c]]] -> c `elem` ['_','-','=']
+ [TextLine [RoffStr (T.unpack -> [c])]] -> c `elem` ['_','-','=']
_ -> False
isHrule _ = False
@@ -191,7 +193,7 @@ memptyLine = msatisfy isEmptyLine where
isEmptyLine EmptyLine = True
isEmptyLine _ = False
-mmacro :: PandocMonad m => String -> ManParser m RoffToken
+mmacro :: PandocMonad m => T.Text -> ManParser m RoffToken
mmacro mk = msatisfy isControlLine where
isControlLine (ControlLine mk' _ _) | mk == mk' = True
| otherwise = False
@@ -284,7 +286,7 @@ parseInline = try $ do
_ -> mzero
handleInlineMacro :: PandocMonad m
- => String -> [Arg] -> SourcePos -> ManParser m Inlines
+ => T.Text -> [Arg] -> SourcePos -> ManParser m Inlines
handleInlineMacro mname args _pos = do
case mname of
"UR" -> parseLink args
@@ -339,7 +341,7 @@ bareIP = msatisfy isBareIP where
isBareIP (ControlLine "IP" [] _) = True
isBareIP _ = False
-endmacro :: PandocMonad m => String -> ManParser m ()
+endmacro :: PandocMonad m => T.Text -> ManParser m ()
endmacro name = void (mmacro name)
<|> lookAhead (void newBlockMacro)
<|> lookAhead eof
@@ -356,7 +358,7 @@ parseCodeBlock = try $ do
toks <- (mmacro "nf" *> manyTill codeline (endmacro "fi"))
<|> (mmacro "EX" *> manyTill codeline (endmacro "EE"))
optional (mmacro "in")
- return $ codeBlock (intercalate "\n" $ catMaybes toks)
+ return $ codeBlock (T.intercalate "\n" $ catMaybes toks)
where
@@ -366,7 +368,7 @@ parseCodeBlock = try $ do
ControlLine "PP" _ _ -> return $ Just "" -- .PP sometimes used for blank line
ControlLine mname args pos -> do
(Just . query getText <$> handleInlineMacro mname args pos) <|>
- do report $ SkippedContent ('.':mname) pos
+ do report $ SkippedContent ("." <> mname) pos
return Nothing
Tbl _ _ pos -> do
report $ SkippedContent "TABLE" pos
@@ -375,12 +377,12 @@ parseCodeBlock = try $ do
TextLine ss
| not (null ss)
, all isFontToken ss -> return Nothing
- | otherwise -> return $ Just $ linePartsToString ss
+ | otherwise -> return $ Just $ linePartsToText ss
isFontToken Font{} = True
isFontToken _ = False
- getText :: Inline -> String
+ getText :: Inline -> T.Text
getText (Str s) = s
getText Space = " "
getText (Code _ s) = s
@@ -416,8 +418,8 @@ listItem mbListType = try $ do
(ControlLine _ args _) <- mmacro "IP"
case args of
(arg1 : _) -> do
- let cs = linePartsToString arg1
- let cs' = if not ('.' `elem` cs || ')' `elem` cs) then cs ++ "." else cs
+ let cs = linePartsToText arg1
+ let cs' = if not (T.any (== '.') cs || T.any (== ')') cs) then cs <> "." else cs
let lt = case Parsec.runParser anyOrderedListMarker defaultParserState
"list marker" cs' of
Right (start, listtype, listdelim)
@@ -467,7 +469,7 @@ parseLink args = do
ControlLine _ endargs _ <- mmacro "UE"
let url = case args of
[] -> ""
- (x:_) -> linePartsToString x
+ (x:_) -> linePartsToText x
return $ link url "" contents <>
case endargs of
[] -> mempty
@@ -479,7 +481,7 @@ parseEmailLink args = do
ControlLine _ endargs _ <- mmacro "ME"
let url = case args of
[] -> ""
- (x:_) -> "mailto:" ++ linePartsToString x
+ (x:_) -> "mailto:" <> linePartsToText x
return $ link url "" contents <>
case endargs of
[] -> mempty
@@ -490,6 +492,6 @@ skipUnknownMacro = do
tok <- mmacroAny
case tok of
ControlLine mkind _ pos -> do
- report $ SkippedContent ('.':mkind) pos
+ report $ SkippedContent ("." <> mkind) pos
return mempty
_ -> Prelude.fail "the impossible happened"
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 4807baada..f8349ea99 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Markdown
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -19,14 +21,15 @@ import Prelude
import Control.Monad
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BS
-import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
-import Data.List (intercalate, sortBy, transpose, elemIndex)
+import Data.Char (isAlphaNum, isPunctuation, isSpace)
+import Data.List (sortBy, transpose, elemIndex)
import qualified Data.Map as M
import Data.Maybe
import Data.Ord (comparing)
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
+import qualified Data.Text.Lazy as TL
import qualified Data.YAML as YAML
import qualified Data.YAML.Event as YE
import System.FilePath (addExtension, takeExtension)
@@ -47,7 +50,7 @@ import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
-type MarkdownParser m = ParserT [Char] ParserState m
+type MarkdownParser m = ParserT Text ParserState m
-- | Read markdown from an input string and return a Pandoc document.
readMarkdown :: PandocMonad m
@@ -56,7 +59,7 @@ readMarkdown :: PandocMonad m
-> m Pandoc
readMarkdown opts s = do
parsed <- readWithM parseMarkdown def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -77,7 +80,7 @@ isHruleChar '-' = True
isHruleChar '_' = True
isHruleChar _ = False
-setextHChars :: String
+setextHChars :: [Char]
setextHChars = "=-"
isBlank :: Char -> Bool
@@ -96,30 +99,30 @@ inList = do
ctx <- stateParserContext <$> getState
guard (ctx == ListItemState)
-spnl :: PandocMonad m => ParserT [Char] st m ()
+spnl :: PandocMonad m => ParserT Text st m ()
spnl = try $ do
skipSpaces
optional newline
skipSpaces
notFollowedBy (char '\n')
-spnl' :: PandocMonad m => ParserT [Char] st m String
+spnl' :: PandocMonad m => ParserT Text st m Text
spnl' = try $ do
xs <- many spaceChar
ys <- option "" $ try $ (:) <$> newline
<*> (many spaceChar <* notFollowedBy (char '\n'))
- return (xs ++ ys)
+ return $ T.pack $ xs ++ ys
-indentSpaces :: PandocMonad m => MarkdownParser m String
+indentSpaces :: PandocMonad m => MarkdownParser m Text
indentSpaces = try $ do
tabStop <- getOption readerTabStop
- count tabStop (char ' ') <|>
- string "\t" <?> "indentation"
+ countChar tabStop (char ' ') <|>
+ textStr "\t" <?> "indentation"
-nonindentSpaces :: PandocMonad m => MarkdownParser m String
+nonindentSpaces :: PandocMonad m => MarkdownParser m Text
nonindentSpaces = do
n <- skipNonindentSpaces
- return $ replicate n ' '
+ return $ T.replicate n " "
-- returns number of spaces parsed
skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int
@@ -139,8 +142,9 @@ inlinesInBalancedBrackets :: PandocMonad m => MarkdownParser m (F Inlines)
inlinesInBalancedBrackets =
try $ char '[' >> withRaw (go 1) >>=
parseFromString inlines . stripBracket . snd
- where stripBracket [] = []
- stripBracket xs = if last xs == ']' then init xs else xs
+ where stripBracket t = case T.unsnoc t of
+ Just (t', ']') -> t'
+ _ -> t
go :: PandocMonad m => Int -> MarkdownParser m ()
go 0 = return ()
go openBrackets =
@@ -160,7 +164,7 @@ inlinesInBalancedBrackets =
-- document structure
--
-rawTitleBlockLine :: PandocMonad m => MarkdownParser m String
+rawTitleBlockLine :: PandocMonad m => MarkdownParser m Text
rawTitleBlockLine = do
char '%'
skipSpaces
@@ -169,7 +173,7 @@ rawTitleBlockLine = do
notFollowedBy blankline
skipSpaces
anyLine
- return $ trim $ unlines (first:rest)
+ return $ trim $ T.unlines (first:rest)
titleLine :: PandocMonad m => MarkdownParser m (F Inlines)
titleLine = try $ do
@@ -222,9 +226,9 @@ yamlMetaBlock = try $ do
notFollowedBy blankline -- if --- is followed by a blank it's an HRULE
rawYamlLines <- manyTill anyLine stopLine
-- by including --- and ..., we allow yaml blocks with just comments:
- let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
+ let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
optional blanklines
- newMetaF <- yamlBsToMeta $ UTF8.fromStringLazy rawYaml
+ newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
-- Since `<>` is left-biased, existing values are not touched:
updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
return mempty
@@ -255,7 +259,7 @@ yamlBsToMeta bstr = do
return . return $ mempty
Left (_pos, err') -> do
logMessage $ CouldNotParseYamlMetadata
- err' pos
+ (T.pack err') pos
return . return $ mempty
nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text
@@ -270,11 +274,11 @@ toMetaValue x =
-- Note: a standard quoted or unquoted YAML value will
-- not end in a newline, but a "block" set off with
-- `|` or `>` will.
- if (T.pack "\n") `T.isSuffixOf` x
- then parseFromString' (asBlocks <$> parseBlocks) (xstring <> "\n")
+ if "\n" `T.isSuffixOf` x
+ then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n")
else parseFromString'
((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks))
- xstring
+ x
where pInlines = trimInlinesF . mconcat <$> manyTill inline eof
asBlocks p = do
p' <- p
@@ -282,7 +286,6 @@ toMetaValue x =
asInlines p = do
p' <- p
return $ MetaInlines (B.toList p')
- xstring = T.unpack x
checkBoolean :: Text -> Maybe Bool
checkBoolean t =
@@ -298,8 +301,8 @@ yamlToMetaValue (YAML.Scalar _ x) =
case x of
YAML.SStr t -> toMetaValue t
YAML.SBool b -> return $ return $ MetaBool b
- YAML.SFloat d -> return $ return $ MetaString (show d)
- YAML.SInt i -> return $ return $ MetaString (show i)
+ YAML.SFloat d -> return $ return $ MetaString $ tshow d
+ YAML.SInt i -> return $ return $ MetaString $ tshow i
YAML.SUnknown _ t ->
case checkBoolean t of
Just b -> return $ return $ MetaBool b
@@ -315,7 +318,7 @@ yamlToMetaValue _ = return $ return $ MetaString ""
yamlMap :: PandocMonad m
=> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
- -> MarkdownParser m (F (M.Map String MetaValue))
+ -> MarkdownParser m (F (M.Map Text MetaValue))
yamlMap o = do
kvs <- forM (M.toList o) $ \(key, v) -> do
k <- nodeToKey key
@@ -323,12 +326,12 @@ yamlMap o = do
let kvs' = filter (not . ignorable . fst) kvs
(fmap M.fromList . sequence) <$> mapM toMeta kvs'
where
- ignorable t = (T.pack "_") `T.isSuffixOf` t
+ ignorable t = "_" `T.isSuffixOf` t
toMeta (k, v) = do
fv <- yamlToMetaValue v
return $ do
v' <- fv
- return (T.unpack k, v')
+ return (k, v')
stopLine :: PandocMonad m => MarkdownParser m ()
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
@@ -343,14 +346,14 @@ mmdTitleBlock = try $ do
updateState $ \st -> st{ stateMeta' = stateMeta' st <>
return (Meta $ M.fromList kvPairs) }
-kvPair :: PandocMonad m => Bool -> MarkdownParser m (String, MetaValue)
+kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue)
kvPair allowEmpty = try $ do
- key <- many1Till (alphaNum <|> oneOf "_- ") (char ':')
- val <- trim <$> manyTill anyChar
+ key <- many1TillChar (alphaNum <|> oneOf "_- ") (char ':')
+ val <- trim <$> manyTillChar anyChar
(try $ newline >> lookAhead (blankline <|> nonspaceChar))
- guard $ allowEmpty || not (null val)
- let key' = concat $ words $ map toLower key
- let val' = MetaBlocks $ B.toList $ B.plain $B.text val
+ guard $ allowEmpty || not (T.null val)
+ let key' = T.concat $ T.words $ T.toLower key
+ let val' = MetaBlocks $ B.toList $ B.plain $ B.text val
return (key',val')
parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc
@@ -380,13 +383,13 @@ referenceKey = try $ do
(_,raw) <- reference
char ':'
skipSpaces >> optional newline >> skipSpaces >> notFollowedBy (char '[')
- let sourceURL = fmap unwords $ many $ try $ do
+ let sourceURL = fmap T.unwords $ many $ try $ do
skipMany spaceChar
notFollowedBy' referenceTitle
notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes
notFollowedBy' (() <$ reference)
- many1 $ notFollowedBy space >> litChar
- let betweenAngles = try $ char '<' >> manyTill litChar (char '>')
+ many1Char $ notFollowedBy space >> litChar
+ let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" referenceTitle
attr <- option nullAttr $ try $
@@ -411,20 +414,20 @@ referenceKey = try $ do
updateState $ \s -> s { stateKeys = M.insert key (target, attr') oldkeys }
return $ return mempty
-referenceTitle :: PandocMonad m => MarkdownParser m String
+referenceTitle :: PandocMonad m => MarkdownParser m Text
referenceTitle = try $ do
skipSpaces >> optional newline >> skipSpaces
quotedTitle '"' <|> quotedTitle '\'' <|> charsInBalanced '(' ')' litChar
-- A link title in quotes
-quotedTitle :: PandocMonad m => Char -> MarkdownParser m String
+quotedTitle :: PandocMonad m => Char -> MarkdownParser m Text
quotedTitle c = try $ do
char c
notFollowedBy spaces
let pEnder = try $ char c >> notFollowedBy (satisfy isAlphaNum)
- let regChunk = many1 (noneOf ['\\','\n','&',c]) <|> count 1 litChar
- let nestedChunk = (\x -> [c] ++ x ++ [c]) <$> quotedTitle c
- unwords . words . concat <$> manyTill (nestedChunk <|> regChunk) pEnder
+ let regChunk = many1Char (noneOf ['\\','\n','&',c]) <|> countChar 1 litChar
+ let nestedChunk = (\x -> T.singleton c <> x <> T.singleton c) <$> quotedTitle c
+ T.unwords . T.words . T.concat <$> manyTill (nestedChunk <|> regChunk) pEnder
-- | PHP Markdown Extra style abbreviation key. Currently
-- we just skip them, since Pandoc doesn't have an element for
@@ -440,21 +443,21 @@ abbrevKey = do
blanklines
return $ return mempty
-noteMarker :: PandocMonad m => MarkdownParser m String
-noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
+noteMarker :: PandocMonad m => MarkdownParser m Text
+noteMarker = string "[^" >> many1TillChar (satisfy $ not . isBlank) (char ']')
-rawLine :: PandocMonad m => MarkdownParser m String
+rawLine :: PandocMonad m => MarkdownParser m Text
rawLine = try $ do
notFollowedBy blankline
notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker
optional indentSpaces
anyLine
-rawLines :: PandocMonad m => MarkdownParser m String
+rawLines :: PandocMonad m => MarkdownParser m Text
rawLines = do
first <- anyLine
rest <- many rawLine
- return $ unlines (first:rest)
+ return $ T.unlines (first:rest)
noteBlock :: PandocMonad m => MarkdownParser m (F Blocks)
noteBlock = try $ do
@@ -466,7 +469,7 @@ noteBlock = try $ do
optional indentSpaces
first <- rawLines
rest <- many $ try $ blanklines >> indentSpaces >> rawLines
- let raw = unlines (first:rest) ++ "\n"
+ let raw = T.unlines (first:rest) <> "\n"
optional blanklines
parsed <- parseFromString' parseBlocks raw
oldnotes <- stateNotes' <$> getState
@@ -510,7 +513,7 @@ block = do
, para
, plain
] <?> "block"
- trace (take 60 $ show $ B.toList $ runF res defaultParserState)
+ trace (T.take 60 $ tshow $ B.toList $ runF res defaultParserState)
return res
--
@@ -570,7 +573,7 @@ mmdHeaderIdentifier :: PandocMonad m => MarkdownParser m Attr
mmdHeaderIdentifier = do
(_, raw) <- reference
let raw' = trim $ stripFirstAndLast raw
- let ident = concat $ words $ map toLower raw'
+ let ident = T.concat $ T.words $ T.toLower raw'
let attr = (ident, [], [])
guardDisabled Ext_implicit_header_references
<|> registerImplicitHeader raw' attr
@@ -600,20 +603,20 @@ setextHeader = try $ do
<|> registerImplicitHeader raw attr'
return $ B.headerWith attr' level <$> text
-registerImplicitHeader :: PandocMonad m => String -> Attr -> MarkdownParser m ()
+registerImplicitHeader :: PandocMonad m => Text -> Attr -> MarkdownParser m ()
registerImplicitHeader raw attr@(ident, _, _)
- | null raw = return ()
+ | T.null raw = return ()
| otherwise = do
- let key = toKey $ "[" ++ raw ++ "]"
+ let key = toKey $ "[" <> raw <> "]"
updateState $ \s ->
- s { stateHeaderKeys = M.insert key (('#':ident,""), attr)
+ s { stateHeaderKeys = M.insert key (("#" <> ident,""), attr)
(stateHeaderKeys s) }
--
-- hrule block
--
-hrule :: PandocMonad m => ParserT [Char] st m (F Blocks)
+hrule :: PandocMonad m => ParserT Text st m (F Blocks)
hrule = try $ do
skipSpaces
start <- satisfy isHruleChar
@@ -627,13 +630,13 @@ hrule = try $ do
-- code blocks
--
-indentedLine :: PandocMonad m => MarkdownParser m String
+indentedLine :: PandocMonad m => MarkdownParser m Text
indentedLine = indentSpaces >> anyLineNewline
blockDelimiter :: PandocMonad m
=> (Char -> Bool)
-> Maybe Int
- -> ParserT [Char] ParserState m Int
+ -> ParserT Text ParserState m Int
blockDelimiter f len = try $ do
skipNonindentSpaces
c <- lookAhead (satisfy f)
@@ -652,11 +655,11 @@ attributes = try $ do
attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr)
attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr
-identifier :: PandocMonad m => MarkdownParser m String
+identifier :: PandocMonad m => MarkdownParser m Text
identifier = do
first <- letter
rest <- many $ alphaNum <|> oneOf "-_:."
- return (first:rest)
+ return $ T.pack (first:rest)
identifierAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
identifierAttr = try $ do
@@ -674,15 +677,15 @@ keyValAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
keyValAttr = try $ do
key <- identifier
char '='
- val <- enclosed (char '"') (char '"') litChar
- <|> enclosed (char '\'') (char '\'') litChar
+ val <- T.pack <$> enclosed (char '"') (char '"') litChar
+ <|> T.pack <$> enclosed (char '\'') (char '\'') litChar
<|> ("" <$ try (string "\"\""))
<|> ("" <$ try (string "''"))
- <|> many (escapedChar' <|> noneOf " \t\n\r}")
+ <|> manyChar (escapedChar' <|> noneOf " \t\n\r}")
return $ \(id',cs,kvs) ->
case key of
"id" -> (val,cs,kvs)
- "class" -> (id',cs ++ words val,kvs)
+ "class" -> (id',cs ++ T.words val,kvs)
_ -> (id',cs,kvs ++ [(key,val)])
specialAttr :: PandocMonad m => MarkdownParser m (Attr -> Attr)
@@ -690,12 +693,12 @@ specialAttr = do
char '-'
return $ \(id',cs,kvs) -> (id',cs ++ ["unnumbered"],kvs)
-rawAttribute :: PandocMonad m => MarkdownParser m String
+rawAttribute :: PandocMonad m => MarkdownParser m Text
rawAttribute = do
char '{'
skipMany spaceChar
char '='
- format <- many1 $ satisfy (\c -> isAlphaNum c || c `elem` "-_")
+ format <- many1Char $ satisfy (\c -> isAlphaNum c || c `elem` ['-', '_'])
skipMany spaceChar
char '}'
return format
@@ -703,7 +706,7 @@ rawAttribute = do
codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks)
codeBlockFenced = try $ do
indentchars <- nonindentSpaces
- let indentLevel = length indentchars
+ let indentLevel = T.length indentchars
c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~'))
<|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`'))
size <- blockDelimiter (== c) Nothing
@@ -713,9 +716,9 @@ codeBlockFenced = try $ do
<|>
(Right <$> option ("",[],[])
(try (guardEnabled Ext_fenced_code_attributes >> attributes)
- <|> ((\x -> ("",[toLanguageId x],[])) <$> many1 nonspaceChar)))
+ <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar)))
blankline
- contents <- intercalate "\n" <$>
+ contents <- T.intercalate "\n" <$>
manyTill (gobbleAtMostSpaces indentLevel >> anyLine)
(try $ do
blockDelimiter (== c) (Just size)
@@ -726,8 +729,8 @@ codeBlockFenced = try $ do
Right attr -> B.codeBlockWith attr contents
-- correctly handle github language identifiers
-toLanguageId :: String -> String
-toLanguageId = map toLower . go
+toLanguageId :: Text -> Text
+toLanguageId = T.toLower . go
where go "c++" = "cpp"
go "objective-c" = "objectivec"
go x = x
@@ -737,11 +740,11 @@ codeBlockIndented = do
contents <- many1 (indentedLine <|>
try (do b <- blanklines
l <- indentedLine
- return $ b ++ l))
+ return $ b <> l))
optional blanklines
classes <- getOption readerIndentedCodeClasses
return $ return $ B.codeBlockWith ("", classes, []) $
- stripTrailingNewlines $ concat contents
+ stripTrailingNewlines $ T.concat contents
lhsCodeBlock :: PandocMonad m => MarkdownParser m (F Blocks)
lhsCodeBlock = do
@@ -751,33 +754,33 @@ lhsCodeBlock = do
<|> (return . B.codeBlockWith ("",["haskell"],[]) <$>
lhsCodeBlockInverseBird)
-lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockLaTeX :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockLaTeX = try $ do
string "\\begin{code}"
manyTill spaceChar newline
- contents <- many1Till anyChar (try $ string "\\end{code}")
+ contents <- many1TillChar anyChar (try $ string "\\end{code}")
blanklines
return $ stripTrailingNewlines contents
-lhsCodeBlockBird :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockBird :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockBird = lhsCodeBlockBirdWith '>'
-lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m String
+lhsCodeBlockInverseBird :: PandocMonad m => MarkdownParser m Text
lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<'
-lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m String
+lhsCodeBlockBirdWith :: PandocMonad m => Char -> MarkdownParser m Text
lhsCodeBlockBirdWith c = try $ do
pos <- getPosition
when (sourceColumn pos /= 1) $ Prelude.fail "Not in first column"
lns <- many1 $ birdTrackLine c
-- if (as is normal) there is always a space after >, drop it
- let lns' = if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
+ let lns' = if all (\ln -> T.null ln || T.take 1 ln == " ") lns
+ then map (T.drop 1) lns
else lns
blanklines
- return $ intercalate "\n" lns'
+ return $ T.intercalate "\n" lns'
-birdTrackLine :: PandocMonad m => Char -> ParserT [Char] st m String
+birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text
birdTrackLine c = try $ do
char c
-- allow html tags on left margin:
@@ -791,12 +794,12 @@ birdTrackLine c = try $ do
emailBlockQuoteStart :: PandocMonad m => MarkdownParser m Char
emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' <* optional (char ' ')
-emailBlockQuote :: PandocMonad m => MarkdownParser m [String]
+emailBlockQuote :: PandocMonad m => MarkdownParser m [Text]
emailBlockQuote = try $ do
emailBlockQuoteStart
- let emailLine = many $ nonEndline <|> try
- (endline >> notFollowedBy emailBlockQuoteStart >>
- return '\n')
+ let emailLine = manyChar $ nonEndline <|> try
+ (endline >> notFollowedBy emailBlockQuoteStart >>
+ return '\n')
let emailSep = try (newline >> emailBlockQuoteStart)
first <- emailLine
rest <- many $ try $ emailSep >> emailLine
@@ -809,7 +812,7 @@ blockQuote :: PandocMonad m => MarkdownParser m (F Blocks)
blockQuote = do
raw <- emailBlockQuote
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString' parseBlocks $ intercalate "\n" raw ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ T.intercalate "\n" raw <> "\n\n"
return $ B.blockQuote <$> contents
--
@@ -833,7 +836,7 @@ orderedListStart mbstydelim = try $ do
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
(do guardDisabled Ext_fancy_lists
- start <- many1 digit >>= safeRead
+ start <- many1Char digit >>= safeRead
char '.'
gobbleSpaces 1 <|> () <$ lookAhead newline
optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar)
@@ -857,7 +860,7 @@ orderedListStart mbstydelim = try $ do
listStart :: PandocMonad m => MarkdownParser m ()
listStart = bulletListStart <|> Control.Monad.void (orderedListStart Nothing)
-listLine :: PandocMonad m => Int -> MarkdownParser m String
+listLine :: PandocMonad m => Int -> MarkdownParser m Text
listLine continuationIndent = try $ do
notFollowedBy' (do gobbleSpaces continuationIndent
skipMany spaceChar
@@ -867,19 +870,19 @@ listLine continuationIndent = try $ do
optional (() <$ gobbleSpaces continuationIndent)
listLineCommon
-listLineCommon :: PandocMonad m => MarkdownParser m String
-listLineCommon = concat <$> manyTill
- ( many1 (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
+listLineCommon :: PandocMonad m => MarkdownParser m Text
+listLineCommon = T.concat <$> manyTill
+ ( many1Char (satisfy $ \c -> c `notElem` ['\n', '<', '`'])
<|> fmap snd (withRaw code)
<|> fmap snd (htmlTag isCommentTag)
- <|> count 1 anyChar
+ <|> countChar 1 anyChar
) newline
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: PandocMonad m
=> Bool -- four space rule
-> MarkdownParser m a
- -> MarkdownParser m (String, Int)
+ -> MarkdownParser m (Text, Int)
rawListItem fourSpaceRule start = try $ do
pos1 <- getPosition
start
@@ -892,14 +895,14 @@ rawListItem fourSpaceRule start = try $ do
notFollowedBy (() <$ codeBlockFenced)
notFollowedBy blankline
listLine continuationIndent)
- blanks <- many blankline
- let result = unlines (first:rest) ++ blanks
+ blanks <- manyChar blankline
+ let result = T.unlines (first:rest) <> blanks
return (result, continuationIndent)
-- continuation of a list item - indented and separated by blankline
-- or (in compact lists) endline.
-- note: nested lists are parsed as continuations
-listContinuation :: PandocMonad m => Int -> MarkdownParser m String
+listContinuation :: PandocMonad m => Int -> MarkdownParser m Text
listContinuation continuationIndent = try $ do
x <- try $ do
notFollowedBy blankline
@@ -913,12 +916,12 @@ listContinuation continuationIndent = try $ do
notFollowedByDivCloser
gobbleSpaces continuationIndent <|> notFollowedBy' listStart
anyLineNewline
- blanks <- many blankline
- return $ concat (x:xs) ++ blanks
+ blanks <- manyChar blankline
+ return $ T.concat (x:xs) <> blanks
-- Variant of blanklines that doesn't require blank lines
-- before a fence or eof.
-blanklines' :: PandocMonad m => MarkdownParser m [Char]
+blanklines' :: PandocMonad m => MarkdownParser m Text
blanklines' = blanklines <|> try checkDivCloser
where checkDivCloser = do
guardEnabled Ext_fenced_divs
@@ -954,7 +957,7 @@ listItem fourSpaceRule start = try $ do
(first, continuationIndent) <- rawListItem fourSpaceRule start
continuations <- many (listContinuation continuationIndent)
-- parse the extracted block, which may contain various block elements:
- let raw = concat (first:continuations)
+ let raw = T.concat (first:continuations)
contents <- parseFromString' parseBlocks raw
updateState (\st -> st {stateParserContext = oldContext})
exts <- getOption readerExtensions
@@ -990,7 +993,7 @@ defListMarker = do
sps <- nonindentSpaces
char ':' <|> char '~'
tabStop <- getOption readerTabStop
- let remaining = tabStop - (length sps + 1)
+ let remaining = tabStop - (T.length sps + 1)
if remaining > 0
then try (count remaining (char ' ')) <|> string "\t" <|> many1 spaceChar
else mzero
@@ -1001,11 +1004,11 @@ definitionListItem compact = try $ do
rawLine' <- anyLine
raw <- many1 $ defRawBlock compact
term <- parseFromString' (trimInlinesF <$> inlines) rawLine'
- contents <- mapM (parseFromString' parseBlocks . (++"\n")) raw
+ contents <- mapM (parseFromString' parseBlocks . (<> "\n")) raw
optional blanklines
return $ liftM2 (,) term (sequence contents)
-defRawBlock :: PandocMonad m => Bool -> MarkdownParser m String
+defRawBlock :: PandocMonad m => Bool -> MarkdownParser m Text
defRawBlock compact = try $ do
hasBlank <- option False $ blankline >> return True
defListMarker
@@ -1020,13 +1023,13 @@ defRawBlock compact = try $ do
<|> notFollowedBy defListMarker
anyLine )
rawlines <- many dline
- cont <- fmap concat $ many $ try $ do
+ cont <- fmap T.concat $ many $ try $ do
trailing <- option "" blanklines
ln <- indentSpaces >> notFollowedBy blankline >> anyLine
lns <- many dline
- return $ trailing ++ unlines (ln:lns)
- return $ trimr (firstline ++ unlines rawlines ++ cont) ++
- if hasBlank || not (null cont) then "\n\n" else ""
+ return $ trailing <> T.unlines (ln:lns)
+ return $ trimr (firstline <> T.unlines rawlines <> cont) <>
+ if hasBlank || not (T.null cont) then "\n\n" else ""
definitionList :: PandocMonad m => MarkdownParser m (F Blocks)
definitionList = try $ do
@@ -1063,7 +1066,7 @@ para = try $ do
| not (null alt) ->
-- the fig: at beginning of title indicates a figure
return $ B.singleton
- $ Image attr alt (src,'f':'i':'g':':':tit)
+ $ Image attr alt (src, "fig:" <> tit)
_ -> return x'
| otherwise = x
result <- implicitFigures . trimInlinesF <$> inlines1
@@ -1082,7 +1085,7 @@ para = try $ do
inHtmlBlock <- stateInHtmlBlock <$> getState
case inHtmlBlock of
Just "div" -> () <$
- lookAhead (htmlTag (~== TagClose "div"))
+ lookAhead (htmlTag (~== TagClose ("div" :: Text)))
_ -> mzero
<|> do guardEnabled Ext_fenced_divs
divLevel <- stateFencedDivLevel <$> getState
@@ -1098,7 +1101,7 @@ plain = fmap B.plain . trimInlinesF <$> inlines1
-- raw html
--
-htmlElement :: PandocMonad m => MarkdownParser m String
+htmlElement :: PandocMonad m => MarkdownParser m Text
htmlElement = rawVerbatimBlock
<|> strictHtmlBlock
<|> fmap snd (htmlTag isBlockTag)
@@ -1132,14 +1135,14 @@ htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
- return $ if null first
+ return $ if T.null first
then mempty
else return $ B.rawBlock "html" first
-strictHtmlBlock :: PandocMonad m => MarkdownParser m String
+strictHtmlBlock :: PandocMonad m => MarkdownParser m Text
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
-rawVerbatimBlock :: PandocMonad m => MarkdownParser m String
+rawVerbatimBlock :: PandocMonad m => MarkdownParser m Text
rawVerbatimBlock = htmlInBalanced isVerbTag
where isVerbTag (TagOpen "pre" _) = True
isVerbTag (TagOpen "style" _) = True
@@ -1150,13 +1153,13 @@ rawVerbatimBlock = htmlInBalanced isVerbTag
rawTeXBlock :: PandocMonad m => MarkdownParser m (F Blocks)
rawTeXBlock = do
guardEnabled Ext_raw_tex
- result <- (B.rawBlock "tex" . trim . concat <$>
- many1 ((++) <$> rawConTeXtEnvironment <*> spnl'))
- <|> (B.rawBlock "tex" . trim . concat <$>
- many1 ((++) <$> rawLaTeXBlock <*> spnl'))
+ result <- (B.rawBlock "tex" . trim . T.concat <$>
+ many1 ((<>) <$> rawConTeXtEnvironment <*> spnl'))
+ <|> (B.rawBlock "tex" . trim . T.concat <$>
+ many1 ((<>) <$> rawLaTeXBlock <*> spnl'))
return $ case B.toList result of
[RawBlock _ cs]
- | all (`elem` [' ','\t','\n']) cs -> return mempty
+ | T.all (`elem` [' ','\t','\n']) cs -> return mempty
-- don't create a raw block for suppressed macro defs
_ -> return result
@@ -1186,7 +1189,7 @@ rawHtmlBlocks = do
return result
-- remove markdown="1" attribute
-stripMarkdownAttribute :: String -> String
+stripMarkdownAttribute :: Text -> Text
stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s
where filterAttrib (TagOpen t as) = TagOpen t
[(k,v) | (k,v) <- as, k /= "markdown"]
@@ -1211,7 +1214,7 @@ lineBlock = try $ do
-- and the length including trailing space.
dashedLine :: PandocMonad m
=> Char
- -> ParserT [Char] st m (Int, Int)
+ -> ParserT Text st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many spaceChar
@@ -1232,9 +1235,9 @@ simpleTableHeader headless = try $ do
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
+ let indices = scanl (+) (T.length initSp) lines'
-- If no header, calculate alignment on basis of first row of text
- rawHeads <- fmap (tail . splitStringByIndices (init indices)) $
+ rawHeads <- fmap (tail . splitTextByIndices (init indices)) $
if headless
then lookAhead anyLine
else return rawContent
@@ -1250,15 +1253,15 @@ simpleTableHeader headless = try $ do
-- Returns an alignment type for a table, based on a list of strings
-- (the rows of the column header) and a number (the length of the
-- dashed line under the rows.
-alignType :: [String]
+alignType :: [Text]
-> Int
-> Alignment
alignType [] _ = AlignDefault
alignType strLst len =
- let nonempties = filter (not . null) $ map trimr strLst
+ let nonempties = filter (not . T.null) $ map trimr strLst
(leftSpace, rightSpace) =
- case sortBy (comparing length) nonempties of
- (x:_) -> (head x `elem` " \t", length x < len)
+ case sortBy (comparing T.length) nonempties of
+ (x:_) -> (T.head x `elem` [' ', 't'], T.length x < len)
[] -> (False, False)
in case (leftSpace, rightSpace) of
(True, False) -> AlignRight
@@ -1267,7 +1270,7 @@ alignType strLst len =
(False, False) -> AlignDefault
-- Parse a table footer - dashed lines followed by blank line.
-tableFooter :: PandocMonad m => MarkdownParser m String
+tableFooter :: PandocMonad m => MarkdownParser m Text
tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines'
-- Parse a table separator - dashed line.
@@ -1277,12 +1280,12 @@ tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n'
-- Parse a raw line and split it into chunks by indices.
rawTableLine :: PandocMonad m
=> [Int]
- -> MarkdownParser m [String]
+ -> MarkdownParser m [Text]
rawTableLine indices = do
notFollowedBy' (blanklines' <|> tableFooter)
- line <- many1Till anyChar newline
+ line <- many1TillChar anyChar newline
return $ map trim $ tail $
- splitStringByIndices (init indices) line
+ splitTextByIndices (init indices) line
-- Parse a table line and return a list of lists of blocks (columns).
tableLine :: PandocMonad m
@@ -1297,7 +1300,7 @@ multilineRow :: PandocMonad m
-> MarkdownParser m (F [Blocks])
multilineRow indices = do
colLines <- many1 (rawTableLine indices)
- let cols = map unlines $ transpose colLines
+ let cols = map T.unlines $ transpose colLines
fmap sequence $ mapM (parseFromString' (mconcat <$> many plain)) cols
-- Parses a table caption: inlines beginning with 'Table:'
@@ -1344,7 +1347,7 @@ multilineTableHeader headless = try $ do
dashes <- many1 (dashedLine '-')
newline
let (lengths, lines') = unzip dashes
- let indices = scanl (+) (length initSp) lines'
+ let indices = scanl (+) (T.length initSp) lines'
-- compensate for the fact that intercolumn spaces are
-- not included in the last index:
let indices' = case reverse indices of
@@ -1352,14 +1355,14 @@ multilineTableHeader headless = try $ do
(x:xs) -> reverse (x+1:xs)
rawHeadsList <- if headless
then fmap (map (:[]) . tail .
- splitStringByIndices (init indices')) $ lookAhead anyLine
+ splitTextByIndices (init indices')) $ lookAhead anyLine
else return $ transpose $ map
- (tail . splitStringByIndices (init indices'))
+ (tail . splitTextByIndices (init indices'))
rawContent
let aligns = zipWith alignType rawHeadsList lengths
let rawHeads = if headless
then replicate (length dashes) ""
- else map (unlines . map trim) rawHeadsList
+ else map (T.unlines . map trim) rawHeadsList
heads <- fmap sequence $
mapM ((parseFromString' (mconcat <$> many plain)).trim) rawHeads
return (heads, aligns, indices')
@@ -1393,7 +1396,7 @@ pipeTable = try $ do
lines' <- many pipeTableRow
let lines'' = map (take (length aligns) <$>) lines'
let maxlength = maximum $
- map (\x -> length . stringify $ runF x def) (heads' : lines'')
+ map (\x -> T.length . stringify $ runF x def) (heads' : lines'')
numColumns <- getOption readerColumns
let widths = if maxlength > numColumns
then map (\len ->
@@ -1430,7 +1433,7 @@ pipeTableCell =
return $ B.plain <$> result)
<|> return mempty
-pipeTableHeaderPart :: PandocMonad m => ParserT [Char] st m (Alignment, Int)
+pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int)
pipeTableHeaderPart = try $ do
skipMany spaceChar
left <- optionMaybe (char ':')
@@ -1446,12 +1449,12 @@ pipeTableHeaderPart = try $ do
(Just _,Just _) -> AlignCenter, len)
-- Succeed only if current line contains a pipe.
-scanForPipe :: PandocMonad m => ParserT [Char] st m ()
+scanForPipe :: PandocMonad m => ParserT Text st m ()
scanForPipe = do
inp <- getInput
- case break (\c -> c == '\n' || c == '|') inp of
- (_,'|':_) -> return ()
- _ -> mzero
+ case T.break (\c -> c == '\n' || c == '|') inp of
+ (_, T.uncons -> Just ('|', _)) -> return ()
+ _ -> mzero
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'. Variant of the version in
@@ -1561,7 +1564,7 @@ escapedChar = do
result <- escapedChar'
case result of
' ' -> return $ return $ B.str "\160" -- "\ " is a nonbreaking space
- _ -> return $ return $ B.str [result]
+ _ -> return $ return $ B.str $ T.singleton result
ltSign :: PandocMonad m => MarkdownParser m (F Inlines)
ltSign = do
@@ -1574,12 +1577,12 @@ exampleRef :: PandocMonad m => MarkdownParser m (F Inlines)
exampleRef = try $ do
guardEnabled Ext_example_lists
char '@'
- lab <- many1 (alphaNum <|> oneOf "-_")
+ lab <- many1Char (alphaNum <|> oneOf "-_")
return $ do
st <- askF
return $ case M.lookup lab (stateExamples st) of
- Just n -> B.str (show n)
- Nothing -> B.str ('@':lab)
+ Just n -> B.str $ tshow n
+ Nothing -> B.str $ "@" <> lab
symbol :: PandocMonad m => MarkdownParser m (F Inlines)
symbol = do
@@ -1587,16 +1590,16 @@ symbol = do
<|> try (do lookAhead $ char '\\'
notFollowedBy' (() <$ rawTeXBlock)
char '\\')
- return $ return $ B.str [result]
+ return $ return $ B.str $ T.singleton result
-- parses inline code, between n `s and n `s
code :: PandocMonad m => MarkdownParser m (F Inlines)
code = try $ do
starts <- many1 (char '`')
skipSpaces
- result <- (trim . concat) <$>
+ result <- (trim . T.concat) <$>
manyTill (notFollowedBy (inList >> listStart) >>
- (many1 (noneOf "`\n") <|> many1 (char '`') <|>
+ (many1Char (noneOf "`\n") <|> many1Char (char '`') <|>
(char '\n' >> notFollowedBy' blankline >> return " ")))
(try (skipSpaces >> count (length starts) (char '`') >>
notFollowedBy (char '`')))
@@ -1627,10 +1630,10 @@ enclosure c = do
guardDisabled Ext_intraword_underscores
<|> guard (c == '*')
<|> (guard =<< notAfterString)
- cs <- many1 (char c)
+ cs <- many1Char (char c)
(return (B.str cs) <>) <$> whitespace
<|>
- case length cs of
+ case T.length cs of
3 -> three c
2 -> two c mempty
1 -> one c mempty
@@ -1653,7 +1656,7 @@ three c = do
(ender c 3 >> updateLastStrPos >> return ((B.strong . B.emph) <$> contents))
<|> (ender c 2 >> updateLastStrPos >> one c (B.strong <$> contents))
<|> (ender c 1 >> updateLastStrPos >> two c (B.emph <$> contents))
- <|> return (return (B.str [c,c,c]) <> contents)
+ <|> return (return (B.str $ T.pack [c,c,c]) <> contents)
-- Parse inlines til you hit two c's, and emit strong.
-- If you never do hit two cs, emit ** plus inlines parsed.
@@ -1662,7 +1665,7 @@ two c prefix' = do
contents <- mconcat <$> many (try $ notFollowedBy (ender c 2) >> inline)
(ender c 2 >> updateLastStrPos >>
return (B.strong <$> (prefix' <> contents)))
- <|> return (return (B.str [c,c]) <> (prefix' <> contents))
+ <|> return (return (B.str $ T.pack [c,c]) <> (prefix' <> contents))
-- Parse inlines til you hit a c, and emit emph.
-- If you never hit a c, emit * plus inlines parsed.
@@ -1673,7 +1676,7 @@ one c prefix' = do
notFollowedBy (ender c 1) >>
two c mempty) )
(ender c 1 >> updateLastStrPos >> return (B.emph <$> (prefix' <> contents)))
- <|> return (return (B.str [c]) <> (prefix' <> contents))
+ <|> return (return (B.str $ T.singleton c) <> (prefix' <> contents))
strongOrEmph :: PandocMonad m => MarkdownParser m (F Inlines)
strongOrEmph = enclosure '*' <|> enclosure '_'
@@ -1717,16 +1720,16 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace"
where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak)
regsp = skipMany spaceChar >> return B.space
-nonEndline :: PandocMonad m => ParserT [Char] st m Char
+nonEndline :: PandocMonad m => ParserT Text st m Char
nonEndline = satisfy (/='\n')
str :: PandocMonad m => MarkdownParser m (F Inlines)
str = do
- result <- many1 (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
+ result <- many1Char (alphaNum <|> try (char '.' <* notFollowedBy (char '.')))
updateLastStrPos
(do guardEnabled Ext_smart
abbrevs <- getOption readerAbbreviations
- if not (null result) && last result == '.' && result `Set.member` abbrevs
+ if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs
then try (do ils <- whitespace
-- ?? lookAhead alphaNum
-- replace space after with nonbreaking space
@@ -1766,36 +1769,36 @@ endline = try $ do
--
-- a reference label for a link
-reference :: PandocMonad m => MarkdownParser m (F Inlines, String)
+reference :: PandocMonad m => MarkdownParser m (F Inlines, Text)
reference = do
guardDisabled Ext_footnotes <|> notFollowedBy' (string "[^")
guardDisabled Ext_citations <|> notFollowedBy' (string "[@")
withRaw $ trimInlinesF <$> inlinesInBalancedBrackets
-parenthesizedChars :: PandocMonad m => MarkdownParser m [Char]
+parenthesizedChars :: PandocMonad m => MarkdownParser m Text
parenthesizedChars = do
result <- charsInBalanced '(' ')' litChar
- return $ '(' : result ++ ")"
+ return $ "(" <> result <> ")"
-- source for a link, with optional title
-source :: PandocMonad m => MarkdownParser m (String, String)
+source :: PandocMonad m => MarkdownParser m (Text, Text)
source = do
char '('
skipSpaces
let urlChunk =
try parenthesizedChars
- <|> (notFollowedBy (oneOf " )") >> count 1 litChar)
- <|> try (many1 spaceChar <* notFollowedBy (oneOf "\"')"))
- let sourceURL = (unwords . words . concat) <$> many urlChunk
+ <|> (notFollowedBy (oneOf " )") >> countChar 1 litChar)
+ <|> try (many1Char spaceChar <* notFollowedBy (oneOf "\"')"))
+ let sourceURL = (T.unwords . T.words . T.concat) <$> many urlChunk
let betweenAngles = try $
- char '<' >> manyTill litChar (char '>')
+ char '<' >> manyTillChar litChar (char '>')
src <- try betweenAngles <|> sourceURL
tit <- option "" $ try $ spnl >> linkTitle
skipSpaces
char ')'
return (escapeURI $ trimr src, tit)
-linkTitle :: PandocMonad m => MarkdownParser m String
+linkTitle :: PandocMonad m => MarkdownParser m Text
linkTitle = quotedTitle '"' <|> quotedTitle '\''
link :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1823,13 +1826,13 @@ isSmallCaps :: Attr -> Bool
isSmallCaps ("",["smallcaps"],[]) = True
isSmallCaps ("",[],kvs) =
case lookup "style" kvs of
- Just s -> map toLower (filter (`notElem` " \t;") s) ==
+ Just s -> T.toLower (T.filter (`notElem` [' ', '\t', ';']) s) ==
"font-variant:small-caps"
Nothing -> False
isSmallCaps _ = False
regLink :: PandocMonad m
- => (Attr -> String -> String -> Inlines -> Inlines)
+ => (Attr -> Text -> Text -> Inlines -> Inlines)
-> F Inlines
-> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
@@ -1840,8 +1843,8 @@ regLink constructor lab = try $ do
-- a link like [this][ref] or [this][] or [this]
referenceLink :: PandocMonad m
- => (Attr -> String -> String -> Inlines -> Inlines)
- -> (F Inlines, String)
+ => (Attr -> Text -> Text -> Inlines -> Inlines)
+ -> (F Inlines, Text)
-> MarkdownParser m (F Inlines)
referenceLink constructor (lab, raw) = do
sp <- (True <$ lookAhead (char ' ')) <|> return False
@@ -1863,7 +1866,7 @@ referenceLink constructor (lab, raw) = do
parsedRaw' <- parsedRaw
fallback' <- fallback
return $ B.str "[" <> fallback' <> B.str "]" <>
- (if sp && not (null raw) then B.space else mempty) <>
+ (if sp && not (T.null raw) then B.space else mempty) <>
parsedRaw'
return $ do
keys <- asksF stateKeys
@@ -1878,19 +1881,19 @@ referenceLink constructor (lab, raw) = do
else makeFallback
Just ((src,tit), attr) -> constructor attr src tit <$> lab
-dropBrackets :: String -> String
-dropBrackets = reverse . dropRB . reverse . dropLB
- where dropRB (']':xs) = xs
- dropRB xs = xs
- dropLB ('[':xs) = xs
- dropLB xs = xs
+dropBrackets :: Text -> Text
+dropBrackets = dropRB . dropLB
+ where dropRB (T.unsnoc -> Just (xs,']')) = xs
+ dropRB xs = xs
+ dropLB (T.uncons -> Just ('[',xs)) = xs
+ dropLB xs = xs
bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
(cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
- notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
+ notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text))
return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
@@ -1902,19 +1905,20 @@ autoLink = try $ do
-- is finished, because the uri parser tries to avoid parsing
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
- extra <- fromEntities <$> manyTill nonspaceChar (char '>')
+ extra <- fromEntities <$> manyTillChar nonspaceChar (char '>')
attr <- option ("", [cls], []) $ try $
guardEnabled Ext_link_attributes >> attributes
- return $ return $ B.linkWith attr (src ++ escapeURI extra) ""
- (B.str $ orig ++ extra)
+ return $ return $ B.linkWith attr (src <> escapeURI extra) ""
+ (B.str $ orig <> extra)
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
- let constructor attr' src = case takeExtension src of
- "" -> B.imageWith attr' (addExtension src defaultExt)
+ let constructor attr' src = case takeExtension (T.unpack src) of
+ "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+ $ T.unpack defaultExt)
_ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
@@ -1926,7 +1930,7 @@ note = try $ do
return $ do
notes <- asksF stateNotes'
case M.lookup ref notes of
- Nothing -> return $ B.str $ "[^" ++ ref ++ "]"
+ Nothing -> return $ B.str $ "[^" <> ref <> "]"
Just (_pos, contents) -> do
st <- askF
-- process the note in a context that doesn't resolve
@@ -1949,29 +1953,29 @@ rawLaTeXInline' = try $ do
s <- rawLaTeXInline
return $ return $ B.rawInline "tex" s -- "tex" because it might be context
-rawConTeXtEnvironment :: PandocMonad m => ParserT [Char] st m String
+rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text
rawConTeXtEnvironment = try $ do
string "\\start"
completion <- inBrackets (letter <|> digit <|> spaceChar)
- <|> many1 letter
- contents <- manyTill (rawConTeXtEnvironment <|> count 1 anyChar)
- (try $ string "\\stop" >> string completion)
- return $ "\\start" ++ completion ++ concat contents ++ "\\stop" ++ completion
+ <|> many1Char letter
+ contents <- manyTill (rawConTeXtEnvironment <|> countChar 1 anyChar)
+ (try $ string "\\stop" >> textStr completion)
+ return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion
-inBrackets :: PandocMonad m => ParserT [Char] st m Char -> ParserT [Char] st m String
+inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text
inBrackets parser = do
char '['
- contents <- many parser
+ contents <- manyChar parser
char ']'
- return $ "[" ++ contents ++ "]"
+ return $ "[" <> contents <> "]"
spanHtml :: PandocMonad m => MarkdownParser m (F Inlines)
spanHtml = try $ do
guardEnabled Ext_native_spans
- (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
- contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
+ (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) [])
+ contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text)))
let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
+ let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ if isSmallCaps (ident, classes, keyvals)
then B.smallcaps <$> contents
@@ -1980,20 +1984,20 @@ spanHtml = try $ do
divHtml :: PandocMonad m => MarkdownParser m (F Blocks)
divHtml = try $ do
guardEnabled Ext_native_divs
- (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
+ (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) [])
-- we set stateInHtmlBlock so that closing tags that can be either block or
-- inline will not be parsed as inline tags
oldInHtmlBlock <- stateInHtmlBlock <$> getState
updateState $ \st -> st{ stateInHtmlBlock = Just "div" }
bls <- option "" (blankline >> option "" blanklines)
contents <- mconcat <$>
- many (notFollowedBy' (htmlTag (~== TagClose "div")) >> block)
- closed <- option False (True <$ htmlTag (~== TagClose "div"))
+ many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block)
+ closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text)))
if closed
then do
updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock }
let ident = fromMaybe "" $ lookup "id" attrs
- let classes = maybe [] words $ lookup "class" attrs
+ let classes = maybe [] T.words $ lookup "class" attrs
let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.divWith (ident, classes, keyvals) <$> contents
else -- avoid backtracing
@@ -2005,7 +2009,7 @@ divFenced = try $ do
string ":::"
skipMany (char ':')
skipMany spaceChar
- attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1 nonspaceChar)
+ attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar)
skipMany spaceChar
skipMany (char ':')
blankline
@@ -2047,7 +2051,7 @@ emoji :: PandocMonad m => MarkdownParser m (F Inlines)
emoji = try $ do
guardEnabled Ext_emoji
char ':'
- emojikey <- many1 (oneOf emojiChars)
+ emojikey <- many1Char (oneOf emojiChars)
char ':'
case emojiToInline emojikey of
Just i -> return (return $ B.singleton i)
@@ -2077,14 +2081,14 @@ textualCite = try $ do
mbrest <- option Nothing $ try $ spnl >> Just <$> withRaw normalCite
case mbrest of
Just (rest, raw) ->
- return $ (flip B.cite (B.text $ '@':key ++ " " ++ raw) . (first:))
+ return $ (flip B.cite (B.text $ "@" <> key <> " " <> raw) . (first:))
<$> rest
Nothing ->
(do
(cs, raw) <- withRaw $ bareloc first
- let (spaces',raw') = span isSpace raw
- spc | null spaces' = mempty
- | otherwise = B.space
+ let (spaces',raw') = T.span isSpace raw
+ spc | T.null spaces' = mempty
+ | otherwise = B.space
lab <- parseFromString' inlines $ dropBrackets raw'
fallback <- referenceLink B.linkWith (lab,raw')
return $ do
@@ -2092,12 +2096,12 @@ textualCite = try $ do
cs' <- cs
return $
case B.toList fallback' of
- Link{}:_ -> B.cite [first] (B.str $ '@':key) <> spc <> fallback'
- _ -> B.cite cs' (B.text $ '@':key ++ " " ++ raw))
+ Link{}:_ -> B.cite [first] (B.str $ "@" <> key) <> spc <> fallback'
+ _ -> B.cite cs' (B.text $ "@" <> key <> " " <> raw))
<|> return (do st <- askF
return $ case M.lookup key (stateExamples st) of
- Just n -> B.str (show n)
- _ -> B.cite [first] $ B.str $ '@':key)
+ Just n -> B.str $ tshow n
+ _ -> B.cite [first] $ B.str $ "@" <> key)
bareloc :: PandocMonad m => Citation -> MarkdownParser m (F [Citation])
bareloc c = try $ do
diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs
index 35bb8e3eb..07240e951 100644
--- a/src/Text/Pandoc/Readers/MediaWiki.hs
+++ b/src/Text/Pandoc/Readers/MediaWiki.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE RelaxedPolyRec #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RelaxedPolyRec #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{- |
Module : Text.Pandoc.Readers.MediaWiki
@@ -24,11 +25,12 @@ import Control.Monad
import Control.Monad.Except (throwError)
import Data.Char (isDigit, isSpace)
import qualified Data.Foldable as F
-import Data.List (intercalate, intersperse, isPrefixOf)
+import Data.List (intersperse)
import Data.Maybe (fromMaybe, maybeToList)
import Data.Sequence (ViewL (..), viewl, (<|))
import qualified Data.Set as Set
-import Data.Text (Text, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B
@@ -39,7 +41,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (nested)
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag)
import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines,
- trim)
+ trim, splitTextBy, tshow)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.XML (fromEntities)
@@ -57,7 +59,7 @@ readMediaWiki opts s = do
, mwLogMessages = []
, mwInTT = False
}
- (unpack (crFilter s) ++ "\n")
+ (crFilter s <> "\n")
case parsed of
Right result -> return result
Left e -> throwError e
@@ -66,12 +68,12 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwMaxNestingLevel :: Int
, mwNextLinkNumber :: Int
, mwCategoryLinks :: [Inlines]
- , mwIdentifierList :: Set.Set String
+ , mwIdentifierList :: Set.Set Text
, mwLogMessages :: [LogMessage]
, mwInTT :: Bool
}
-type MWParser m = ParserT [Char] MWState m
+type MWParser m = ParserT Text MWState m
instance HasReaderOptions MWState where
extractReaderOptions = mwOptions
@@ -105,58 +107,58 @@ specialChars = "'[]<=&*{}|\":\\"
spaceChars :: [Char]
spaceChars = " \n\t"
-sym :: PandocMonad m => String -> MWParser m ()
-sym s = () <$ try (string s)
+sym :: PandocMonad m => Text -> MWParser m ()
+sym s = () <$ try (string $ T.unpack s)
-newBlockTags :: [String]
+newBlockTags :: [Text]
newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"]
-isBlockTag' :: Tag String -> Bool
+isBlockTag' :: Tag Text -> Bool
isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) &&
t `notElem` eitherBlockOrInline
isBlockTag' tag = isBlockTag tag
-isInlineTag' :: Tag String -> Bool
+isInlineTag' :: Tag Text -> Bool
isInlineTag' (TagComment _) = True
isInlineTag' t = not (isBlockTag' t)
-eitherBlockOrInline :: [String]
+eitherBlockOrInline :: [Text]
eitherBlockOrInline = ["applet", "button", "del", "iframe", "ins",
"map", "area", "object"]
htmlComment :: PandocMonad m => MWParser m ()
htmlComment = () <$ htmlTag isCommentTag
-inlinesInTags :: PandocMonad m => String -> MWParser m Inlines
+inlinesInTags :: PandocMonad m => Text -> MWParser m Inlines
inlinesInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
- if '/' `elem` raw -- self-closing tag
+ if T.any (== '/') raw -- self-closing tag
then return mempty
else trimInlines . mconcat <$>
manyTill inline (htmlTag (~== TagClose tag))
-blocksInTags :: PandocMonad m => String -> MWParser m Blocks
+blocksInTags :: PandocMonad m => Text -> MWParser m Blocks
blocksInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
let closer = if tag == "li"
- then htmlTag (~== TagClose "li")
+ then htmlTag (~== TagClose ("li" :: Text))
<|> lookAhead (
- htmlTag (~== TagOpen "li" [])
- <|> htmlTag (~== TagClose "ol")
- <|> htmlTag (~== TagClose "ul"))
+ htmlTag (~== TagOpen ("li" :: Text) [])
+ <|> htmlTag (~== TagClose ("ol" :: Text))
+ <|> htmlTag (~== TagClose ("ul" :: Text)))
else htmlTag (~== TagClose tag)
- if '/' `elem` raw -- self-closing tag
+ if T.any (== '/') raw -- self-closing tag
then return mempty
else mconcat <$> manyTill block closer
-charsInTags :: PandocMonad m => String -> MWParser m [Char]
-charsInTags tag = try $ do
+textInTags :: PandocMonad m => Text -> MWParser m Text
+textInTags tag = try $ do
(_,raw) <- htmlTag (~== TagOpen tag [])
- if '/' `elem` raw -- self-closing tag
+ if T.any (== '/') raw -- self-closing tag
then return ""
- else manyTill anyChar (htmlTag (~== TagClose tag))
+ else T.pack <$> manyTill anyChar (htmlTag (~== TagClose tag))
--
-- main parser
@@ -192,7 +194,7 @@ block = do
<|> blockTag
<|> (B.rawBlock "mediawiki" <$> template)
<|> para
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
para :: PandocMonad m => MWParser m Blocks
@@ -234,16 +236,16 @@ table = do
else (replicate cols mempty, hdr:rows')
return $ B.table caption cellspecs headers rows
-parseAttrs :: PandocMonad m => MWParser m [(String,String)]
+parseAttrs :: PandocMonad m => MWParser m [(Text,Text)]
parseAttrs = many1 parseAttr
-parseAttr :: PandocMonad m => MWParser m (String, String)
+parseAttr :: PandocMonad m => MWParser m (Text, Text)
parseAttr = try $ do
skipMany spaceChar
- k <- many1 letter
+ k <- many1Char letter
char '='
- v <- (char '"' >> many1Till (satisfy (/='\n')) (char '"'))
- <|> many1 (satisfy $ \c -> not (isSpace c) && c /= '|')
+ v <- (char '"' >> many1TillChar (satisfy (/='\n')) (char '"'))
+ <|> many1Char (satisfy $ \c -> not (isSpace c) && c /= '|')
return (k,v)
tableStart :: PandocMonad m => MWParser m ()
@@ -293,8 +295,8 @@ tableCell = try $ do
notFollowedBy (char '|')
skipMany spaceChar
pos' <- getPosition
- ls <- concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
- ((snd <$> withRaw table) <|> count 1 anyChar))
+ ls <- T.concat <$> many (notFollowedBy (cellsep <|> rowsep <|> tableEnd) *>
+ ((snd <$> withRaw table) <|> countChar 1 anyChar))
bs <- parseFromString (do setPosition pos'
mconcat <$> many block) ls
let align = case lookup "align" attrs of
@@ -307,48 +309,49 @@ tableCell = try $ do
Nothing -> 0.0
return ((align, width), bs)
-parseWidth :: String -> Maybe Double
+parseWidth :: Text -> Maybe Double
parseWidth s =
- case reverse s of
- ('%':ds) | all isDigit ds -> safeRead ('0':'.':reverse ds)
- _ -> Nothing
+ case T.unsnoc s of
+ Just (ds, '%') | T.all isDigit ds -> safeRead $ "0." <> ds
+ _ -> Nothing
-template :: PandocMonad m => MWParser m String
+template :: PandocMonad m => MWParser m Text
template = try $ do
string "{{"
notFollowedBy (char '{')
lookAhead $ letter <|> digit <|> char ':'
- let chunk = template <|> variable <|> many1 (noneOf "{}") <|> count 1 anyChar
+ let chunk = template <|> variable <|> many1Char (noneOf "{}") <|> countChar 1 anyChar
contents <- manyTill chunk (try $ string "}}")
- return $ "{{" ++ concat contents ++ "}}"
+ return $ "{{" <> T.concat contents <> "}}"
blockTag :: PandocMonad m => MWParser m Blocks
blockTag = do
(tag, _) <- lookAhead $ htmlTag isBlockTag'
case tag of
TagOpen "blockquote" _ -> B.blockQuote <$> blocksInTags "blockquote"
- TagOpen "pre" _ -> B.codeBlock . trimCode <$> charsInTags "pre"
+ TagOpen "pre" _ -> B.codeBlock . trimCode <$> textInTags "pre"
TagOpen "syntaxhighlight" attrs -> syntaxhighlight "syntaxhighlight" attrs
TagOpen "source" attrs -> syntaxhighlight "source" attrs
TagOpen "haskell" _ -> B.codeBlockWith ("",["haskell"],[]) . trimCode <$>
- charsInTags "haskell"
+ textInTags "haskell"
TagOpen "gallery" _ -> blocksInTags "gallery"
TagOpen "p" _ -> mempty <$ htmlTag (~== tag)
TagClose "p" -> mempty <$ htmlTag (~== tag)
_ -> B.rawBlock "html" . snd <$> htmlTag (~== tag)
-trimCode :: String -> String
-trimCode ('\n':xs) = stripTrailingNewlines xs
-trimCode xs = stripTrailingNewlines xs
+trimCode :: Text -> Text
+trimCode t = case T.uncons t of
+ Just ('\n', xs) -> stripTrailingNewlines xs
+ _ -> stripTrailingNewlines t
-syntaxhighlight :: PandocMonad m => String -> [Attribute String] -> MWParser m Blocks
+syntaxhighlight :: PandocMonad m => Text -> [Attribute Text] -> MWParser m Blocks
syntaxhighlight tag attrs = try $ do
let mblang = lookup "lang" attrs
let mbstart = lookup "start" attrs
let mbline = lookup "line" attrs
let classes = maybeToList mblang ++ maybe [] (const ["numberLines"]) mbline
let kvs = maybe [] (\x -> [("startFrom",x)]) mbstart
- contents <- charsInTags tag
+ contents <- textInTags tag
return $ B.codeBlockWith ("",classes,kvs) $ trimCode contents
hrule :: PandocMonad m => MWParser m Blocks
@@ -362,17 +365,17 @@ preformatted = try $ do
guardColumnOne
char ' '
let endline' = B.linebreak <$ try (newline <* char ' ')
- let whitespace' = B.str <$> many1 ('\160' <$ spaceChar)
+ let whitespace' = B.str <$> many1Char ('\160' <$ spaceChar)
let spToNbsp ' ' = '\160'
spToNbsp x = x
let nowiki' = mconcat . intersperse B.linebreak . map B.str .
- lines . fromEntities . map spToNbsp <$> try
- (htmlTag (~== TagOpen "nowiki" []) *>
- manyTill anyChar (htmlTag (~== TagClose "nowiki")))
+ T.lines . fromEntities . T.map spToNbsp <$> try
+ (htmlTag (~== TagOpen ("nowiki" :: Text) []) *>
+ manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text))))
let inline' = whitespace' <|> endline' <|> nowiki'
<|> try (notFollowedBy newline *> inline)
contents <- mconcat <$> many1 inline'
- let spacesStr (Str xs) = all isSpace xs
+ let spacesStr (Str xs) = T.all isSpace xs
spacesStr _ = False
if F.all spacesStr contents
then return mempty
@@ -385,7 +388,7 @@ encode = B.fromList . normalizeCode . B.toList . walk strToCode
strToCode x = x
normalizeCode [] = []
normalizeCode (Code a1 x : Code a2 y : zs) | a1 == a2 =
- normalizeCode $ Code a1 (x ++ y) : zs
+ normalizeCode $ Code a1 (x <> y) : zs
normalizeCode (x:xs) = x : normalizeCode xs
header :: PandocMonad m => MWParser m Blocks
@@ -400,22 +403,22 @@ header = try $ do
-- See #4731:
modifyIdentifier :: Attr -> Attr
modifyIdentifier (ident,cl,kv) = (ident',cl,kv)
- where ident' = map (\c -> if c == '-' then '_' else c) ident
+ where ident' = T.map (\c -> if c == '-' then '_' else c) ident
bulletList :: PandocMonad m => MWParser m Blocks
bulletList = B.bulletList <$>
( many1 (listItem '*')
- <|> (htmlTag (~== TagOpen "ul" []) *> spaces *> many (listItem '*' <|> li) <*
- optional (htmlTag (~== TagClose "ul"))) )
+ <|> (htmlTag (~== TagOpen ("ul" :: Text) []) *> spaces *> many (listItem '*' <|> li) <*
+ optional (htmlTag (~== TagClose ("ul" :: Text)))) )
orderedList :: PandocMonad m => MWParser m Blocks
orderedList =
(B.orderedList <$> many1 (listItem '#'))
<|> try
- (do (tag,_) <- htmlTag (~== TagOpen "ol" [])
+ (do (tag,_) <- htmlTag (~== TagOpen ("ol" :: Text) [])
spaces
items <- many (listItem '#' <|> li)
- optional (htmlTag (~== TagClose "ol"))
+ optional (htmlTag (~== TagClose ("ol" :: Text)))
let start = fromMaybe 1 $ safeRead $ fromAttrib "start" tag
return $ B.orderedListWith (start, DefaultStyle, DefaultDelim) items)
@@ -452,7 +455,7 @@ anyListStart :: PandocMonad m => MWParser m Char
anyListStart = guardColumnOne >> oneOf "*#:;"
li :: PandocMonad m => MWParser m Blocks
-li = lookAhead (htmlTag (~== TagOpen "li" [])) *>
+li = lookAhead (htmlTag (~== TagOpen ("li" :: Text) [])) *>
(firstParaToPlain <$> blocksInTags "li") <* spaces
listItem :: PandocMonad m => Char -> MWParser m Blocks
@@ -464,13 +467,13 @@ listItem c = try $ do
else do
skipMany spaceChar
pos' <- getPosition
- first <- concat <$> manyTill listChunk newline
+ first <- T.concat <$> manyTill listChunk newline
rest <- many
(try $ string extras *> lookAhead listStartChar *>
- (concat <$> manyTill listChunk newline))
+ (T.concat <$> manyTill listChunk newline))
contents <- parseFromString (do setPosition pos'
many1 $ listItem' c)
- (unlines (first : rest))
+ (T.unlines (first : rest))
case c of
'*' -> return $ B.bulletList contents
'#' -> return $ B.orderedList contents
@@ -484,20 +487,20 @@ listItem c = try $ do
-- }}
-- * next list item
-- which seems to be valid mediawiki.
-listChunk :: PandocMonad m => MWParser m String
-listChunk = template <|> count 1 anyChar
+listChunk :: PandocMonad m => MWParser m Text
+listChunk = template <|> countChar 1 anyChar
listItem' :: PandocMonad m => Char -> MWParser m Blocks
listItem' c = try $ do
listStart c
skipMany spaceChar
pos' <- getPosition
- first <- concat <$> manyTill listChunk newline
+ first <- T.concat <$> manyTill listChunk newline
rest <- many (try $ char c *> lookAhead listStartChar *>
- (concat <$> manyTill listChunk newline))
+ (T.concat <$> manyTill listChunk newline))
parseFromString (do setPosition pos'
firstParaToPlain . mconcat <$> many1 block)
- $ unlines $ first : rest
+ $ T.unlines $ first : rest
firstParaToPlain :: Blocks -> Blocks
firstParaToPlain contents =
@@ -528,23 +531,23 @@ inline = whitespace
<|> special
str :: PandocMonad m => MWParser m Inlines
-str = B.str <$> many1 (noneOf $ specialChars ++ spaceChars)
+str = B.str <$> many1Char (noneOf $ specialChars ++ spaceChars)
math :: PandocMonad m => MWParser m Inlines
-math = (B.displayMath . trim <$> try (many1 (char ':') >> charsInTags "math"))
- <|> (B.math . trim <$> charsInTags "math")
- <|> (B.displayMath . trim <$> try (dmStart *> manyTill anyChar dmEnd))
- <|> (B.math . trim <$> try (mStart *> manyTill (satisfy (/='\n')) mEnd))
+math = (B.displayMath . trim <$> try (many1 (char ':') >> textInTags "math"))
+ <|> (B.math . trim <$> textInTags "math")
+ <|> (B.displayMath . trim <$> try (dmStart *> manyTillChar anyChar dmEnd))
+ <|> (B.math . trim <$> try (mStart *> manyTillChar (satisfy (/='\n')) mEnd))
where dmStart = string "\\["
dmEnd = try (string "\\]")
mStart = string "\\("
mEnd = try (string "\\)")
-variable :: PandocMonad m => MWParser m String
+variable :: PandocMonad m => MWParser m Text
variable = try $ do
string "{{{"
- contents <- manyTill anyChar (try $ string "}}}")
- return $ "{{{" ++ contents ++ "}}}"
+ contents <- manyTillChar anyChar (try $ string "}}}")
+ return $ "{{{" <> contents <> "}}}"
inlineTag :: PandocMonad m => MWParser m Inlines
inlineTag = do
@@ -553,11 +556,11 @@ inlineTag = do
TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref"
TagOpen "nowiki" _ -> try $ do
(_,raw) <- htmlTag (~== tag)
- if '/' `elem` raw
+ if T.any (== '/') raw
then return mempty
else B.text . fromEntities <$>
- manyTill anyChar (htmlTag (~== TagClose "nowiki"))
- TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen "br" []) -- will get /> too
+ manyTillChar anyChar (htmlTag (~== TagClose ("nowiki" :: Text)))
+ TagOpen "br" _ -> B.linebreak <$ (htmlTag (~== TagOpen ("br" :: Text) []) -- will get /> too
*> optional blankline)
TagOpen "strike" _ -> B.strikeout <$> inlinesInTags "strike"
TagOpen "del" _ -> B.strikeout <$> inlinesInTags "del"
@@ -570,12 +573,12 @@ inlineTag = do
result <- encode <$> inlinesInTags "tt"
updateState $ \st -> st{ mwInTT = inTT }
return result
- TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> charsInTags "hask"
+ TagOpen "hask" _ -> B.codeWith ("",["haskell"],[]) <$> textInTags "hask"
_ -> B.rawInline "html" . snd <$> htmlTag (~== tag)
special :: PandocMonad m => MWParser m Inlines
-special = B.str <$> count 1 (notFollowedBy' (htmlTag isBlockTag') *>
- oneOf specialChars)
+special = B.str <$> countChar 1 (notFollowedBy' (htmlTag isBlockTag') *>
+ oneOf specialChars)
inlineHtml :: PandocMonad m => MWParser m Inlines
inlineHtml = B.rawInline "html" . snd <$> htmlTag isInlineTag'
@@ -594,7 +597,7 @@ endline = () <$ try (newline <*
notFollowedBy anyListStart)
imageIdentifiers :: PandocMonad m => [MWParser m ()]
-imageIdentifiers = [sym (identifier ++ ":") | identifier <- identifiers]
+imageIdentifiers = [sym (identifier <> ":") | identifier <- identifiers]
where identifiers = ["File", "Image", "Archivo", "Datei", "Fichier",
"Bild"]
@@ -602,9 +605,9 @@ image :: PandocMonad m => MWParser m Inlines
image = try $ do
sym "[["
choice imageIdentifiers
- fname <- addUnderscores <$> many1 (noneOf "|]")
+ fname <- addUnderscores <$> many1Char (noneOf "|]")
_ <- many imageOption
- dims <- try (char '|' *> sepBy (many digit) (char 'x') <* string "px")
+ dims <- try (char '|' *> sepBy (manyChar digit) (char 'x') <* string "px")
<|> return []
_ <- many imageOption
let kvs = case dims of
@@ -614,9 +617,9 @@ image = try $ do
let attr = ("", [], kvs)
caption <- (B.str fname <$ sym "]]")
<|> try (char '|' *> (mconcat <$> manyTill inline (sym "]]")))
- return $ B.imageWith attr fname ("fig:" ++ stringify caption) caption
+ return $ B.imageWith attr fname ("fig:" <> stringify caption) caption
-imageOption :: PandocMonad m => MWParser m String
+imageOption :: PandocMonad m => MWParser m Text
imageOption = try $ char '|' *> opt
where
opt = try (oneOfStrings [ "border", "thumbnail", "frameless"
@@ -624,30 +627,27 @@ imageOption = try $ char '|' *> opt
, "center", "none", "baseline", "sub"
, "super", "top", "text-top", "middle"
, "bottom", "text-bottom" ])
- <|> try (string "frame")
+ <|> try (textStr "frame")
<|> try (oneOfStrings ["link=","alt=","page=","class="] <* many (noneOf "|]"))
-collapseUnderscores :: String -> String
-collapseUnderscores [] = []
-collapseUnderscores ('_':'_':xs) = collapseUnderscores ('_':xs)
-collapseUnderscores (x:xs) = x : collapseUnderscores xs
-
-addUnderscores :: String -> String
-addUnderscores = collapseUnderscores . intercalate "_" . words
+addUnderscores :: Text -> Text
+addUnderscores = T.intercalate "_" . splitTextBy sep
+ where
+ sep c = isSpace c || c == '_'
internalLink :: PandocMonad m => MWParser m Inlines
internalLink = try $ do
sym "[["
- pagename <- unwords . words <$> many (noneOf "|]")
+ pagename <- T.unwords . T.words <$> manyChar (noneOf "|]")
label <- option (B.text pagename) $ char '|' *>
( (mconcat <$> many1 (notFollowedBy (char ']') *> inline))
-- the "pipe trick"
-- [[Help:Contents|] -> "Contents"
- <|> return (B.text $ drop 1 $ dropWhile (/=':') pagename) )
+ <|> return (B.text $ T.drop 1 $ T.dropWhile (/=':') pagename) )
sym "]]"
- linktrail <- B.text <$> many letter
+ linktrail <- B.text <$> manyChar letter
let link = B.link (addUnderscores pagename) "wikilink" (label <> linktrail)
- if "Category:" `isPrefixOf` pagename
+ if "Category:" `T.isPrefixOf` pagename
then do
updateState $ \st -> st{ mwCategoryLinks = link : mwCategoryLinks st }
return mempty
@@ -662,7 +662,7 @@ externalLink = try $ do
<|> do char ']'
num <- mwNextLinkNumber <$> getState
updateState $ \st -> st{ mwNextLinkNumber = num + 1 }
- return $ B.str $ show num
+ return $ B.str $ tshow num
return $ B.link src "" lab
url :: PandocMonad m => MWParser m Inlines
diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs
index b8cbe2f26..4ade61294 100644
--- a/src/Text/Pandoc/Readers/Muse.hs
+++ b/src/Text/Pandoc/Readers/Muse.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Muse
Copyright : Copyright (C) 2017-2019 Alexander Krotov
@@ -24,12 +25,12 @@ import Control.Monad.Reader
import Control.Monad.Except (throwError)
import Data.Bifunctor
import Data.Default
-import Data.List (intercalate, transpose, uncons)
-import Data.List.Split (splitOn)
+import Data.List (transpose, uncons)
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Maybe (fromMaybe, isNothing, maybeToList)
-import Data.Text (Text, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
@@ -38,7 +39,7 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (F)
-import Text.Pandoc.Shared (crFilter, trimr, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trimr, underlineSpan, tshow)
-- | Read Muse from an input string and return a Pandoc document.
readMuse :: PandocMonad m
@@ -49,18 +50,18 @@ readMuse opts s = do
let input = crFilter s
res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input
case res of
- Left e -> throwError $ PandocParsecError (unpack input) e
+ Left e -> throwError $ PandocParsecError input e
Right d -> return d
type F = Future MuseState
data MuseState = MuseState { museMeta :: F Meta -- ^ Document metadata
, museOptions :: ReaderOptions
- , museIdentifierList :: Set.Set String
+ , museIdentifierList :: Set.Set Text
, museLastSpacePos :: Maybe SourcePos -- ^ Position after last space or newline parsed
, museLastStrPos :: Maybe SourcePos -- ^ Position after last str parsed
, museLogMessages :: [LogMessage]
- , museNotes :: M.Map String (SourcePos, F Blocks)
+ , museNotes :: M.Map Text (SourcePos, F Blocks)
}
instance Default MuseState where
@@ -116,22 +117,27 @@ parseMuse = do
-- * Utility functions
-- | Trim up to one newline from the beginning of the string.
-lchop :: String -> String
-lchop ('\n':xs) = xs
-lchop s = s
+lchop :: Text -> Text
+lchop s = case T.uncons s of
+ Just ('\n', xs) -> xs
+ _ -> s
-- | Trim up to one newline from the end of the string.
-rchop :: String -> String
-rchop = reverse . lchop . reverse
+rchop :: Text -> Text
+rchop s = case T.unsnoc s of
+ Just (xs, '\n') -> xs
+ _ -> s
-unindent :: String -> String
-unindent = rchop . intercalate "\n" . dropSpacePrefix . splitOn "\n" . lchop
+unindent :: Text -> Text
+unindent = rchop . T.intercalate "\n" . dropSpacePrefix . T.splitOn "\n" . lchop
-dropSpacePrefix :: [String] -> [String]
-dropSpacePrefix lns = drop maxIndent <$> lns
+dropSpacePrefix :: [Text] -> [Text]
+dropSpacePrefix lns = T.drop maxIndent <$> lns
where isSpaceChar c = c == ' ' || c == '\t'
- maxIndent = length $ takeWhile (isSpaceChar . head) $ takeWhile same $ transpose lns
- same = and . (zipWith (==) <*> drop 1)
+ maxIndent = length $ takeWhile (isSpaceChar . T.head) $ takeWhile same $ T.transpose lns
+ same t = case T.uncons t of
+ Just (c, cs) -> T.all (== c) cs
+ Nothing -> True
atStart :: PandocMonad m => MuseParser m ()
atStart = do
@@ -160,29 +166,29 @@ getIndent = subtract 1 . sourceColumn <$ many spaceChar <*> getPosition
-- ** HTML parsers
-openTag :: PandocMonad m => String -> MuseParser m [(String, String)]
+openTag :: PandocMonad m => Text -> MuseParser m [(Text, Text)]
openTag tag = try $
- char '<' *> string tag *> manyTill attr (char '>')
+ char '<' *> textStr tag *> manyTill attr (char '>')
where
attr = try $ (,)
<$ many1 spaceChar
- <*> many1 (noneOf "=\n")
+ <*> many1Char (noneOf "=\n")
<* string "=\""
- <*> manyTill (noneOf "\"") (char '"')
+ <*> manyTillChar (noneOf "\"") (char '"')
-closeTag :: PandocMonad m => String -> MuseParser m ()
-closeTag tag = try $ string "</" *> string tag *> void (char '>')
+closeTag :: PandocMonad m => Text -> MuseParser m ()
+closeTag tag = try $ string "</" *> textStr tag *> void (char '>')
-- | Convert HTML attributes to Pandoc 'Attr'
-htmlAttrToPandoc :: [(String, String)] -> Attr
+htmlAttrToPandoc :: [(Text, Text)] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
- classes = maybe [] words $ lookup "class" attrs
+ classes = maybe [] T.words $ lookup "class" attrs
keyvals = [(k,v) | (k,v) <- attrs, k /= "id", k /= "class"]
parseHtmlContent :: PandocMonad m
- => String -- ^ Tag name
+ => Text -- ^ Tag name
-> MuseParser m (Attr, F Blocks)
parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
<$> fmap htmlAttrToPandoc (openTag tag)
@@ -193,16 +199,16 @@ parseHtmlContent tag = try $ getIndent >>= \indent -> (,)
-- ** Directive parsers
-- While not documented, Emacs Muse allows "-" in directive name
-parseDirectiveKey :: PandocMonad m => MuseParser m String
-parseDirectiveKey = char '#' *> many (letter <|> char '-')
+parseDirectiveKey :: PandocMonad m => MuseParser m Text
+parseDirectiveKey = char '#' *> manyChar (letter <|> char '-')
-parseEmacsDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseEmacsDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
parseEmacsDirective = (,)
<$> parseDirectiveKey
<* spaceChar
<*> (trimInlinesF . mconcat <$> manyTill inline' eol)
-parseAmuseDirective :: PandocMonad m => MuseParser m (String, F Inlines)
+parseAmuseDirective :: PandocMonad m => MuseParser m (Text, F Inlines)
parseAmuseDirective = (,)
<$> parseDirectiveKey
<* many1 spaceChar
@@ -289,7 +295,7 @@ listItemContentsUntil col pre end = p
parseBlock :: PandocMonad m => MuseParser m (F Blocks)
parseBlock = do
res <- blockElements <|> para
- trace (take 60 $ show $ B.toList $ runF res def)
+ trace (T.take 60 $ tshow $ B.toList $ runF res def)
return res
where para = fst <$> paraUntil (try (eof <|> void (lookAhead blockElements)))
@@ -337,7 +343,7 @@ pagebreak = try $ pure (B.divWith ("", [], [("style", "page-break-before: always
<* string "* * * * *"
<* manyTill spaceChar eol
-headingStart :: PandocMonad m => MuseParser m (String, Int)
+headingStart :: PandocMonad m => MuseParser m (Text, Int)
headingStart = try $ (,)
<$> option "" (try (parseAnchor <* manyTill spaceChar eol))
<* firstColumn
@@ -371,14 +377,14 @@ example :: PandocMonad m => MuseParser m (F Blocks)
example = try $ pure . B.codeBlock
<$ string "{{{"
<* many spaceChar
- <*> (unindent <$> manyTill anyChar (string "}}}"))
+ <*> (unindent <$> manyTillChar anyChar (string "}}}"))
-- | Parse an @\<example>@ tag.
exampleTag :: PandocMonad m => MuseParser m (F Blocks)
exampleTag = try $ fmap pure $ B.codeBlockWith
<$ many spaceChar
<*> (htmlAttrToPandoc <$> openTag "example")
- <*> (unindent <$> manyTill anyChar (closeTag "example"))
+ <*> (unindent <$> manyTillChar anyChar (closeTag "example"))
<* manyTill spaceChar eol
-- | Parse a @\<literal>@ tag as a raw block.
@@ -388,7 +394,7 @@ literalTag = try $ fmap pure $ B.rawBlock
<$ many spaceChar
<*> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
<* manyTill spaceChar eol
- <*> (unindent <$> manyTill anyChar (closeTag "literal"))
+ <*> (unindent <$> manyTillChar anyChar (closeTag "literal"))
<* manyTill spaceChar eol
-- | Parse @\<center>@ tag.
@@ -428,7 +434,7 @@ playTag = do
verseLine :: PandocMonad m => MuseParser m (F Inlines)
verseLine = (<>)
- <$> fmap pure (option mempty (B.str <$> many1 ('\160' <$ char ' ')))
+ <$> fmap pure (option mempty (B.str <$> many1Char ('\160' <$ char ' ')))
<*> fmap (trimInlinesF . mconcat) (manyTill inline' eol)
-- | Parse @\<verse>@ tag.
@@ -466,17 +472,17 @@ paraUntil end = do
noteMarker' :: PandocMonad m
=> Char
-> Char
- -> MuseParser m String
-noteMarker' l r = try $ (\x y -> l:x:y ++ [r])
+ -> MuseParser m Text
+noteMarker' l r = try $ (\x y -> T.pack $ l:x:y ++ [r])
<$ char l
<*> oneOf "123456789"
<*> manyTill digit (char r)
-noteMarker :: PandocMonad m => MuseParser m String
+noteMarker :: PandocMonad m => MuseParser m Text
noteMarker = noteMarker' '[' ']' <|> noteMarker' '{' '}'
addNote :: PandocMonad m
- => String
+ => Text
-> SourcePos
-> F Blocks
-> MuseParser m ()
@@ -674,15 +680,15 @@ museGridTableRow :: PandocMonad m
-> MuseParser m (F [Blocks])
museGridTableRow indent indices = try $ do
lns <- many1 $ try (indentWith indent *> museGridTableRawLine indices)
- let cols = map (unlines . map trimr) $ transpose lns
+ let cols = map (T.unlines . map trimr) $ transpose lns
indentWith indent *> museGridTableHeader
sequence <$> mapM (parseFromString' parseBlocks) cols
museGridTableRawLine :: PandocMonad m
=> [Int]
- -> MuseParser m [String]
+ -> MuseParser m [Text]
museGridTableRawLine indices =
- char '|' *> forM indices (\n -> count n anyChar <* char '|') <* manyTill spaceChar eol
+ char '|' *> forM indices (\n -> countChar n anyChar <* char '|') <* manyTill spaceChar eol
museGridTable :: PandocMonad m => MuseParser m (F Blocks)
museGridTable = try $ do
@@ -767,12 +773,12 @@ inline = endline <|> inline'
endline :: PandocMonad m => MuseParser m (F Inlines)
endline = try $ pure B.softbreak <$ newline <* notFollowedBy blankline <* updateLastSpacePos
-parseAnchor :: PandocMonad m => MuseParser m String
-parseAnchor = try $ (:)
+parseAnchor :: PandocMonad m => MuseParser m Text
+parseAnchor = try $ T.cons
<$ firstColumn
<* char '#'
<*> letter
- <*> many (letter <|> digit <|> char '-')
+ <*> manyChar (letter <|> digit <|> char '-')
anchor :: PandocMonad m => MuseParser m (F Inlines)
anchor = try $ do
@@ -813,7 +819,7 @@ emphasisBetween p = try $ trimInlinesF . mconcat
-- | Parse an inline tag, such as @\<em>@ and @\<strong>@.
inlineTag :: PandocMonad m
- => String -- ^ Tag name
+ => Text -- ^ Tag name
-> MuseParser m (F Inlines)
inlineTag tag = try $ mconcat
<$ openTag tag
@@ -862,12 +868,12 @@ strikeoutTag = fmap B.strikeout <$> inlineTag "del"
verbatimTag :: PandocMonad m => MuseParser m (F Inlines)
verbatimTag = return . B.text
<$ openTag "verbatim"
- <*> manyTill anyChar (closeTag "verbatim")
+ <*> manyTillChar anyChar (closeTag "verbatim")
-- | Parse @\<class>@ tag.
classTag :: PandocMonad m => MuseParser m (F Inlines)
classTag = do
- classes <- maybe [] words . lookup "name" <$> openTag "class"
+ classes <- maybe [] T.words . lookup "name" <$> openTag "class"
fmap (B.spanWith ("", classes, [])) . mconcat <$> manyTill inline (closeTag "class")
-- | Parse @\<\<\<RTL>>>@ text.
@@ -886,43 +892,43 @@ nbsp = try $ pure (B.str "\160") <$ string "~~"
-- | Parse code markup, indicated by @\'=\'@ characters.
code :: PandocMonad m => MuseParser m (F Inlines)
-code = try $ fmap pure $ B.code . uncurry (++)
+code = try $ fmap pure $ B.code . uncurry (<>)
<$ atStart
<* char '='
<* notFollowedBy (spaceChar <|> newline)
- <*> manyUntil (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap pure $ noneOf " \t\n\r=" <* char '=')
+ <*> manyUntilChar (noneOf "\n\r" <|> (newline <* notFollowedBy newline)) (try $ fmap T.singleton $ noneOf " \t\n\r=" <* char '=')
<* notFollowedBy alphaNum
-- | Parse @\<code>@ tag.
codeTag :: PandocMonad m => MuseParser m (F Inlines)
codeTag = fmap pure $ B.codeWith
<$> (htmlAttrToPandoc <$> openTag "code")
- <*> manyTill anyChar (closeTag "code")
+ <*> manyTillChar anyChar (closeTag "code")
-- | Parse @\<math>@ tag.
-- @\<math>@ tag is an Emacs Muse extension enabled by @(require 'muse-latex2png)@
mathTag :: PandocMonad m => MuseParser m (F Inlines)
mathTag = return . B.math
<$ openTag "math"
- <*> manyTill anyChar (closeTag "math")
+ <*> manyTillChar anyChar (closeTag "math")
-- | Parse inline @\<literal>@ tag as a raw inline.
inlineLiteralTag :: PandocMonad m => MuseParser m (F Inlines)
inlineLiteralTag = try $ fmap pure $ B.rawInline
<$> (fromMaybe "html" . lookup "style" <$> openTag "literal") -- FIXME: Emacs Muse inserts <literal> without style into all output formats, but we assume HTML
- <*> manyTill anyChar (closeTag "literal")
+ <*> manyTillChar anyChar (closeTag "literal")
str :: PandocMonad m => MuseParser m (F Inlines)
-str = return . B.str <$> many1 alphaNum <* updateLastStrPos
+str = return . B.str <$> many1Char alphaNum <* updateLastStrPos
-- | Consume asterisks that were not used as emphasis opening.
-- This prevents series of asterisks from being split into
-- literal asterisk and emphasis opening.
asterisks :: PandocMonad m => MuseParser m (F Inlines)
-asterisks = pure . B.str <$> many1 (char '*')
+asterisks = pure . B.str <$> many1Char (char '*')
symbol :: PandocMonad m => MuseParser m (F Inlines)
-symbol = pure . B.str . pure <$> nonspaceChar
+symbol = pure . B.str . T.singleton <$> nonspaceChar
-- | Parse a link or image.
linkOrImage :: PandocMonad m => MuseParser m (F Inlines)
@@ -934,12 +940,12 @@ linkContent = trimInlinesF . mconcat
<*> manyTill inline (char ']')
-- | Parse a link starting with (possibly null) prefix
-link :: PandocMonad m => String -> MuseParser m (F Inlines)
+link :: PandocMonad m => Text -> MuseParser m (F Inlines)
link prefix = try $ do
inLink <- asks museInLink
guard $ not inLink
- string $ "[[" ++ prefix
- url <- manyTill anyChar $ char ']'
+ textStr $ "[[" <> prefix
+ url <- manyTillChar anyChar $ char ']'
content <- option (pure $ B.str url) (local (\s -> s { museInLink = True }) linkContent)
char ']'
return $ B.link url "" <$> content
@@ -947,27 +953,27 @@ link prefix = try $ do
image :: PandocMonad m => MuseParser m (F Inlines)
image = try $ do
string "[["
- (url, (ext, width, align)) <- manyUntil (noneOf "]") (imageExtensionAndOptions <* char ']')
+ (url, (ext, width, align)) <- manyUntilChar (noneOf "]") (imageExtensionAndOptions <* char ']')
content <- option mempty linkContent
char ']'
let widthAttr = case align of
- Just 'f' -> [("width", fromMaybe "100" width ++ "%"), ("height", "75%")]
- _ -> maybeToList (("width",) . (++ "%") <$> width)
+ Just 'f' -> [("width", fromMaybe "100" width <> "%"), ("height", "75%")]
+ _ -> maybeToList (("width",) . (<> "%") <$> width)
let alignClass = case align of
Just 'r' -> ["align-right"]
Just 'l' -> ["align-left"]
Just 'f' -> []
_ -> []
- return $ B.imageWith ("", alignClass, widthAttr) (url ++ ext) mempty <$> content
+ return $ B.imageWith ("", alignClass, widthAttr) (url <> ext) mempty <$> content
where -- Taken from muse-image-regexp defined in Emacs Muse file lisp/muse-regexps.el
imageExtensions = [".eps", ".gif", ".jpg", ".jpeg", ".pbm", ".png", ".tiff", ".xbm", ".xpm"]
- imageExtension = choice (try . string <$> imageExtensions)
+ imageExtension = choice (try . textStr <$> imageExtensions)
imageExtensionAndOptions = do
ext <- imageExtension
(width, align) <- option (Nothing, Nothing) imageAttrs
return (ext, width, align)
imageAttrs = (,)
<$ many1 spaceChar
- <*> optionMaybe (many1 digit)
+ <*> optionMaybe (many1Char digit)
<* many spaceChar
<*> optionMaybe (oneOf "rlf")
diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs
index 9e3c118d8..34d3c5e8f 100644
--- a/src/Text/Pandoc/Readers/Native.hs
+++ b/src/Text/Pandoc/Readers/Native.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Native
Copyright : Copyright (C) 2011-2019 John MacFarlane
@@ -19,7 +20,7 @@ import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Shared (safeRead)
import Control.Monad.Except (throwError)
-import Data.Text (Text, unpack)
+import Data.Text (Text)
import Text.Pandoc.Class
import Text.Pandoc.Error
@@ -38,18 +39,18 @@ readNative :: PandocMonad m
-> Text -- ^ String to parse (assuming @'\n'@ line endings)
-> m Pandoc
readNative _ s =
- case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead (unpack s)) of
+ case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of
Right doc -> return doc
Left _ -> throwError $ PandocParseError "couldn't read native"
readBlocks :: Text -> Either PandocError [Block]
-readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead (unpack s))
+readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s)
readBlock :: Text -> Either PandocError Block
-readBlock s = maybe (Plain <$> readInlines s) Right (safeRead (unpack s))
+readBlock s = maybe (Plain <$> readInlines s) Right (safeRead s)
readInlines :: Text -> Either PandocError [Inline]
-readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead (unpack s))
+readInlines s = maybe ((:[]) <$> readInline s) Right (safeRead s)
readInline :: Text -> Either PandocError Inline
-readInline s = maybe (Left . PandocParseError $ "Could not read: " ++ unpack s) Right (safeRead (unpack s))
+readInline s = maybe (Left . PandocParseError $ "Could not read: " <> s) Right (safeRead s)
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index 2c3b0367f..5330b0238 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.OPML
Copyright : Copyright (C) 2013-2019 John MacFarlane
@@ -18,7 +19,8 @@ import Data.Char (toUpper)
import Data.Default
import Data.Generics
import Data.Maybe (fromMaybe)
-import Data.Text (Text, pack, unpack)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.HTML.TagSoup.Entity (lookupEntity)
import Text.Pandoc.Builder
import Text.Pandoc.Class (PandocMonad)
@@ -50,7 +52,7 @@ readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
readOPML opts inp = do
(bs, st') <- runStateT
(mapM parseBlock $ normalizeTree $
- parseXML (unpack (crFilter inp))) def{ opmlOptions = opts }
+ parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts }
return $
setTitle (opmlDocTitle st') $
setAuthors (opmlDocAuthors st') $
@@ -76,23 +78,26 @@ convertEntity :: String -> String
convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> String
+attrValue :: String -> Element -> Text
attrValue attr elt =
- fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+ maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+
+textContent :: Element -> Text
+textContent = T.pack . strContent
-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
-- exceptT = either throwError return
-asHtml :: PandocMonad m => String -> OPML m Inlines
+asHtml :: PandocMonad m => Text -> OPML m Inlines
asHtml s = do
opts <- gets opmlOptions
- Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } (pack s)
+ Pandoc _ bs <- readHtml def{ readerExtensions = readerExtensions opts } s
return $ blocksToInlines' bs
-asMarkdown :: PandocMonad m => String -> OPML m Blocks
+asMarkdown :: PandocMonad m => Text -> OPML m Blocks
asMarkdown s = do
opts <- gets opmlOptions
- Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } (pack s)
+ Pandoc _ bs <- readMarkdown def{ readerExtensions = readerExtensions opts } s
return $ fromList bs
getBlocks :: PandocMonad m => Element -> OPML m Blocks
@@ -102,11 +107,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock (Elem e) =
case qName (elName e) of
"ownerName" -> mempty <$ modify (\st ->
- st{opmlDocAuthors = [text $ strContent e]})
+ st{opmlDocAuthors = [text $ textContent e]})
"dateModified" -> mempty <$ modify (\st ->
- st{opmlDocDate = text $ strContent e})
+ st{opmlDocDate = text $ textContent e})
"title" -> mempty <$ modify (\st ->
- st{opmlDocTitle = text $ strContent e})
+ st{opmlDocTitle = text $ textContent e})
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
@@ -115,7 +120,7 @@ parseBlock (Elem e) =
modify $ \st -> st{ opmlSectionLevel = n }
bs <- getBlocks e
modify $ \st -> st{ opmlSectionLevel = n - 1 }
- let headerText' = case map toUpper (attrValue "type" e) of
+ let headerText' = case T.toUpper (attrValue "type" e) of
"LINK" -> link
(attrValue "url" e) "" headerText
_ -> headerText
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index dfa019932..f9b78d5bf 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Reader.Odt
Copyright : Copyright (C) 2015 Martin Linnemann
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index d8e5ba272..ff8cdc5fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -1,11 +1,12 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE Arrows #-}
-{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE Arrows #-}
+{-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE PatternGuards #-}
-{-# LANGUAGE RecordWildCards #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE PatternGuards #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Odt.ContentReader
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -29,8 +30,9 @@ import Control.Arrow
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold)
-import Data.List (find, intercalate, stripPrefix)
+import Data.List (find, stripPrefix)
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Maybe
import Data.Semigroup (First(..), Option(..))
@@ -59,7 +61,7 @@ import qualified Data.Set as Set
-- State
--------------------------------------------------------------------------------
-type Anchor = String
+type Anchor = T.Text
type Media = [(FilePath, B.ByteString)]
data ReaderState
@@ -204,21 +206,21 @@ updateMediaWithResource = keepingTheValue (
)
>>^ fst
-lookupResource :: OdtReaderSafe String (FilePath, B.ByteString)
+lookupResource :: OdtReaderSafe FilePath (FilePath, B.ByteString)
lookupResource = proc target -> do
state <- getExtraState -< ()
case lookup target (getMediaEnv state) of
Just bs -> returnV (target, bs) -<< ()
Nothing -> returnV ("", B.empty) -< ()
-type AnchorPrefix = String
+type AnchorPrefix = T.Text
-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a
-- unique identifier but without assuming that the id should be for a header.
-- Second argument is a list of already used identifiers.
uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor
uniqueIdentFrom baseIdent usedIdents =
- let numIdent n = baseIdent ++ "-" ++ show n
+ let numIdent n = baseIdent <> "-" <> T.pack (show n)
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
@@ -305,7 +307,7 @@ withNewStyle a = proc x -> do
isCodeStyle _ = False
inlineCode :: Inlines -> Inlines
- inlineCode = code . intercalate "" . map stringify . toList
+ inlineCode = code . T.concat . map stringify . toList
type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily)
type InlineModifier = Inlines -> Inlines
@@ -535,7 +537,6 @@ matchChildContent :: (Monoid result)
-> OdtReaderSafe _x result
matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback
-
--------------------------------------------
-- Matchers
--------------------------------------------
@@ -556,8 +557,8 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover
)
>>?% mappend
--
- extractText :: XML.Content -> Fallible String
- extractText (XML.Text cData) = succeedWith (XML.cdData cData)
+ extractText :: XML.Content -> Fallible T.Text
+ extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
extractText _ = failEmpty
read_text_seq :: InlineMatcher
@@ -675,8 +676,8 @@ read_list_item = matchingElement NsText "list-item"
read_link :: InlineMatcher
read_link = matchingElement NsText "a"
$ liftA3 link
- ( findAttrWithDefault NsXLink "href" "" )
- ( findAttrWithDefault NsOffice "title" "" )
+ ( findAttrTextWithDefault NsXLink "href" "" )
+ ( findAttrTextWithDefault NsOffice "title" "" )
( matchChildContent [ read_span
, read_note
, read_citation
@@ -709,12 +710,12 @@ read_citation :: InlineMatcher
read_citation = matchingElement NsText "bibliography-mark"
$ liftA2 cite
( liftA2 makeCitation
- ( findAttrWithDefault NsText "identifier" "" )
+ ( findAttrTextWithDefault NsText "identifier" "" )
( readAttrWithDefault NsText "number" 0 )
)
( matchChildContent [] read_plain_text )
where
- makeCitation :: String -> Int -> [Citation]
+ makeCitation :: T.Text -> Int -> [Citation]
makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0]
@@ -779,17 +780,17 @@ read_frame_img =
let exts = extensionsFromList [Ext_auto_identifiers]
resource <- lookupResource -< src'
_ <- updateMediaWithResource -< resource
- w <- findAttr' NsSVG "width" -< ()
- h <- findAttr' NsSVG "height" -< ()
+ w <- findAttrText' NsSVG "width" -< ()
+ h <- findAttrText' NsSVG "height" -< ()
titleNodes <- matchChildContent' [ read_frame_title ] -< ()
alt <- matchChildContent [] read_plain_text -< ()
arr (firstMatch . uncurry4 imageWith) -<
- (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
+ (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
read_frame_title :: InlineMatcher
read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
-image_attributes :: Maybe String -> Maybe String -> Attr
+image_attributes :: Maybe T.Text -> Maybe T.Text -> Attr
image_attributes x y =
( "", [], (dim "width" x) ++ (dim "height" y))
where
@@ -806,7 +807,7 @@ read_frame_mathml =
src' -> do
let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
(_, mathml) <- lookupResource -< path
- case readMathML (UTF8.toString $ B.toStrict mathml) of
+ case readMathML (UTF8.toText $ B.toStrict mathml) of
Left _ -> returnV mempty -< ()
Right exps -> arr (firstMatch . displayMath . writeTeX) -< exps
@@ -817,9 +818,9 @@ read_frame_text_box = proc box -> do
read_img_with_caption :: [Block] -> FirstMatch Inlines
read_img_with_caption (Para [Image attr alt (src,title)] : _) =
- firstMatch $ singleton (Image attr alt (src, 'f':'i':'g':':':title)) -- no text, default caption
+ firstMatch $ singleton (Image attr alt (src, "fig:" <> title)) -- no text, default caption
read_img_with_caption (Para (Image attr _ (src,title) : txt) : _) =
- firstMatch $ singleton (Image attr txt (src, 'f':'i':'g':':':title) ) -- override caption with the text that follows
+ firstMatch $ singleton (Image attr txt (src, "fig:" <> title) ) -- override caption with the text that follows
read_img_with_caption ( Para (_ : xs) : ys) =
read_img_with_caption (Para xs : ys)
read_img_with_caption _ =
@@ -829,12 +830,12 @@ read_img_with_caption _ =
-- Internal links
----------------------
-_ANCHOR_PREFIX_ :: String
+_ANCHOR_PREFIX_ :: T.Text
_ANCHOR_PREFIX_ = "anchor"
--
readAnchorAttr :: OdtReader _x Anchor
-readAnchorAttr = findAttr NsText "name"
+readAnchorAttr = findAttrText NsText "name"
-- | Beware: may fail
findAnchorName :: OdtReader AnchorPrefix Anchor
@@ -875,7 +876,7 @@ read_reference_start = matchingElement NsText "reference-mark-start"
-- | Beware: may fail
findAnchorRef :: OdtReader _x Anchor
-findAnchorRef = ( findAttr NsText "ref-name"
+findAnchorRef = ( findAttrText NsText "ref-name"
>>?^ (_ANCHOR_PREFIX_,)
) >>?! getPrettyAnchor
@@ -890,7 +891,7 @@ maybeInAnchorRef = proc inlines -> do
Left _ -> returnA -< inlines
where
toAnchorRef :: Anchor -> Inlines -> Inlines
- toAnchorRef anchor = link ('#':anchor) "" -- no title
+ toAnchorRef anchor = link ("#" <> anchor) "" -- no title
--
read_bookmark_ref :: InlineMatcher
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index ccbaf6fc4..59d1b8abd 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -38,8 +38,11 @@ module Text.Pandoc.Readers.Odt.Generic.XMLConverter
, lookupAttr'
, lookupDefaultingAttr
, findAttr'
+, findAttrText'
, findAttr
+, findAttrText
, findAttrWithDefault
+, findAttrTextWithDefault
, readAttr
, readAttr'
, readAttrWithDefault
@@ -59,6 +62,7 @@ import Control.Arrow
import Data.Either ( rights )
import qualified Data.Map as M
+import qualified Data.Text as T
import Data.Default
import Data.Maybe
@@ -79,6 +83,7 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible
type ElementName = String
type AttributeName = String
type AttributeValue = String
+type TextAttributeValue = T.Text
--
type NameSpacePrefix = String
@@ -466,6 +471,16 @@ findAttr' nsID attrName = qualifyName nsID attrName
&&& getCurrentElement
>>% XML.findAttr
+-- | Return value as a (Maybe Text)
+findAttrText' :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> XMLConverter nsID extraState x (Maybe TextAttributeValue)
+findAttrText' nsID attrName
+ = qualifyName nsID attrName
+ &&& getCurrentElement
+ >>% XML.findAttr
+ >>^ fmap T.pack
+
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
=> nsID -> AttributeName
@@ -473,6 +488,15 @@ findAttr :: (NameSpaceID nsID)
findAttr nsID attrName = findAttr' nsID attrName
>>> maybeToChoice
+-- | Return value as text or fail
+findAttrText :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> FallibleXMLConverter nsID extraState x TextAttributeValue
+findAttrText nsID attrName
+ = findAttr' nsID attrName
+ >>^ fmap T.pack
+ >>> maybeToChoice
+
-- | Return value as string or return provided default value
findAttrWithDefault :: (NameSpaceID nsID)
=> nsID -> AttributeName
@@ -482,6 +506,15 @@ findAttrWithDefault nsID attrName deflt
= findAttr' nsID attrName
>>^ fromMaybe deflt
+-- | Return value as string or return provided default value
+findAttrTextWithDefault :: (NameSpaceID nsID)
+ => nsID -> AttributeName
+ -> TextAttributeValue
+ -> XMLConverter nsID extraState x TextAttributeValue
+findAttrTextWithDefault nsID attrName deflt
+ = findAttr' nsID attrName
+ >>^ maybe deflt T.pack
+
-- | Read and return value or fail
readAttr :: (NameSpaceID nsID, Read attrValue)
=> nsID -> AttributeName
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 79e8d7aea..99fa05880 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -548,11 +548,11 @@ readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle)
readListLevelStyle levelType = readAttr NsText "level"
>>?! keepingTheValue
( liftA5 toListLevelStyle
- ( returnV levelType )
- ( findAttr' NsStyle "num-prefix" )
- ( findAttr' NsStyle "num-suffix" )
- ( getAttr NsStyle "num-format" )
- ( findAttr' NsText "start-value" )
+ ( returnV levelType )
+ ( findAttr' NsStyle "num-prefix" )
+ ( findAttr' NsStyle "num-suffix" )
+ ( getAttr NsStyle "num-format" )
+ ( findAttrText' NsText "start-value" )
)
where
toListLevelStyle _ p s LinfNone b = ListLevelStyle LltBullet p s LinfNone (startValue b)
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2c88c7776..99ece152c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -26,7 +27,6 @@ import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
import Data.Text (Text)
-import qualified Data.Text as T
-- | Parse org-mode string and return a Pandoc document.
readOrg :: PandocMonad m
@@ -36,7 +36,7 @@ readOrg :: PandocMonad m
readOrg opts s = do
parsed <- flip runReaderT def $
readWithM parseOrg (optionsToParserState opts)
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left _ -> throwError $ PandocParseError "problem parsing org"
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 58db4f46c..b4f3cc0d8 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.BlockStarts
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts
import Prelude
import Control.Monad (void)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Readers.Org.Parsing
-- | Horizontal Line (five -- dashes or more)
@@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m ()
gridTableStart = try $ skipSpaces <* char '+' <* char '-'
-latexEnvStart :: Monad m => OrgParser m String
+latexEnvStart :: Monad m => OrgParser m Text
latexEnvStart = try $
skipSpaces *> string "\\begin{"
*> latexEnvName
<* string "}"
<* blankline
where
- latexEnvName :: Monad m => OrgParser m String
- latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
+ latexEnvName :: Monad m => OrgParser m Text
+ latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*")
bulletListStart :: Monad m => OrgParser m Int
bulletListStart = try $ do
@@ -68,7 +71,7 @@ bulletListStart = try $ do
return (ind + 1)
genericListStart :: Monad m
- => OrgParser m String
+ => OrgParser m Text
-> OrgParser m Int
genericListStart listMarker = try $ do
ind <- length <$> many spaceChar
@@ -82,11 +85,11 @@ eol = void (char '\n')
orderedListStart :: Monad m => OrgParser m Int
orderedListStart = genericListStart orderedListMarker
-- Ordered list markers allowed in org-mode
- where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+ where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)"
-drawerStart :: Monad m => OrgParser m String
+drawerStart :: Monad m => OrgParser m Text
drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
+ where drawerName = char ':' *> manyTillChar nonspaceChar (char ':')
metaLineStart :: Monad m => OrgParser m ()
metaLineStart = try $ skipSpaces <* string "#+"
@@ -99,12 +102,12 @@ commentLineStart = try $
exampleLineStart :: Monad m => OrgParser m ()
exampleLineStart = () <$ try (skipSpaces *> string ": ")
-noteMarker :: Monad m => OrgParser m String
+noteMarker :: Monad m => OrgParser m Text
noteMarker = try $ do
char '['
- choice [ many1Till digit (char ']')
- , (++) <$> string "fn:"
- <*> many1Till (noneOf "\n\r\t ") (char ']')
+ choice [ many1TillChar digit (char ']')
+ , (<>) <$> textStr "fn:"
+ <*> many1TillChar (noneOf "\n\r\t ") (char ']')
]
-- | Succeeds if the parser is at the end of a block.
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index cba876f06..de51dec3d 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Blocks
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Blocks, Inlines)
@@ -33,11 +34,13 @@ import Text.Pandoc.Options
import Text.Pandoc.Shared (compactify, compactifyDL, safeRead)
import Control.Monad (foldM, guard, mzero, void)
-import Data.Char (isSpace, toLower, toUpper)
+import Data.Char (isSpace)
import Data.Default (Default)
-import Data.List (foldl', isPrefixOf)
+import Data.List (foldl')
import Data.Maybe (fromMaybe, isJust, isNothing)
+import Data.Text (Text)
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.Walk as Walk
@@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline
-- | Attributes that may be added to figures (like a name or caption).
data BlockAttributes = BlockAttributes
- { blockAttrName :: Maybe String
- , blockAttrLabel :: Maybe String
+ { blockAttrName :: Maybe Text
+ , blockAttrLabel :: Maybe Text
, blockAttrCaption :: Maybe (F Inlines)
- , blockAttrKeyValues :: [(String, String)]
+ , blockAttrKeyValues :: [(Text, Text)]
}
-- | Convert BlockAttributes into pandoc Attr
@@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} =
ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
classes = case lookup "class" blockAttrKeyValues of
Nothing -> []
- Just clsStr -> words clsStr
+ Just clsStr -> T.words clsStr
kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
in (ident, classes, kv)
-stringyMetaAttribute :: Monad m => OrgParser m (String, String)
+stringyMetaAttribute :: Monad m => OrgParser m (Text, Text)
stringyMetaAttribute = try $ do
metaLineStart
- attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
+ attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':')
skipSpaces
attrValue <- anyLine <|> ("" <$ newline)
return (attrName, attrValue)
@@ -129,8 +132,8 @@ blockAttributes = try $ do
let label = lookup "LABEL" kv
caption' <- case caption of
Nothing -> return Nothing
- Just s -> Just <$> parseFromString inlines (s ++ "\n")
- kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
+ Just s -> Just <$> parseFromString inlines (s <> "\n")
+ kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs
return BlockAttributes
{ blockAttrName = name
, blockAttrLabel = label
@@ -138,31 +141,31 @@ blockAttributes = try $ do
, blockAttrKeyValues = kvAttrs'
}
where
- isBlockAttr :: String -> Bool
+ isBlockAttr :: Text -> Bool
isBlockAttr = flip elem
[ "NAME", "LABEL", "CAPTION"
, "ATTR_HTML", "ATTR_LATEX"
, "RESULTS"
]
- appendValues :: String -> Maybe String -> (String, String) -> Maybe String
+ appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text
appendValues attrName accValue (key, value) =
if key /= attrName
then accValue
else case accValue of
- Just acc -> Just $ acc ++ ' ':value
+ Just acc -> Just $ acc <> " " <> value
Nothing -> Just value
-- | Parse key-value pairs for HTML attributes
-keyValues :: Monad m => OrgParser m [(String, String)]
+keyValues :: Monad m => OrgParser m [(Text, Text)]
keyValues = try $
manyTill ((,) <$> key <*> value) newline
where
- key :: Monad m => OrgParser m String
- key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
+ key :: Monad m => OrgParser m Text
+ key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar
- value :: Monad m => OrgParser m String
- value = skipSpaces *> manyTill anyChar endOfValue
+ value :: Monad m => OrgParser m Text
+ value = skipSpaces *> manyTillChar anyChar endOfValue
endOfValue :: Monad m => OrgParser m ()
endOfValue =
@@ -180,7 +183,7 @@ orgBlock = try $ do
blockAttrs <- blockAttributes
blkType <- blockHeaderStart
($ blkType) $
- case map toLower blkType of
+ case T.toLower blkType of
"export" -> exportBlock
"comment" -> rawBlockLines (const mempty)
"html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
@@ -194,13 +197,13 @@ orgBlock = try $ do
let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
in fmap $ B.divWith (ident, classes ++ [blkType], kv)
where
- blockHeaderStart :: Monad m => OrgParser m String
+ blockHeaderStart :: Monad m => OrgParser m Text
blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
- lowercase :: String -> String
- lowercase = map toLower
+ lowercase :: Text -> Text
+ lowercase = T.toLower
-exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
exampleBlock blockAttrs _label = do
skipSpaces
(classes, kv) <- switchesAsAttributes
@@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do
let codeBlck = B.codeBlockWith (id', "example":classes, kv) content
return . return $ codeBlck
-rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
+rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks)
rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType)
-parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
+parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks)
parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent)
where
parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
parsedBlockContent = try $ do
raw <- rawBlockContent blockType
- parseFromString blocks (raw ++ "\n")
+ parseFromString blocks (raw <> "\n")
-- | Read the raw string content of a block
-rawBlockContent :: Monad m => String -> OrgParser m String
+rawBlockContent :: Monad m => Text -> OrgParser m Text
rawBlockContent blockType = try $ do
blkLines <- manyTill rawLine blockEnder
tabLen <- getOption readerTabStop
trimP <- orgStateTrimLeadBlkIndent <$> getState
- let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs
- (unlines
+ let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs
+ (T.unlines
. stripIndent
. map (tabsToSpaces tabLen . commaEscaped)
$ blkLines)
<$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True })
where
- rawLine :: Monad m => OrgParser m String
+ rawLine :: Monad m => OrgParser m Text
rawLine = try $ ("" <$ blankline) <|> anyLine
blockEnder :: Monad m => OrgParser m ()
blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
- shortestIndent :: [String] -> Int
- shortestIndent = foldr (min . length . takeWhile isSpace) maxBound
- . filter (not . null)
-
- tabsToSpaces :: Int -> String -> String
- tabsToSpaces _ [] = []
- tabsToSpaces tabLen cs'@(c:cs) =
- case c of
- ' ' -> ' ':tabsToSpaces tabLen cs
- '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs
- _ -> cs'
-
- commaEscaped :: String -> String
- commaEscaped (',':cs@('*':_)) = cs
- commaEscaped (',':cs@('#':'+':_)) = cs
- commaEscaped (' ':cs) = ' ':commaEscaped cs
- commaEscaped ('\t':cs) = '\t':commaEscaped cs
- commaEscaped cs = cs
+ shortestIndent :: [Text] -> Int
+ shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound
+ . filter (not . T.null)
+
+ tabsToSpaces :: Int -> Text -> Text
+ tabsToSpaces tabStop t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ tabNum = T.length $ T.filter (== '\n') ind
+ spaceNum = T.length ind - tabNum
+ in T.replicate (spaceNum + tabStop * tabNum) " " <> suff
+
+ commaEscaped t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ in case T.uncons suff of
+ Just (',', cs)
+ | "*" <- T.take 1 cs -> ind <> cs
+ | "#+" <- T.take 2 cs -> ind <> cs
+ _ -> t
-- | Read but ignore all remaining block headers.
ignHeaders :: Monad m => OrgParser m ()
@@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-- | Read a block containing code intended for export in specific backends
-- only.
-exportBlock :: Monad m => String -> OrgParser m (F Blocks)
+exportBlock :: Monad m => Text -> OrgParser m (F Blocks)
exportBlock blockType = try $ do
exportType <- skipSpaces *> orgArgWord <* ignHeaders
contents <- rawBlockContent blockType
- returnF (B.rawBlock (map toLower exportType) contents)
+ returnF (B.rawBlock (T.toLower exportType) contents)
-verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
+verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks)
verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
fmap B.lineBlock . sequence
- <$> mapM parseVerseLine (lines content)
+ <$> mapM parseVerseLine (T.lines content)
where
-- replace initial spaces with nonbreaking spaces to preserve
-- indentation, parse the rest as normal inline
- parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
+ parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines)
parseVerseLine cs = do
- let (initialSpaces, indentedLine) = span isSpace cs
- let nbspIndent = if null initialSpaces
+ let (initialSpaces, indentedLine) = T.span isSpace cs
+ let nbspIndent = if T.null initialSpaces
then mempty
- else B.str $ map (const '\160') initialSpaces
- line <- parseFromString inlines (indentedLine ++ "\n")
+ else B.str $ T.map (const '\160') initialSpaces
+ line <- parseFromString inlines (indentedLine <> "\n")
return (trimInlinesF $ pure nbspIndent <> line)
-- | Read a code block and the associated results block if present. Which of
-- boths blocks is included in the output is determined using the "exports"
-- argument in the block header.
-codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
@@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
- exportsResults :: [(String, String)] -> Bool
+ exportsResults :: [(Text, Text)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
-- | Parse the result of an evaluated babel code block.
@@ -329,7 +332,7 @@ babelResultsBlock = try $ do
resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline
-- | Parse code block arguments
-codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
+codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)])
codeHeaderArgs = try $ do
language <- skipSpaces *> orgArgWord
(switchClasses, switchKv) <- switchesAsAttributes
@@ -338,14 +341,14 @@ codeHeaderArgs = try $ do
, originalLang language <> switchKv <> parameters
)
-switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)])
+switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)])
switchesAsAttributes = try $ do
switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar)
return $ foldr addToAttr ([], []) switches
where
- addToAttr :: (Char, Maybe String, SwitchPolarity)
- -> ([String], [(String, String)])
- -> ([String], [(String, String)])
+ addToAttr :: (Char, Maybe Text, SwitchPolarity)
+ -> ([Text], [(Text, Text)])
+ -> ([Text], [(Text, Text)])
addToAttr ('n', lineNum, pol) (cls, kv) =
let kv' = case lineNum of
Just num -> ("startFrom", num):kv
@@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity
switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+')
-- | Parses a source block switch option.
-switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
switch = try $ lineNumberSwitch <|> labelSwitch
<|> whitespaceSwitch <|> simpleSwitch
where
simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter
labelSwitch = genericSwitch 'l' $
- char '"' *> many1Till nonspaceChar (char '"')
+ char '"' *> many1TillChar nonspaceChar (char '"')
-whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
whitespaceSwitch = do
string "-i"
updateState $ \s -> s { orgStateTrimLeadBlkIndent = False }
@@ -382,8 +385,8 @@ whitespaceSwitch = do
-- | Generic source block switch-option parser.
genericSwitch :: Monad m
=> Char
- -> OrgParser m String
- -> OrgParser m (Char, Maybe String, SwitchPolarity)
+ -> OrgParser m Text
+ -> OrgParser m (Char, Maybe Text, SwitchPolarity)
genericSwitch c p = try $ do
polarity <- switchPolarity <* char c <* skipSpaces
arg <- optionMaybe p
@@ -391,17 +394,17 @@ genericSwitch c p = try $ do
-- | Reads a line number switch option. The line number switch can be used with
-- example and source blocks.
-lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
-lineNumberSwitch = genericSwitch 'n' (many digit)
+lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
+lineNumberSwitch = genericSwitch 'n' (manyChar digit)
-blockOption :: Monad m => OrgParser m (String, String)
+blockOption :: Monad m => OrgParser m (Text, Text)
blockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgParamValue
return (argKey, paramValue)
-orgParamValue :: Monad m => OrgParser m String
-orgParamValue = try $
+orgParamValue :: Monad m => OrgParser m Text
+orgParamValue = try $ fmap T.pack $
skipSpaces
*> notFollowedBy orgArgKey
*> noneOf "\n\r" `many1Till` endOfValue
@@ -420,7 +423,7 @@ orgParamValue = try $
-- export setting.
genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
genericDrawer = try $ do
- name <- map toUpper <$> drawerStart
+ name <- T.toUpper <$> drawerStart
content <- manyTill drawerLine (try drawerEnd)
state <- getState
-- Include drawer if it is explicitly included in or not explicitly excluded
@@ -432,16 +435,16 @@ genericDrawer = try $ do
Right names | name `notElem` names -> return mempty
_ -> drawerDiv name <$> parseLines content
where
- parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
- parseLines = parseFromString blocks . (++ "\n") . unlines
+ parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks)
+ parseLines = parseFromString blocks . (<> "\n") . T.unlines
- drawerDiv :: String -> F Blocks -> F Blocks
+ drawerDiv :: Text -> F Blocks -> F Blocks
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-drawerLine :: Monad m => OrgParser m String
+drawerLine :: Monad m => OrgParser m Text
drawerLine = anyLine
-drawerEnd :: Monad m => OrgParser m String
+drawerEnd :: Monad m => OrgParser m Text
drawerEnd = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
@@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
- case cleanLinkString src of
+ case cleanLinkText src of
Nothing -> mzero
Just imgSrc -> do
guard (isImageFilename imgSrc)
let isFigure = isJust $ blockAttrCaption figAttrs
return $ imageBlock isFigure figAttrs imgSrc
where
- selfTarget :: PandocMonad m => OrgParser m String
+ selfTarget :: PandocMonad m => OrgParser m Text
selfTarget = try $ char '[' *> linkTarget <* char ']'
- imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
+ imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks
imageBlock isFigure figAttrs imgSrc =
let
figName = fromMaybe mempty $ blockAttrName figAttrs
@@ -478,11 +481,11 @@ figure = try $ do
in
B.para . B.imageWith attr imgSrc figTitle <$> figCaption
- withFigPrefix :: String -> String
+ withFigPrefix :: Text -> Text
withFigPrefix cs =
- if "fig:" `isPrefixOf` cs
+ if "fig:" `T.isPrefixOf` cs
then cs
- else "fig:" ++ cs
+ else "fig:" <> cs
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
@@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
-- | Example code marked up by a leading colon.
example :: Monad m => OrgParser m (F Blocks)
-example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine
+example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine
where
- exampleLine :: Monad m => OrgParser m String
+ exampleLine :: Monad m => OrgParser m Text
exampleLine = try $ exampleLineStart *> anyLine
-exampleCode :: String -> Blocks
+exampleCode :: Text -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
@@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks)
include = try $ do
metaLineStart <* stringAnyCase "include:" <* skipSpaces
filename <- includeTarget
- includeArgs <- many (try $ skipSpaces *> many1 alphaNum)
+ includeArgs <- many (try $ skipSpaces *> many1Char alphaNum)
params <- keyValues
blocksParser <- case includeArgs of
("example" : _) -> return $ pure . B.codeBlock <$> parseRaw
@@ -535,10 +538,10 @@ include = try $ do
char '"'
manyTill (noneOf "\n\r\t") (char '"')
- parseRaw :: PandocMonad m => OrgParser m String
- parseRaw = many anyChar
+ parseRaw :: PandocMonad m => OrgParser m Text
+ parseRaw = manyChar anyChar
- blockFilter :: [(String, String)] -> [Block] -> [Block]
+ blockFilter :: [(Text, Text)] -> [Block] -> [Block]
blockFilter params blks =
let minlvl = lookup "minlevel" params
in case (minlvl >>= safeRead :: Maybe Int) of
@@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
<$> (skipSpaces
*> char '<'
*> optionMaybe tableAlignFromChar)
- <*> (optionMaybe (many1 digit >>= safeRead)
+ <*> (optionMaybe (many1Char digit >>= safeRead)
<* char '>'
<* emptyCell)
@@ -739,10 +742,10 @@ latexFragment = try $ do
, "\\end{", e, "}\n"
]
-latexEnd :: Monad m => String -> OrgParser m ()
+latexEnd :: Monad m => Text -> OrgParser m ()
latexEnd envName = try $
() <$ skipSpaces
- <* string ("\\end{" ++ envName ++ "}")
+ <* textStr ("\\end{" <> envName <> "}")
<* blankline
@@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m
-> OrgParser m (F (Inlines, [Blocks]))
definitionListItem parseIndentedMarker = try $ do
markerLength <- parseIndentedMarker
- term <- manyTill (noneOf "\n\r") (try definitionMarker)
+ term <- manyTillChar (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- cont <- concat <$> many (listContinuation markerLength)
+ cont <- T.concat <$> many (listContinuation markerLength)
term' <- parseFromString inlines term
- contents' <- parseFromString blocks $ line1 ++ blank ++ cont
+ contents' <- parseFromString blocks $ line1 <> blank <> cont
return $ (,) <$> term' <*> fmap (:[]) contents'
where
definitionMarker =
@@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do
markerLength <- try parseIndentedMarker
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString blocks $ firstLine ++ blank ++ rest
+ rest <- T.concat <$> many (listContinuation markerLength)
+ parseFromString blocks $ firstLine <> blank <> rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: PandocMonad m => Int -> OrgParser m String
+listContinuation :: PandocMonad m => Int -> OrgParser m Text
listContinuation markerLength = try $ do
notFollowedBy' blankline
- mappend <$> (concat <$> many1 (listContinuation' markerLength))
- <*> many blankline
+ mappend <$> (T.concat <$> many1 (listContinuation' markerLength))
+ <*> manyChar blankline
where
listContinuation' indentation =
blockLines indentation <|> listLine indentation
@@ -853,6 +856,6 @@ listContinuation markerLength = try $ do
>> blockAttributes
>>= (\blockAttrs ->
case attrFromBlockAttributes blockAttrs of
- ("", [], []) -> count 1 anyChar
+ ("", [], []) -> countChar 1 anyChar
_ -> indentWith indentation))
>> (snd <$> withRaw orgBlock)
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index c96087be7..09a501b68 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -1,5 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree
import Prelude
import Control.Arrow ((***))
import Control.Monad (guard, void)
-import Data.Char (toLower, toUpper)
import Data.List (intersperse)
import Data.Maybe (mapMaybe)
+import Data.Text (Text)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
@@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import qualified Data.Set as Set
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
--
@@ -59,28 +61,28 @@ documentTree blocks inline = do
}
-- | Create a tag containing the given string.
-toTag :: String -> Tag
+toTag :: Text -> Tag
toTag = Tag
-- | The key (also called name or type) of a property.
-newtype PropertyKey = PropertyKey { fromKey :: String }
+newtype PropertyKey = PropertyKey { fromKey :: Text }
deriving (Show, Eq, Ord)
-- | Create a property key containing the given string. Org mode keys are
-- case insensitive and are hence converted to lower case.
-toPropertyKey :: String -> PropertyKey
-toPropertyKey = PropertyKey . map toLower
+toPropertyKey :: Text -> PropertyKey
+toPropertyKey = PropertyKey . T.toLower
-- | The value assigned to a property.
-newtype PropertyValue = PropertyValue { fromValue :: String }
+newtype PropertyValue = PropertyValue { fromValue :: Text }
-- | Create a property value containing the given string.
-toPropertyValue :: String -> PropertyValue
+toPropertyValue :: Text -> PropertyValue
toPropertyValue = PropertyValue
-- | Check whether the property value is non-nil (i.e. truish).
isNonNil :: PropertyValue -> Bool
-isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
+isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"]
-- | Key/value pairs from a PROPERTIES drawer
type Properties = [(PropertyKey, PropertyValue)]
@@ -273,7 +275,7 @@ headlineToHeader hdln = do
todoKeyword :: Monad m => OrgParser m TodoMarker
todoKeyword = try $ do
taskStates <- activeTodoMarkers <$> getState
- let kwParser tdm = try (tdm <$ string (todoMarkerName tdm)
+ let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm)
<* spaceChar
<* updateLastPreCharPos)
choice (map kwParser taskStates)
@@ -281,26 +283,26 @@ todoKeyword = try $ do
todoKeywordToInlines :: TodoMarker -> Inlines
todoKeywordToInlines tdm =
let todoText = todoMarkerName tdm
- todoState = map toLower . show $ todoMarkerState tdm
+ todoState = T.toLower . T.pack . show $ todoMarkerState tdm
classes = [todoState, todoText]
in B.spanWith (mempty, classes, mempty) (B.str todoText)
propertiesToAttr :: Properties -> Attr
propertiesToAttr properties =
let
- toStringPair = fromKey *** fromValue
+ toTextPair = fromKey *** fromValue
customIdKey = toPropertyKey "custom_id"
classKey = toPropertyKey "class"
unnumberedKey = toPropertyKey "unnumbered"
specialProperties = [customIdKey, classKey, unnumberedKey]
id' = maybe mempty fromValue . lookup customIdKey $ properties
cls = maybe mempty fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
+ kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst)
$ properties
isUnnumbered =
maybe False isNonNil . lookup unnumberedKey $ properties
in
- (id', words cls ++ ["unnumbered" | isUnnumbered], kvs')
+ (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs')
tagsToInlines :: [Tag] -> Inlines
tagsToInlines [] = mempty
@@ -336,15 +338,15 @@ planningToBlock planning = do
<> B.emph (B.str time)
-- | An Org timestamp, including repetition marks. TODO: improve
-type Timestamp = String
+type Timestamp = Text
timestamp :: Monad m => OrgParser m Timestamp
timestamp = try $ do
openChar <- oneOf "<["
let isActive = openChar == '<'
let closeChar = if isActive then '>' else ']'
- content <- many1Till anyChar (char closeChar)
- return (openChar : content ++ [closeChar])
+ content <- many1TillChar anyChar (char closeChar)
+ return $ T.cons openChar $ content `T.snoc` closeChar
-- | Planning information for a subtree/headline.
data PlanningInfo = PlanningInfo
@@ -374,7 +376,7 @@ planningInfo = try $ do
propertiesDrawer :: Monad m => OrgParser m Properties
propertiesDrawer = try $ do
drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
+ guard $ T.toUpper drawerType == "PROPERTIES"
manyTill property (try endOfDrawer)
where
property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
@@ -382,12 +384,12 @@ propertiesDrawer = try $ do
key :: Monad m => OrgParser m PropertyKey
key = fmap toPropertyKey . try $
- skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+ skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':')
value :: Monad m => OrgParser m PropertyValue
value = fmap toPropertyValue . try $
- skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+ skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline)
- endOfDrawer :: Monad m => OrgParser m String
+ endOfDrawer :: Monad m => OrgParser m Text
endOfDrawer = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index f783eaa0f..f1f089273 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ExportSettings
Copyright : © 2016–2019 Albert Krewinkel
@@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing
import Control.Monad (mzero, void)
import Data.Char (toLower)
import Data.Maybe (listToMaybe)
+import Data.Text (Text)
-- | Read and handle space separated org-mode export settings.
exportSettings :: PandocMonad m => OrgParser m ()
@@ -70,11 +72,11 @@ exportSetting = choice
genericExportSetting :: Monad m
=> OrgParser m a
- -> String
+ -> Text
-> ExportSettingSetter a
-> OrgParser m ()
genericExportSetting optionParser settingIdentifier setter = try $ do
- _ <- string settingIdentifier *> char ':'
+ _ <- textStr settingIdentifier *> char ':'
value <- optionParser
updateState $ modifyExportSettings value
where
@@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do
st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-- | A boolean option, either nil (False) or non-nil (True).
-booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
+booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m ()
booleanSetting = genericExportSetting elispBoolean
-- | An integer-valued option.
-integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
+integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m ()
integerSetting = genericExportSetting parseInt
where
parseInt = try $
@@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt
-- | Either the string "headline" or an elisp boolean and treated as an
-- @ArchivedTreesOption@.
archivedTreeSetting :: Monad m
- => String
+ => Text
-> ExportSettingSetter ArchivedTreesOption
-> OrgParser m ()
archivedTreeSetting =
@@ -115,42 +117,42 @@ archivedTreeSetting =
-- | A list or a complement list (i.e. a list starting with `not`).
complementableListSetting :: Monad m
- => String
- -> ExportSettingSetter (Either [String] [String])
+ => Text
+ -> ExportSettingSetter (Either [Text] [Text])
-> OrgParser m ()
complementableListSetting = genericExportSetting $ choice
- [ Left <$> complementStringList
+ [ Left <$> complementTextList
, Right <$> stringList
, (\b -> if b then Left [] else Right []) <$> elispBoolean
]
where
-- Read a plain list of strings.
- stringList :: Monad m => OrgParser m [String]
+ stringList :: Monad m => OrgParser m [Text]
stringList = try $
char '('
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
-- Read an emacs lisp list specifying a complement set.
- complementStringList :: Monad m => OrgParser m [String]
- complementStringList = try $
+ complementTextList :: Monad m => OrgParser m [Text]
+ complementTextList = try $
string "(not "
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
- elispString :: Monad m => OrgParser m String
- elispString = try $
+ elispText :: Monad m => OrgParser m Text
+ elispText = try $
char '"'
- *> manyTill alphaNum (char '"')
+ *> manyTillChar alphaNum (char '"')
-- | Read but ignore the export setting.
-ignoredSetting :: Monad m => String -> OrgParser m ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
+ignoredSetting :: Monad m => Text -> OrgParser m ()
+ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar)
-- | Read any setting string, but ignore it and emit a warning.
ignoreAndWarn :: PandocMonad m => OrgParser m ()
ignoreAndWarn = try $ do
- opt <- many1 nonspaceChar
+ opt <- many1Char nonspaceChar
report (UnknownOrgExportOption opt)
return ()
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index cae590c5f..da638f717 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -20,7 +20,7 @@ import Prelude
import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Inlines)
@@ -38,12 +38,14 @@ import Control.Monad.Trans (lift)
import Data.Char (isAlphaNum, isSpace)
import Data.List (intersperse)
import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.Maybe (fromMaybe)
--
-- Functions acting on the parser state
--
-recordAnchorId :: PandocMonad m => String -> OrgParser m ()
+recordAnchorId :: PandocMonad m => Text -> OrgParser m ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : orgStateAnchorIds s }
@@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines)
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
str :: PandocMonad m => OrgParser m (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
@@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do
-- | Read a citation key. The characters allowed in citation keys are taken
-- from the `org-ref-cite-re` variable in `org-ref.el`.
-orgRefCiteKey :: PandocMonad m => OrgParser m String
+orgRefCiteKey :: PandocMonad m => OrgParser m Text
orgRefCiteKey =
let citeKeySpecialChars = "-_:\\./," :: String
isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
@@ -329,7 +331,7 @@ orgRefCiteKey =
endOfCitation = try $ do
many $ satisfy isCiteKeySpecialChar
satisfy $ not . isCiteKeyChar
- in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation
+ in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation
-- | Supported citation types. Only a small subset of org-ref types is
@@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote
inlineNote :: PandocMonad m => OrgParser m (F Inlines)
inlineNote = try $ do
string "[fn:"
- ref <- many alphaNum
+ ref <- manyChar alphaNum
char ':'
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- unless (null ref) $
- addToNotesTable ("fn:" ++ ref, note)
+ unless (T.null ref) $
+ addToNotesTable ("fn:" <> ref, note)
return $ B.note <$> note
referencedNote :: PandocMonad m => OrgParser m (F Inlines)
@@ -397,7 +399,7 @@ referencedNote = try $ do
return $ do
notes <- asksF orgStateNotes'
case lookup ref notes of
- Nothing -> return . B.str $ "[" ++ ref ++ "]"
+ Nothing -> return . B.str $ "[" <> ref <> "]"
Just contents -> do
st <- askF
let contents' = runF contents st{ orgStateNotes' = [] }
@@ -420,7 +422,7 @@ explicitOrImageLink = try $ do
return $ do
src <- srcF
title <- titleF
- case cleanLinkString descr of
+ case cleanLinkText descr of
Just imgSrc | isImageFilename imgSrc ->
return . B.link src "" $ B.image imgSrc mempty mempty
_ ->
@@ -429,10 +431,10 @@ explicitOrImageLink = try $ do
selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
selflinkOrImage = try $ do
target <- char '[' *> linkTarget <* char ']'
- case cleanLinkString target of
- Nothing -> case target of
- '#':_ -> returnF $ B.link target "" (B.str target)
- _ -> return $ internalLink target (B.str target)
+ case cleanLinkText target of
+ Nothing -> case T.uncons target of
+ Just ('#', _) -> returnF $ B.link target "" (B.str target)
+ _ -> return $ internalLink target (B.str target)
Just nonDocTgt -> if isImageFilename nonDocTgt
then returnF $ B.image nonDocTgt "" ""
else returnF $ B.link nonDocTgt "" (B.str target)
@@ -449,35 +451,35 @@ angleLink = try $ do
char '>'
return link
-linkTarget :: PandocMonad m => OrgParser m String
-linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]")
+linkTarget :: PandocMonad m => OrgParser m Text
+linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]")
-possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
+possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-applyCustomLinkFormat :: String -> OrgParser m (F String)
+applyCustomLinkFormat :: Text -> OrgParser m (F Text)
applyCustomLinkFormat link = do
- let (linkType, rest) = break (== ':') link
+ let (linkType, rest) = T.break (== ':') link
return $ do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
+ return $ maybe link ($ T.drop 1 rest) formatter
-- | Take a link and return a function which produces new inlines when given
-- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF :: Text -> Inlines -> F Inlines
linkToInlinesF linkStr =
- case linkStr of
- "" -> pure . B.link mempty "" -- wiki link (empty by convention)
- ('#':_) -> pure . B.link linkStr "" -- document-local fraction
- _ -> case cleanLinkString linkStr of
- Just extTgt -> return . B.link extTgt ""
- Nothing -> internalLink linkStr -- other internal link
-
-internalLink :: String -> Inlines -> F Inlines
+ case T.uncons linkStr of
+ Nothing -> pure . B.link mempty "" -- wiki link (empty by convention)
+ Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction
+ _ -> case cleanLinkText linkStr of
+ Just extTgt -> return . B.link extTgt ""
+ Nothing -> internalLink linkStr -- other internal link
+
+internalLink :: Text -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
if anchorB
- then return $ B.link ('#':link) "" title
+ then return $ B.link ("#" <> link) "" title
else return $ B.emph title
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
@@ -493,15 +495,15 @@ anchor = try $ do
returnF $ B.spanWith (solidify anchorId, [], []) mempty
where
parseAnchor = string "<<"
- *> many1 (noneOf "\t\n\r<>\"' ")
+ *> many1Char (noneOf "\t\n\r<>\"' ")
<* string ">>"
<* skipSpaces
-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors
-- the org function @org-export-solidify-link-text@.
-solidify :: String -> String
-solidify = map replaceSpecialChar
+solidify :: Text -> Text
+solidify = T.map replaceSpecialChar
where replaceSpecialChar c
| isAlphaNum c = c
| c `elem` ("_.-:" :: String) = c
@@ -511,25 +513,25 @@ solidify = map replaceSpecialChar
inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
- lang <- many1 orgArgWordChar
+ lang <- many1Char orgArgWordChar
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
- inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r")
+ inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang]
let attrKeyVal = originalLang lang <> opts
let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode
returnF $ if exportsCode opts then codeInlineBlck else mempty
where
- inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
+ inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text)
inlineBlockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgInlineParamValue
return (argKey, paramValue)
- orgInlineParamValue :: PandocMonad m => OrgParser m String
+ orgInlineParamValue :: PandocMonad m => OrgParser m Text
orgInlineParamValue = try $
skipSpaces
*> notFollowedBy (char ':')
- *> many1 (noneOf "\t\n\r ]")
+ *> many1Char (noneOf "\t\n\r ]")
<* skipSpaces
@@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
math :: PandocMonad m => OrgParser m (F Inlines)
math = return . B.math <$> choice [ math1CharBetween '$'
- , mathStringBetween '$'
+ , mathTextBetween '$'
, rawMathBetween "\\(" "\\)"
]
@@ -604,7 +606,7 @@ updatePositions c = do
return c
symbol :: PandocMonad m => OrgParser m (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions)
emphasisBetween :: PandocMonad m
=> Char
@@ -619,7 +621,7 @@ emphasisBetween c = try $ do
verbatimBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
verbatimBetween c = try $
emphasisStart c *>
many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
@@ -627,33 +629,33 @@ verbatimBetween c = try $
verbatimChar = noneOf "\n\r" >>= updatePositions
-- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: PandocMonad m
+mathTextBetween :: PandocMonad m
=> Char
- -> OrgParser m String
-mathStringBetween c = try $ do
+ -> OrgParser m Text
+mathTextBetween c = try $ do
mathStart c
body <- many1TillNOrLessNewlines mathAllowedNewlines
(noneOf (c:"\n\r"))
(lookAhead $ mathEnd c)
final <- mathEnd c
- return $ body ++ [final]
+ return $ T.snoc body final
-- | Parse a single character between @c@ using math rules
math1CharBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
math1CharBetween c = try $ do
char c
res <- noneOf $ c:mathForbiddenBorderChars
char c
eof <|> () <$ lookAhead (oneOf mathPostChars)
- return [res]
+ return $ T.singleton res
rawMathBetween :: PandocMonad m
- => String
- -> String
- -> OrgParser m String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+ => Text
+ -> Text
+ -> OrgParser m Text
+rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e)
-- | Parses the start (opening character) of emphasis
emphasisStart :: PandocMonad m => Char -> OrgParser m Char
@@ -702,10 +704,10 @@ enclosedInlines start end = try $
enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a
-> OrgParser m b
- -> OrgParser m String
+ -> OrgParser m Text
enclosedRaw start end = try $
start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
+ where onSingleLine = try $ many1TillChar (noneOf "\n\r") end
spanningTwoLines = try $
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
@@ -714,7 +716,7 @@ enclosedRaw start end = try $
many1TillNOrLessNewlines :: PandocMonad m => Int
-> OrgParser m Char
-> OrgParser m a
- -> OrgParser m String
+ -> OrgParser m Text
many1TillNOrLessNewlines n p end = try $
nMoreLines (Just n) mempty >>= oneOrMore
where
@@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
finalLine = try $ manyTill p end
minus1 k = k - 1
- oneOrMore cs = cs <$ guard (not $ null cs)
+ oneOrMore cs = T.pack cs <$ guard (not $ null cs)
-- Org allows customization of the way it reads emphasis. We use the defaults
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
@@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
subOrSuperExpr = try $
choice [ charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
- , simpleSubOrSuperString
+ , simpleSubOrSuperText
] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
+ where enclosing (left, right) s = T.cons left $ T.snoc s right
-simpleSubOrSuperString :: PandocMonad m => OrgParser m String
-simpleSubOrSuperString = try $ do
+simpleSubOrSuperText :: PandocMonad m => OrgParser m Text
+simpleSubOrSuperText = try $ do
state <- getState
guard . exportSubSuperscripts . orgStateExportSettings $ state
- choice [ string "*"
- , mappend <$> option [] ((:[]) <$> oneOf "+-")
- <*> many1 alphaNum
+ choice [ textStr "*"
+ , mappend <$> option "" (T.singleton <$> oneOf "+-")
+ <*> many1Char alphaNum
]
inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
@@ -793,28 +795,28 @@ inlineLaTeX = try $ do
maybe mzero returnF $
parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils
where
- parseAsMath :: String -> Maybe Inlines
+ parseAsMath :: Text -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
- parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
+ parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines)
parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
- parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym :: Text -> Maybe Inlines
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
-- drop initial backslash and any trailing "{}"
- where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
+ where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1
state :: ParserState
state = def{ stateOptions = def{ readerExtensions =
enableExtension Ext_raw_tex (readerExtensions def) } }
- texMathToPandoc :: String -> Maybe [Inline]
+ texMathToPandoc :: Text -> Maybe [Inline]
texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
-inlineLaTeXCommand :: PandocMonad m => OrgParser m String
+inlineLaTeXCommand :: PandocMonad m => OrgParser m Text
inlineLaTeXCommand = try $ do
rest <- getInput
st <- getState
@@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do
Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
- let cmdNoSpc = dropWhileEnd isSpace cs
- let len = length cmdNoSpc
+ let cmdNoSpc = T.dropWhileEnd isSpace cs
+ let len = T.length cmdNoSpc
count len anyChar
return cmdNoSpc
_ -> mzero
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
exportSnippet = try $ do
string "@@"
- format <- many1Till (alphaNum <|> char '-') (char ':')
- snippet <- manyTill anyChar (try $ string "@@")
+ format <- many1TillChar (alphaNum <|> char '-') (char ':')
+ snippet <- manyTillChar anyChar (try $ string "@@")
returnF $ B.rawInline format snippet
macro :: PandocMonad m => OrgParser m (F Inlines)
@@ -845,7 +843,7 @@ macro = try $ do
recursionDepth <- orgStateMacroDepth <$> getState
guard $ recursionDepth < 15
string "{{{"
- name <- many alphaNum
+ name <- manyChar alphaNum
args <- ([] <$ string "}}}")
<|> char '(' *> argument `sepBy` char ',' <* eoa
expander <- lookupMacro name <$> getState
@@ -857,7 +855,7 @@ macro = try $ do
updateState $ \s -> s { orgStateMacroDepth = recursionDepth }
return res
where
- argument = many $ notFollowedBy eoa *> noneOf ","
+ argument = manyChar $ notFollowedBy eoa *> noneOf ","
eoa = string ")}}}"
smart :: PandocMonad m => OrgParser m (F Inlines)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 0a388403e..811a5b974 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Meta
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -30,11 +31,12 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
import Control.Monad (mzero, void, when)
-import Data.Char (toLower)
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options.
@@ -47,7 +49,7 @@ metaExport = do
. (if exportWithEmail settings then id else removeMeta "email")
<$> orgStateMeta st
-removeMeta :: String -> Meta -> Meta
+removeMeta :: Text -> Meta -> Meta
removeMeta key meta' =
let metaMap = unMeta meta'
in Meta $ M.delete key metaMap
@@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
declarationLine :: PandocMonad m => OrgParser m ()
declarationLine = try $ do
- key <- map toLower <$> metaKey
+ key <- T.toLower <$> metaKey
(key', value) <- metaValue key
let addMetaValue st =
st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st }
when (key' /= "results") $ updateState addMetaValue
-metaKey :: Monad m => OrgParser m String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
- <* char ':'
- <* skipSpaces
+metaKey :: Monad m => OrgParser m Text
+metaKey = T.toLower <$> many1Char (noneOf ": \n\r")
+ <* char ':'
+ <* skipSpaces
-metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue)
+metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue)
metaValue key =
let inclKey = "header-includes"
in case key of
@@ -88,7 +90,7 @@ metaValue key =
-- Org-mode expects class options to contain the surrounding brackets,
-- pandoc does not.
"latex_class_options" -> ("classoption",) <$>
- metaModifiedString (filter (`notElem` "[]"))
+ metaModifiedString (T.filter (`notElem` ("[]" :: String)))
"html_head" -> (inclKey,) <$>
accumulatingList inclKey (metaExportSnippet "html")
_ -> (key,) <$> metaString
@@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
metaInlinesCommaSeparated = do
- itemStrs <- many1 (noneOf ",\n") `sepBy1` char ','
+ itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ','
newline
- items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs
+ items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs
let toMetaInlines = MetaInlines . B.toList
return $ MetaList . map toMetaInlines <$> sequence items
metaString :: Monad m => OrgParser m (F MetaValue)
metaString = metaModifiedString id
-metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
+metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine
-- | Read an format specific meta definition
-metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
+metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue)
metaExportSnippet format =
return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-- | Accumulate the result of the @parser@ in a list under @key@.
-accumulatingList :: Monad m => String
+accumulatingList :: Monad m => Text
-> OrgParser m (F MetaValue)
-> OrgParser m (F MetaValue)
accumulatingList key p = do
@@ -147,33 +149,33 @@ optionLine = try $ do
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero
-addLinkFormat :: Monad m => String
- -> (String -> String)
+addLinkFormat :: Monad m => Text
+ -> (Text -> Text)
-> OrgParser m ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
-parseLinkFormat :: Monad m => OrgParser m (String, String -> String)
+parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text)
parseLinkFormat = try $ do
- linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+ linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
-parseFormat :: Monad m => OrgParser m (String -> String)
+parseFormat :: Monad m => OrgParser m (Text -> Text)
parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
- replacePlain = try $ (\x -> concat . flip intersperse x)
+ replacePlain = try $ (\x -> T.concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
+ replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
<$> sequence [tillSpecifier 'h', rest]
- justAppend = try $ (++) <$> rest
+ justAppend = try $ (<>) <$> rest
- rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
- tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+ rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r")
+ tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:""))
tagList :: Monad m => OrgParser m [Tag]
tagList = do
@@ -231,41 +233,41 @@ todoSequence = try $ do
(x:xs) -> return $ keywordsToSequence (reverse xs) [x]
where
- todoKeywords :: Monad m => OrgParser m [String]
+ todoKeywords :: Monad m => OrgParser m [Text]
todoKeywords = try $
- let keyword = many1 nonspaceChar <* skipSpaces
+ let keyword = many1Char nonspaceChar <* skipSpaces
endOfKeywords = todoDoneSep <|> void newline
in manyTill keyword (lookAhead endOfKeywords)
todoDoneSep :: Monad m => OrgParser m ()
todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
- keywordsToSequence :: [String] -> [String] -> TodoSequence
+ keywordsToSequence :: [Text] -> [Text] -> TodoSequence
keywordsToSequence todo done =
let todoMarkers = map (TodoMarker Todo) todo
doneMarkers = map (TodoMarker Done) done
in todoMarkers ++ doneMarkers
-macroDefinition :: Monad m => OrgParser m (String, [String] -> String)
+macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text)
macroDefinition = try $ do
- macroName <- many1 nonspaceChar <* skipSpaces
+ macroName <- many1Char nonspaceChar <* skipSpaces
firstPart <- expansionPart
(elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart)
let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder
return (macroName, expander)
where
placeholder :: Monad m => OrgParser m Int
- placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit
+ placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit
- expansionPart :: Monad m => OrgParser m String
- expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r")
+ expansionPart :: Monad m => OrgParser m Text
+ expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r")
alternate :: [a] -> [a] -> [a]
alternate [] ys = ys
alternate xs [] = xs
alternate (x:xs) (y:ys) = x : y : alternate xs ys
- reorder :: [Int] -> [String] -> [String]
+ reorder :: [Int] -> [Text] -> [Text]
reorder perm xs =
let element n = take 1 $ drop (n - 1) xs
in concatMap element perm
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index d6dde8b22..cf5583b76 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ParserState
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro)
type F = Future OrgParserState
-- | An inline note / footnote containing the note key and its (inline) value.
-type OrgNoteRecord = (String, F Blocks)
+type OrgNoteRecord = (Text, F Blocks)
-- | Table of footnotes
type OrgNoteTable = [OrgNoteRecord]
-- | Map of functions for link transformations. The map key is refers to the
-- link-type, the corresponding function transforms the given link string.
-type OrgLinkFormatters = M.Map String (String -> String)
+type OrgLinkFormatters = M.Map Text (Text -> Text)
-- | Macro expander function
-type MacroExpander = [String] -> String
+type MacroExpander = [Text] -> Text
-- | Tag
-newtype Tag = Tag { fromTag :: String }
+newtype Tag = Tag { fromTag :: Text }
deriving (Show, Eq, Ord)
-- | The states in which a todo item can be
@@ -82,7 +83,7 @@ data TodoState = Todo | Done
-- | A ToDo keyword like @TODO@ or @DONE@.
data TodoMarker = TodoMarker
{ todoMarkerState :: TodoState
- , todoMarkerName :: String
+ , todoMarkerName :: Text
}
deriving (Show, Eq)
@@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker]
-- | Org-mode parser state
data OrgParserState = OrgParserState
- { orgStateAnchorIds :: [String]
+ { orgStateAnchorIds :: [Text]
, orgStateEmphasisCharStack :: [Char]
, orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before
-- emphasis; spaces and newlines are
@@ -102,13 +103,13 @@ data OrgParserState = OrgParserState
, orgStateExcludeTags :: Set.Set Tag
, orgStateExcludeTagsChanged :: Bool
, orgStateExportSettings :: ExportSettings
- , orgStateIdentifiers :: Set.Set String
- , orgStateIncludeFiles :: [String]
+ , orgStateIdentifiers :: Set.Set Text
+ , orgStateIncludeFiles :: [Text]
, orgStateLastForbiddenCharPos :: Maybe SourcePos
, orgStateLastPreCharPos :: Maybe SourcePos
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMacros :: M.Map String MacroExpander
+ , orgStateMacros :: M.Map Text MacroExpander
, orgStateMacroDepth :: Int
, orgStateMeta :: F Meta
, orgStateNotes' :: OrgNoteTable
@@ -212,10 +213,10 @@ activeTodoSequences st =
activeTodoMarkers :: OrgParserState -> TodoSequence
activeTodoMarkers = concat . activeTodoSequences
-lookupMacro :: String -> OrgParserState -> Maybe MacroExpander
+lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander
lookupMacro macroName = M.lookup macroName . orgStateMacros
-registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState
+registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState
registerMacro (name, expander) st =
let curMacros = orgStateMacros st
in st{ orgStateMacros = M.insert name expander curMacros }
@@ -236,7 +237,7 @@ data ArchivedTreesOption =
-- These settings can be changed via OPTIONS statements.
data ExportSettings = ExportSettings
{ exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
- , exportDrawers :: Either [String] [String]
+ , exportDrawers :: Either [Text] [Text]
-- ^ Specify drawer names which should be exported. @Left@ names are
-- explicitly excluded from the resulting output while @Right@ means that
-- only the listed drawer names should be included.
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 24aa0779d..718925120 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing
, orgTagWordChar
-- * Re-exports from Text.Pandoc.Parser
, ParserContext (..)
+ , textStr
+ , countChar
+ , manyChar
+ , many1Char
+ , manyTillChar
, many1Till
+ , many1TillChar
, notFollowedBy'
, spaceChar
, nonspaceChar
@@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing
) where
import Prelude
+import Data.Text (Text)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
@@ -108,14 +115,14 @@ import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
--
-- | Parse any line of text
-anyLine :: Monad m => OrgParser m String
+anyLine :: Monad m => OrgParser m Text
anyLine =
P.anyLine
<* updateLastPreCharPos
@@ -123,7 +130,7 @@ anyLine =
-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character
-- allowed before emphasised text.
-parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
+parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a
parseFromString parser str' = do
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
@@ -142,7 +149,7 @@ newline =
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: Monad m => OrgParser m [Char]
+blanklines :: Monad m => OrgParser m Text
blanklines =
P.blanklines
<* updateLastPreCharPos
@@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p ->
--
-- | Read the key of a plist style key-value list.
-orgArgKey :: Monad m => OrgParser m String
+orgArgKey :: Monad m => OrgParser m Text
orgArgKey = try $
skipSpaces *> char ':'
- *> many1 orgArgWordChar
+ *> many1Char orgArgWordChar
-- | Read the value of a plist style key-value list.
-orgArgWord :: Monad m => OrgParser m String
-orgArgWord = many1 orgArgWordChar
+orgArgWord :: Monad m => OrgParser m Text
+orgArgWord = many1Char orgArgWordChar
-- | Chars treated as part of a word in plists.
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"
-orgTagWord :: Monad m => OrgParser m String
-orgTagWord = many1 orgTagWordChar
+orgTagWord :: Monad m => OrgParser m Text
+orgTagWord = many1Char orgTagWordChar
orgTagWordChar :: Monad m => OrgParser m Char
orgTagWordChar = alphaNum <|> oneOf "@%#_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 34f958373..be0a2068e 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -10,7 +10,7 @@
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString
+ ( cleanLinkText
, isImageFilename
, originalLang
, translateLang
@@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared
import Prelude
import Data.Char (isAlphaNum)
-import Data.List (isPrefixOf)
+import Data.Text (Text)
+import qualified Data.Text as T
import System.FilePath (isValid, takeExtension)
-
+import Text.Pandoc.Shared (elemText)
-- | Check whether the given string looks like the path to of URL of an image.
-isImageFilename :: String -> Bool
-isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri)
+isImageFilename :: Text -> Bool
+isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri)
where
- hasImageExtension = takeExtension fp `elem` imageExtensions
- isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols
+ hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions
+ isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols
imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ]
protocols = [ "file", "http", "https" ]
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s'
- _ -> if isUrl s then Just s else Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
+cleanLinkText :: Text -> Maybe Text
+cleanLinkText s
+ | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path
+ | Just _ <- T.stripPrefix "./" s = Just s -- relative path
+ | Just _ <- T.stripPrefix "../" s = Just s -- relative path
+ -- Relative path or URL (file schema)
+ | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s'
+ | otherwise = if isUrl s then Just s else Nothing
+ where
+ isUrl :: Text -> Bool
+ isUrl cs =
+ let (scheme, path) = T.break (== ':') cs
+ in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme
+ && not (T.null path)
-- | Creates an key-value pair marking the original language name specified for
-- a piece of source code.
-- | Creates an key-value attributes marking the original language name
-- specified for a piece of source code.
-originalLang :: String -> [(String, String)]
+originalLang :: Text -> [(Text, Text)]
originalLang lang =
let transLang = translateLang lang
in if transLang == lang
@@ -66,7 +66,7 @@ originalLang lang =
-- | Translate from Org-mode's programming language identifiers to those used
-- by Pandoc. This is useful to allow for proper syntax highlighting in
-- Pandoc output.
-translateLang :: String -> String
+translateLang :: Text -> Text
translateLang cs =
case cs of
"C" -> "c"
@@ -79,5 +79,5 @@ translateLang cs =
"sqlite" -> "sql"
_ -> cs
-exportsCode :: [(String, String)] -> Bool
+exportsCode :: [(Text, Text)] -> Bool
exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"
diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs
index 7e29caf28..d2fba4449 100644
--- a/src/Text/Pandoc/Readers/RST.hs
+++ b/src/Text/Pandoc/Readers/RST.hs
@@ -1,7 +1,8 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.RST
Copyright : Copyright (C) 2006-2019 John MacFarlane
@@ -19,9 +20,8 @@ import Control.Arrow (second)
import Control.Monad (forM_, guard, liftM, mplus, mzero, when)
import Control.Monad.Except (throwError)
import Control.Monad.Identity (Identity (..))
-import Data.Char (isHexDigit, isSpace, toLower, toUpper, isAlphaNum)
-import Data.List (deleteFirstsBy, elemIndex, intercalate, isInfixOf, isSuffixOf,
- nub, sort, transpose)
+import Data.Char (isHexDigit, isSpace, toUpper, isAlphaNum)
+import Data.List (deleteFirstsBy, elemIndex, nub, sort, transpose)
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import Data.Sequence (ViewR (..), viewr)
@@ -47,16 +47,16 @@ import Text.Printf (printf)
-- | Parse reStructuredText string and return Pandoc document.
readRST :: PandocMonad m
=> ReaderOptions -- ^ Reader options
- -> Text -- ^ String to parse (assuming @'\n'@ line endings)
+ -> Text -- ^ Text to parse (assuming @'\n'@ line endings)
-> m Pandoc
readRST opts s = do
parsed <- readWithM parseRST def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
-type RSTParser m = ParserT [Char] ParserState m
+type RSTParser m = ParserT Text ParserState m
--
-- Constants and data structure definitions
@@ -113,7 +113,7 @@ titleTransform (bs, meta) =
metaFromDefList :: [([Inline], [[Block]])] -> Meta -> Meta
metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
- where f (k,v) = setMeta (map toLower $ stringify k) (mconcat $ map fromList v)
+ where f (k,v) = setMeta (T.toLower $ stringify k) (mconcat $ map fromList v)
adjustAuthors (Meta metamap) = Meta $ M.adjust splitAuthors "author"
$ M.adjust toPlain "date"
$ M.adjust toPlain "title"
@@ -136,13 +136,13 @@ metaFromDefList ds meta = adjustAuthors $ foldr f meta ds
isSp LineBreak = True
isSp _ = False
splitOnSemi = splitBy (==Str ";")
- factorSemi (Str []) = []
- factorSemi (Str s) = case break (==';') s of
- (xs,[]) -> [Str xs]
- (xs,';':ys) -> Str xs : Str ";" :
- factorSemi (Str ys)
- (xs,ys) -> Str xs :
- factorSemi (Str ys)
+ factorSemi (Str "") = []
+ factorSemi (Str s) = case T.break (==';') s of
+ (xs,"") -> [Str xs]
+ (xs,T.uncons -> Just (';',ys)) -> Str xs : Str ";" :
+ factorSemi (Str ys)
+ (xs,ys) -> Str xs :
+ factorSemi (Str ys)
factorSemi x = [x]
parseRST :: PandocMonad m => RSTParser m Pandoc
@@ -151,7 +151,7 @@ parseRST = do
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys were...
- docMinusKeys <- concat <$>
+ docMinusKeys <- T.concat <$>
manyTill (referenceKey <|> anchorDef <|>
noteBlock <|> citationBlock <|>
(snd <$> withRaw comment) <|>
@@ -180,7 +180,7 @@ parseRST = do
return $ Pandoc meta' (blocks' ++ refBlock)
parseCitation :: PandocMonad m
- => (String, String) -> RSTParser m (Inlines, [Blocks])
+ => (Text, Text) -> RSTParser m (Inlines, [Blocks])
parseCitation (ref, raw) = do
contents <- parseFromString' parseBlocks raw
return (B.spanWith (ref, ["citation-label"], []) (B.str ref),
@@ -215,23 +215,23 @@ block = choice [ codeBlock
-- field list
--
-rawFieldListItem :: Monad m => Int -> RSTParser m (String, String)
+rawFieldListItem :: Monad m => Int -> RSTParser m (Text, Text)
rawFieldListItem minIndent = try $ do
indent <- length <$> many (char ' ')
guard $ indent >= minIndent
char ':'
- name <- many1Till (noneOf "\n") (char ':')
+ name <- many1TillChar (noneOf "\n") (char ':')
(() <$ lookAhead newline) <|> skipMany1 spaceChar
first <- anyLine
rest <- option "" $ try $ do lookAhead (count indent (char ' ') >> spaceChar)
indentedBlock
- let raw = (if null first then "" else (first ++ "\n")) ++ rest ++ "\n"
+ let raw = (if T.null first then "" else (first <> "\n")) <> rest <> "\n"
return (name, raw)
fieldListItem :: PandocMonad m => Int -> RSTParser m (Inlines, [Blocks])
fieldListItem minIndent = try $ do
(name, raw) <- rawFieldListItem minIndent
- term <- parseInlineFromString name
+ term <- parseInlineFromText name
contents <- parseFromString' parseBlocks raw
optional blanklines
return (term, [contents])
@@ -251,12 +251,12 @@ fieldList = try $ do
lineBlock :: PandocMonad m => RSTParser m Blocks
lineBlock = try $ do
lines' <- lineBlockLines
- lines'' <- mapM parseInlineFromString lines'
+ lines'' <- mapM parseInlineFromText lines'
return $ B.lineBlock lines''
-lineBlockDirective :: PandocMonad m => String -> RSTParser m Blocks
+lineBlockDirective :: PandocMonad m => Text -> RSTParser m Blocks
lineBlockDirective body = do
- lines' <- mapM parseInlineFromString $ lines $ stripTrailingNewlines body
+ lines' <- mapM parseInlineFromText $ T.lines $ stripTrailingNewlines body
return $ B.lineBlock lines'
--
@@ -271,9 +271,9 @@ para = try $ do
newline
blanklines
case viewr (B.unMany result) of
- ys :> Str xs | "::" `isSuffixOf` xs -> do
+ ys :> Str xs | "::" `T.isSuffixOf` xs -> do
raw <- option mempty codeBlockBody
- return $ B.para (B.Many ys <> B.str (take (length xs - 1) xs))
+ return $ B.para (B.Many ys <> B.str (T.take (T.length xs - 1) xs))
<> raw
_ -> return (B.para result)
@@ -349,7 +349,7 @@ singleHeader' = try $ do
-- hrule block
--
-hrule :: Monad m => ParserT [Char] st m Blocks
+hrule :: Monad m => ParserT Text st m Blocks
hrule = try $ do
chr <- oneOf underlineChars
count 3 (char chr)
@@ -364,7 +364,7 @@ hrule = try $ do
-- read a line indented by a given string
indentedLine :: (HasReaderOptions st, Monad m)
- => Int -> ParserT [Char] st m [Char]
+ => Int -> ParserT Text st m Text
indentedLine indents = try $ do
lookAhead spaceChar
gobbleAtMostSpaces indents
@@ -373,29 +373,29 @@ indentedLine indents = try $ do
-- one or more indented lines, possibly separated by blank lines.
-- any amount of indentation will work.
indentedBlock :: (HasReaderOptions st, Monad m)
- => ParserT [Char] st m [Char]
+ => ParserT Text st m Text
indentedBlock = try $ do
indents <- length <$> lookAhead (many1 spaceChar)
lns <- many1 $ try $ do b <- option "" blanklines
l <- indentedLine indents
- return (b ++ l)
+ return (b <> l)
optional blanklines
- return $ unlines lns
+ return $ T.unlines lns
-quotedBlock :: Monad m => ParserT [Char] st m [Char]
+quotedBlock :: Monad m => ParserT Text st m Text
quotedBlock = try $ do
quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
lns <- many1 $ lookAhead (char quote) >> anyLine
optional blanklines
- return $ unlines lns
+ return $ T.unlines lns
-codeBlockStart :: Monad m => ParserT [Char] st m Char
+codeBlockStart :: Monad m => ParserT Text st m Char
codeBlockStart = string "::" >> blankline >> blankline
-codeBlock :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks
+codeBlock :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks
codeBlock = try $ codeBlockStart >> codeBlockBody
-codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Blocks
+codeBlockBody :: (HasReaderOptions st, Monad m) => ParserT Text st m Blocks
codeBlockBody = try $ B.codeBlock . stripTrailingNewlines <$>
(indentedBlock <|> quotedBlock)
@@ -407,24 +407,24 @@ lhsCodeBlock = try $ do
lns <- latexCodeBlock <|> birdCodeBlock
blanklines
return $ B.codeBlockWith ("", ["haskell","literate"], [])
- $ intercalate "\n" lns
+ $ T.intercalate "\n" lns
-latexCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
+latexCodeBlock :: Monad m => ParserT Text st m [Text]
latexCodeBlock = try $ do
try (latexBlockLine "\\begin{code}")
many1Till anyLine (try $ latexBlockLine "\\end{code}")
where
latexBlockLine s = skipMany spaceChar >> string s >> blankline
-birdCodeBlock :: Monad m => ParserT [Char] st m [[Char]]
+birdCodeBlock :: Monad m => ParserT Text st m [Text]
birdCodeBlock = filterSpace <$> many1 birdTrackLine
where filterSpace lns =
-- if (as is normal) there is always a space after >, drop it
- if all (\ln -> null ln || take 1 ln == " ") lns
- then map (drop 1) lns
+ if all (\ln -> T.null ln || T.take 1 ln == " ") lns
+ then map (T.drop 1) lns
else lns
-birdTrackLine :: Monad m => ParserT [Char] st m [Char]
+birdTrackLine :: Monad m => ParserT Text st m Text
birdTrackLine = char '>' >> anyLine
--
@@ -435,7 +435,7 @@ blockQuote :: PandocMonad m => RSTParser m Blocks
blockQuote = do
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString' parseBlocks $ raw ++ "\n\n"
+ contents <- parseFromString' parseBlocks $ raw <> "\n\n"
return $ B.blockQuote contents
{-
@@ -445,12 +445,12 @@ encoding
-}
includeDirective :: PandocMonad m
- => String -> [(String, String)] -> String
+ => Text -> [(Text, Text)] -> Text
-> RSTParser m Blocks
includeDirective top fields body = do
let f = trim top
- guard $ not (null f)
- guard $ null (trim body)
+ guard $ not (T.null f)
+ guard $ T.null (trim body)
-- options
let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead
let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead
@@ -458,11 +458,11 @@ includeDirective top fields body = do
oldInput <- getInput
containers <- stateContainers <$> getState
when (f `elem` containers) $
- throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
+ throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos
updateState $ \s -> s{ stateContainers = f : stateContainers s }
- mbContents <- readFileFromDirs ["."] f
+ mbContents <- readFileFromDirs ["."] $ T.unpack f
contentLines <- case mbContents of
- Just s -> return $ lines s
+ Just s -> return $ T.lines s
Nothing -> do
logMessage $ CouldNotLoadIncludeFile f oldPos
return []
@@ -478,23 +478,23 @@ includeDirective top fields body = do
let contentLines' = drop (startLine' - 1)
$ take (endLine' - 1) contentLines
let contentLines'' = (case trim <$> lookup "end-before" fields of
- Just patt -> takeWhile (not . (patt `isInfixOf`))
+ Just patt -> takeWhile (not . (patt `T.isInfixOf`))
Nothing -> id) .
(case trim <$> lookup "start-after" fields of
Just patt -> drop 1 .
- dropWhile (not . (patt `isInfixOf`))
+ dropWhile (not . (patt `T.isInfixOf`))
Nothing -> id) $ contentLines'
- let contents' = unlines contentLines'' ++ "\n"
+ let contents' = T.unlines contentLines'' <> "\n"
case lookup "code" fields of
Just lang -> do
let numberLines = lookup "number-lines" fields
- let classes = maybe [] words (lookup "class" fields)
+ let classes = maybe [] T.words (lookup "class" fields)
let ident = maybe "" trimr $ lookup "name" fields
codeblock ident classes numberLines (trimr lang) contents' False
Nothing -> case lookup "literal" fields of
Just _ -> return $ B.rawBlock "rst" contents'
Nothing -> do
- setPosition $ newPos f 1 1
+ setPosition $ newPos (T.unpack f) 1 1
setInput contents'
bs <- optional blanklines >>
(mconcat <$> many block)
@@ -519,14 +519,14 @@ definitionListItem = try $ do
term <- trimInlines . mconcat <$> many1Till inline endline
raw <- indentedBlock
-- parse the extracted block, which may contain various block elements:
- contents <- parseFromString' parseBlocks $ raw ++ "\n"
+ contents <- parseFromString' parseBlocks $ raw <> "\n"
return (term, [contents])
definitionList :: PandocMonad m => RSTParser m Blocks
definitionList = B.definitionList <$> many1 definitionListItem
-- parses bullet list start and returns its length (inc. following whitespace)
-bulletListStart :: Monad m => ParserT [Char] st m Int
+bulletListStart :: Monad m => ParserT Text st m Int
bulletListStart = try $ do
notFollowedBy' hrule -- because hrules start out just like lists
marker <- oneOf bulletListMarkers
@@ -543,7 +543,7 @@ orderedListStart style delim = try $ do
return $ markerLen + length white
-- parse a line of a list item
-listLine :: Monad m => Int -> RSTParser m [Char]
+listLine :: Monad m => Int -> RSTParser m Text
listLine markerLength = try $ do
notFollowedBy blankline
indentWith markerLength
@@ -551,21 +551,21 @@ listLine markerLength = try $ do
-- parse raw text for one list item, excluding start marker and continuations
rawListItem :: Monad m => RSTParser m Int
- -> RSTParser m (Int, [Char])
+ -> RSTParser m (Int, Text)
rawListItem start = try $ do
markerLength <- start
firstLine <- anyLineNewline
restLines <- many (listLine markerLength)
- return (markerLength, firstLine ++ concat restLines)
+ return (markerLength, firstLine <> T.concat restLines)
-- continuation of a list item - indented and separated by blankline or
-- (in compact lists) endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: Monad m => Int -> RSTParser m [Char]
+listContinuation :: Monad m => Int -> RSTParser m Text
listContinuation markerLength = try $ do
- blanks <- many1 blankline
+ blanks <- many1Char blankline
result <- many1 (listLine markerLength)
- return $ blanks ++ concat result
+ return $ blanks <> T.concat result
listItem :: PandocMonad m
=> RSTParser m Int
@@ -581,7 +581,7 @@ listItem start = try $ do
let oldContext = stateParserContext state
setState $ state {stateParserContext = ListItemState}
-- parse the extracted block, which may itself contain block elements
- parsed <- parseFromString' parseBlocks $ concat (first:rest) ++ "\n"
+ parsed <- parseFromString' parseBlocks $ T.concat (first:rest) <> "\n"
updateState (\st -> st {stateParserContext = oldContext})
return $ case B.toList parsed of
[Para xs] ->
@@ -617,9 +617,9 @@ comment = try $ do
optional indentedBlock
return mempty
-directiveLabel :: Monad m => RSTParser m String
-directiveLabel = map toLower
- <$> many1Till (letter <|> char '-') (try $ string "::")
+directiveLabel :: Monad m => RSTParser m Text
+directiveLabel = T.toLower
+ <$> many1TillChar (letter <|> char '-') (try $ string "::")
directive :: PandocMonad m => RSTParser m Blocks
directive = try $ do
@@ -631,7 +631,7 @@ directive' = do
skipMany1 spaceChar
label <- directiveLabel
skipMany spaceChar
- top <- many $ satisfy (/='\n')
+ top <- manyChar $ satisfy (/='\n')
<|> try (char '\n' <*
notFollowedBy' (rawFieldListItem 1) <*
many1 (char ' ') <*
@@ -644,35 +644,33 @@ directive' = do
else many $ rawFieldListItem fieldIndent
body <- option "" $ try $ blanklines >> indentedBlock
optional blanklines
- let body' = body ++ "\n\n"
+ let body' = body <> "\n\n"
name = trim $ fromMaybe "" (lookup "name" fields)
- classes = words $ maybe "" trim (lookup "class" fields)
+ classes = T.words $ maybe "" trim (lookup "class" fields)
keyvals = [(k, trim v) | (k, v) <- fields, k /= "name", k /= "class"]
imgAttr cl = (name, classes ++ alignClasses, widthAttr ++ heightAttr)
where
- alignClasses = words $ maybe "" trim (lookup cl fields) ++
- maybe "" (\x -> "align-" ++ trim x)
+ alignClasses = T.words $ maybe "" trim (lookup cl fields) <>
+ maybe "" (\x -> "align-" <> trim x)
(lookup "align" fields)
scale = case trim <$> lookup "scale" fields of
- Just v -> case reverse v of
- '%':vv ->
- case safeRead (reverse vv) of
- Just (percent :: Double)
- -> percent / 100.0
- Nothing -> 1.0
- _ ->
- case safeRead v of
- Just (s :: Double) -> s
- Nothing -> 1.0
- Nothing -> 1.0
+ Just v -> case T.unsnoc v of
+ Just (vv, '%') -> case safeRead vv of
+ Just (percent :: Double)
+ -> percent / 100.0
+ Nothing -> 1.0
+ _ -> case safeRead v of
+ Just (s :: Double) -> s
+ Nothing -> 1.0
+ Nothing -> 1.0
widthAttr = maybe [] (\x -> [("width",
- show $ scaleDimension scale x)])
+ tshow $ scaleDimension scale x)])
$ lookup "width" fields >>=
- (lengthToDim . filter (not . isSpace))
+ (lengthToDim . T.filter (not . isSpace))
heightAttr = maybe [] (\x -> [("height",
- show $ scaleDimension scale x)])
+ tshow $ scaleDimension scale x)])
$ lookup "height" fields >>=
- (lengthToDim . filter (not . isSpace))
+ (lengthToDim . T.filter (not . isSpace))
case label of
"include" -> includeDirective top fields body'
"table" -> tableDirective top fields body'
@@ -682,36 +680,37 @@ directive' = do
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
"role" -> addNewRole top $ map (second trim) fields
"container" -> B.divWith
- (name, "container" : words top ++ classes, []) <$>
+ (name, "container" : T.words top ++ classes, []) <$>
parseFromString' parseBlocks body'
"replace" -> B.para <$> -- consumed by substKey
- parseInlineFromString (trim top)
+ parseInlineFromText (trim top)
"unicode" -> B.para <$> -- consumed by substKey
- parseInlineFromString (trim $ unicodeTransform top)
+ parseInlineFromText (trim $ unicodeTransform top)
"compound" -> parseFromString' parseBlocks body'
"pull-quote" -> B.blockQuote <$> parseFromString' parseBlocks body'
"epigraph" -> B.blockQuote <$> parseFromString' parseBlocks body'
"highlights" -> B.blockQuote <$> parseFromString' parseBlocks body'
- "rubric" -> B.para . B.strong <$> parseInlineFromString top
+ "rubric" -> B.para . B.strong <$> parseInlineFromText top
_ | label `elem` ["attention","caution","danger","error","hint",
"important","note","tip","warning","admonition"] ->
- do bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ do bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body'
let lab = case label of
- "admonition" -> mempty
- (l:ls) -> B.divWith ("",["title"],[])
- (B.para (B.str (toUpper l : ls)))
- [] -> mempty
+ "admonition" -> mempty
+ (T.uncons -> Just (l, ls))
+ -> B.divWith ("",["title"],[])
+ (B.para (B.str $ T.cons (toUpper l) ls))
+ _ -> mempty
return $ B.divWith (name,label:classes,keyvals) (lab <> bod)
"sidebar" ->
do let subtit = maybe "" trim $ lookup "subtitle" fields
- tit <- B.para . B.strong <$> parseInlineFromString
- (trim top ++ if null subtit
+ tit <- B.para . B.strong <$> parseInlineFromText
+ (trim top <> if T.null subtit
then ""
- else (": " ++ subtit))
+ else (": " <> subtit))
bod <- parseFromString' parseBlocks body'
return $ B.divWith (name,"sidebar":classes,keyvals) $ tit <> bod
"topic" ->
- do tit <- B.para . B.strong <$> parseInlineFromString top
+ do tit <- B.para . B.strong <$> parseInlineFromText top
bod <- parseFromString' parseBlocks body'
return $ B.divWith (name,"topic":classes,keyvals) $ tit <> bod
"default-role" -> mempty <$ updateState (\s ->
@@ -726,7 +725,7 @@ directive' = do
let attribs = (name, ["aafig"], map (second trimr) fields)
return $ B.codeBlockWith attribs $ stripTrailingNewlines body
"math" -> return $ B.para $ mconcat $ map B.displayMath
- $ toChunks $ top ++ "\n\n" ++ body
+ $ toChunks $ top <> "\n\n" <> body
"figure" -> do
(caption, legend) <- parseFromString' extractCaption body'
let src = escapeURI $ trim top
@@ -742,7 +741,7 @@ directive' = do
$ B.imageWith attr src "" alt
Nothing -> B.imageWith attr src "" alt
"class" -> do
- let attrs = (name, words (trim top), map (second trimr) fields)
+ let attrs = (name, T.words (trim top), map (second trimr) fields)
-- directive content or the first immediately following element
children <- case body of
"" -> block
@@ -750,12 +749,12 @@ directive' = do
return $ B.divWith attrs children
other -> do
pos <- getPosition
- logMessage $ SkippedContent (".. " ++ other) pos
- bod <- parseFromString' parseBlocks $ top ++ "\n\n" ++ body'
+ logMessage $ SkippedContent (".. " <> other) pos
+ bod <- parseFromString' parseBlocks $ top <> "\n\n" <> body'
return $ B.divWith (name, other:classes, keyvals) bod
tableDirective :: PandocMonad m
- => String -> [(String, String)] -> String -> RSTParser m Blocks
+ => Text -> [(Text, Text)] -> Text -> RSTParser m Blocks
tableDirective top fields body = do
bs <- parseFromString' parseBlocks body
case B.toList bs of
@@ -770,7 +769,7 @@ tableDirective top fields body = do
Just "grid" -> widths'
Just specs -> normWidths
$ map (fromMaybe (0 :: Double) . safeRead)
- $ splitBy (`elem` (" ," :: String)) specs
+ $ splitTextBy (`elem` (" ," :: String)) specs
Nothing -> widths'
-- align is not applicable since we can't represent whole table align
return $ B.singleton $ Table (B.toList title)
@@ -783,7 +782,7 @@ tableDirective top fields body = do
-- since Pandoc doesn't support a table with multiple header rows.
-- We don't need to parse :align: as it represents the whole table align.
listTableDirective :: PandocMonad m
- => String -> [(String, String)] -> String
+ => Text -> [(Text, Text)] -> Text
-> RSTParser m Blocks
listTableDirective top fields body = do
bs <- parseFromString' parseBlocks body
@@ -799,7 +798,7 @@ listTableDirective top fields body = do
widths = case trim <$> lookup "widths" fields of
Just "auto" -> replicate numOfCols 0
Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
- splitBy (`elem` (" ," :: String)) specs
+ splitTextBy (`elem` (" ," :: String)) specs
_ -> replicate numOfCols 0
return $ B.table title
(zip (replicate numOfCols AlignDefault) widths)
@@ -812,7 +811,7 @@ listTableDirective top fields body = do
normWidths ws = map (/ max 1 (sum ws)) ws
csvTableDirective :: PandocMonad m
- => String -> [(String, String)] -> String
+ => Text -> [(Text, Text)] -> Text
-> RSTParser m Blocks
csvTableDirective top fields rawcsv = do
let explicitHeader = trim <$> lookup "header" fields
@@ -820,14 +819,17 @@ csvTableDirective top fields rawcsv = do
csvDelim = case trim <$> lookup "delim" fields of
Just "tab" -> '\t'
Just "space" -> ' '
- Just [c] -> c
+ Just (T.unpack -> [c])
+ -> c
_ -> ','
, csvQuote = case trim <$> lookup "quote" fields of
- Just [c] -> c
- _ -> '"'
+ Just (T.unpack -> [c])
+ -> c
+ _ -> '"'
, csvEscape = case trim <$> lookup "escape" fields of
- Just [c] -> Just c
- _ -> Nothing
+ Just (T.unpack -> [c])
+ -> Just c
+ _ -> Nothing
, csvKeepSpace = case trim <$> lookup "keepspace" fields of
Just "true" -> True
_ -> False
@@ -840,16 +842,16 @@ csvTableDirective top fields rawcsv = do
lookup "file" fields `mplus` lookup "url" fields of
Just u -> do
(bs, _) <- fetchItem u
- return $ UTF8.toString bs
+ return $ UTF8.toText bs
Nothing -> return rawcsv
- let res = parseCSV opts (T.pack $ case explicitHeader of
- Just h -> h ++ "\n" ++ rawcsv'
- Nothing -> rawcsv')
+ let res = parseCSV opts (case explicitHeader of
+ Just h -> h <> "\n" <> rawcsv'
+ Nothing -> rawcsv')
case res of
Left e ->
throwError $ PandocParsecError "csv table" e
Right rawrows -> do
- let parseCell = parseFromString' (plain <|> return mempty) . T.unpack
+ let parseCell = parseFromString' (plain <|> return mempty)
let parseRow = mapM parseCell
rows <- mapM parseRow rawrows
let (headerRow,bodyRows,numOfCols) =
@@ -865,7 +867,7 @@ csvTableDirective top fields rawcsv = do
Just "auto" -> replicate numOfCols 0
Just specs -> normWidths
$ map (fromMaybe (0 :: Double) . safeRead)
- $ splitBy (`elem` (" ," :: String)) specs
+ $ splitTextBy (`elem` (" ," :: String)) specs
_ -> replicate numOfCols 0
return $ B.table title
(zip (replicate numOfCols AlignDefault) widths)
@@ -876,10 +878,10 @@ csvTableDirective top fields rawcsv = do
-- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix
addNewRole :: PandocMonad m
- => String -> [(String, String)] -> RSTParser m Blocks
-addNewRole roleString fields = do
+ => Text -> [(Text, Text)] -> RSTParser m Blocks
+addNewRole roleText fields = do
pos <- getPosition
- (role, parentRole) <- parseFromString' inheritedRole roleString
+ (role, parentRole) <- parseFromString' inheritedRole roleText
customRoles <- stateRstCustomRoles <$> getState
let getBaseRole (r, f, a) roles =
case M.lookup r roles of
@@ -888,7 +890,7 @@ addNewRole roleString fields = do
(baseRole, baseFmt, baseAttr) =
getBaseRole (parentRole, Nothing, nullAttr) customRoles
fmt = if parentRole == "raw" then lookup "format" fields else baseFmt
- annotate :: [String] -> [String]
+ annotate :: [Text] -> [Text]
annotate = maybe id (:) $
if baseRole == "code"
then lookup "language" fields
@@ -904,7 +906,7 @@ addNewRole roleString fields = do
pos
"format" -> when (baseRole /= "raw") $ logMessage $
SkippedContent ":format: [because parent of role is not :raw:]" pos
- _ -> logMessage $ SkippedContent (":" ++ key ++ ":") pos
+ _ -> logMessage $ SkippedContent (":" <> key <> ":") pos
when (parentRole == "raw" && countKeys "format" > 1) $
logMessage $ SkippedContent
":format: [after first in definition of role]"
@@ -930,30 +932,29 @@ addNewRole roleString fields = do
-- hexadecimal numbers, prefixed by 0x, x, \x, U+, u, or \u
-- or as XML-style hexadecimal character entities, e.g. &#x1a2b;
-- or text, which is used as-is. Comments start with ..
-unicodeTransform :: String -> String
-unicodeTransform t =
- case t of
- ('.':'.':xs) -> unicodeTransform $ dropWhile (/='\n') xs -- comment
- ('0':'x':xs) -> go "0x" xs
- ('x':xs) -> go "x" xs
- ('\\':'x':xs) -> go "\\x" xs
- ('U':'+':xs) -> go "U+" xs
- ('u':xs) -> go "u" xs
- ('\\':'u':xs) -> go "\\u" xs
- ('&':'#':'x':xs) -> maybe ("&#x" ++ unicodeTransform xs)
- -- drop semicolon
- (\(c,s) -> c : unicodeTransform (drop 1 s))
- $ extractUnicodeChar xs
- (x:xs) -> x : unicodeTransform xs
- [] -> []
- where go pref zs = maybe (pref ++ unicodeTransform zs)
- (\(c,s) -> c : unicodeTransform s)
- $ extractUnicodeChar zs
-
-extractUnicodeChar :: String -> Maybe (Char, String)
+unicodeTransform :: Text -> Text
+unicodeTransform t
+ | Just xs <- T.stripPrefix ".." t = unicodeTransform $ T.dropWhile (/= '\n') xs -- comment
+ | Just xs <- T.stripPrefix "0x" t = go "0x" xs
+ | Just xs <- T.stripPrefix "x" t = go "x" xs
+ | Just xs <- T.stripPrefix "\\x" t = go "\\x" xs
+ | Just xs <- T.stripPrefix "U+" t = go "U+" xs
+ | Just xs <- T.stripPrefix "u" t = go "u" xs
+ | Just xs <- T.stripPrefix "\\u" t = go "\\u" xs
+ | Just xs <- T.stripPrefix "&#x" t = maybe ("&#x" <> unicodeTransform xs)
+ -- drop semicolon
+ (\(c,s) -> T.cons c $ unicodeTransform $ T.drop 1 s)
+ $ extractUnicodeChar xs
+ | Just (x, xs) <- T.uncons t = T.cons x $ unicodeTransform xs
+ | otherwise = ""
+ where go pref zs = maybe (pref <> unicodeTransform zs)
+ (\(c,s) -> T.cons c $ unicodeTransform s)
+ $ extractUnicodeChar zs
+
+extractUnicodeChar :: Text -> Maybe (Char, Text)
extractUnicodeChar s = maybe Nothing (\c -> Just (c,rest)) mbc
- where (ds,rest) = span isHexDigit s
- mbc = safeRead ('\'':'\\':'x':ds ++ "'")
+ where (ds,rest) = T.span isHexDigit s
+ mbc = safeRead ("'\\x" <> ds <> "'")
extractCaption :: PandocMonad m => RSTParser m (Inlines, Blocks)
extractCaption = do
@@ -963,16 +964,16 @@ extractCaption = do
-- divide string by blanklines, and surround with
-- \begin{aligned}...\end{aligned} if needed.
-toChunks :: String -> [String]
-toChunks = dropWhile null
- . map (addAligned . trim . unlines)
- . splitBy (all (`elem` (" \t" :: String))) . lines
+toChunks :: Text -> [Text]
+toChunks = dropWhile T.null
+ . map (addAligned . trim . T.unlines)
+ . splitBy (T.all (`elem` (" \t" :: String))) . T.lines
-- we put this in an aligned environment if it contains \\, see #4254
- where addAligned s = if "\\\\" `isInfixOf` s
- then "\\begin{aligned}\n" ++ s ++ "\n\\end{aligned}"
+ where addAligned s = if "\\\\" `T.isInfixOf` s
+ then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}"
else s
-codeblock :: String -> [String] -> Maybe String -> String -> String -> Bool
+codeblock :: Text -> [Text] -> Maybe Text -> Text -> Text -> Bool
-> RSTParser m Blocks
codeblock ident classes numberLines lang body rmTrailingNewlines =
return $ B.codeBlockWith attribs $ stripTrailingNewlines' body
@@ -984,7 +985,7 @@ codeblock ident classes numberLines lang body rmTrailingNewlines =
: maybe [] (const ["numberLines"]) numberLines
++ classes
kvs = maybe [] (\n -> case trimr n of
- [] -> []
+ "" -> []
xs -> [("startFrom", xs)])
numberLines
@@ -992,25 +993,25 @@ codeblock ident classes numberLines lang body rmTrailingNewlines =
--- note block
---
-noteBlock :: Monad m => RSTParser m [Char]
+noteBlock :: Monad m => RSTParser m Text
noteBlock = try $ do
(ref, raw, replacement) <- noteBlock' noteMarker
updateState $ \s -> s { stateNotes = (ref, raw) : stateNotes s }
-- return blanks so line count isn't affected
return replacement
-citationBlock :: Monad m => RSTParser m [Char]
+citationBlock :: Monad m => RSTParser m Text
citationBlock = try $ do
(ref, raw, replacement) <- noteBlock' citationMarker
updateState $ \s ->
s { stateCitations = M.insert ref raw (stateCitations s),
- stateKeys = M.insert (toKey ref) (('#':ref,""), ("",["citation"],[]))
+ stateKeys = M.insert (toKey ref) (("#" <> ref,""), ("",["citation"],[]))
(stateKeys s) }
-- return blanks so line count isn't affected
return replacement
noteBlock' :: Monad m
- => RSTParser m String -> RSTParser m (String, String, String)
+ => RSTParser m Text -> RSTParser m (Text, Text, Text)
noteBlock' marker = try $ do
startPos <- getPosition
string ".."
@@ -1021,24 +1022,24 @@ noteBlock' marker = try $ do
blanks <- option "" blanklines
rest <- option "" indentedBlock
endPos <- getPosition
- let raw = first ++ "\n" ++ blanks ++ rest ++ "\n"
- let replacement =replicate (sourceLine endPos - sourceLine startPos) '\n'
+ let raw = first <> "\n" <> blanks <> rest <> "\n"
+ let replacement = T.replicate (sourceLine endPos - sourceLine startPos) "\n"
return (ref, raw, replacement)
-citationMarker :: Monad m => RSTParser m [Char]
+citationMarker :: Monad m => RSTParser m Text
citationMarker = do
char '['
res <- simpleReferenceName
char ']'
return res
-noteMarker :: Monad m => RSTParser m [Char]
+noteMarker :: Monad m => RSTParser m Text
noteMarker = do
char '['
- res <- many1 digit
+ res <- many1Char digit
<|>
- try (char '#' >> liftM ('#':) simpleReferenceName)
- <|> count 1 (oneOf "#*")
+ try (char '#' >> liftM ("#" <>) simpleReferenceName)
+ <|> countChar 1 (oneOf "#*")
char ']'
return res
@@ -1046,47 +1047,48 @@ noteMarker = do
-- reference key
--
-quotedReferenceName :: PandocMonad m => RSTParser m String
+quotedReferenceName :: PandocMonad m => RSTParser m Text
quotedReferenceName = try $ do
char '`' >> notFollowedBy (char '`') -- `` means inline code!
- manyTill anyChar (char '`')
+ manyTillChar anyChar (char '`')
-- Simple reference names are single words consisting of alphanumerics
-- plus isolated (no two adjacent) internal hyphens, underscores,
-- periods, colons and plus signs; no whitespace or other characters
-- are allowed.
-simpleReferenceName :: Monad m => ParserT [Char] st m String
+simpleReferenceName :: Monad m => ParserT Text st m Text
simpleReferenceName = do
x <- alphaNum
xs <- many $ alphaNum
<|> try (oneOf "-_:+." <* lookAhead alphaNum)
- return (x:xs)
+ return $ T.pack (x:xs)
-referenceName :: PandocMonad m => RSTParser m String
+referenceName :: PandocMonad m => RSTParser m Text
referenceName = quotedReferenceName <|> simpleReferenceName
-referenceKey :: PandocMonad m => RSTParser m [Char]
+referenceKey :: PandocMonad m => RSTParser m Text
referenceKey = do
startPos <- getPosition
choice [substKey, anonymousKey, regularKey]
optional blanklines
endPos <- getPosition
-- return enough blanks to replace key
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-targetURI :: Monad m => ParserT [Char] st m [Char]
+targetURI :: Monad m => ParserT Text st m Text
targetURI = do
skipSpaces
optional $ try $ newline >> notFollowedBy blankline
contents <- trim <$>
- many1 (satisfy (/='\n')
+ many1Char (satisfy (/='\n')
<|> try (newline >> many1 spaceChar >> noneOf " \t\n"))
blanklines
- case reverse contents of
- -- strip backticks
- '_':'`':xs -> return (dropWhile (=='`') (reverse xs) ++ "_")
- '_':_ -> return contents
- _ -> return (escapeURI contents)
+ return $ stripBackticks contents
+ where
+ stripBackticks t
+ | Just xs <- T.stripSuffix "`_" t = T.dropWhile (=='`') xs <> "_"
+ | Just _ <- T.stripSuffix "_" t = t
+ | otherwise = escapeURI t
substKey :: PandocMonad m => RSTParser m ()
substKey = try $ do
@@ -1112,21 +1114,21 @@ anonymousKey = try $ do
oneOfStrings [".. __:", "__"]
src <- targetURI
pos <- getPosition
- let key = toKey $ "_" ++ printf "%09d" (sourceLine pos)
+ let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos))
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
-referenceNames :: PandocMonad m => RSTParser m [String]
+referenceNames :: PandocMonad m => RSTParser m [Text]
referenceNames = do
let rn = try $ do
string ".. _"
ref <- quotedReferenceName
- <|> many ( noneOf ":\n"
- <|> try (char '\n' <*
- string " " <*
- notFollowedBy blankline)
- <|> try (char ':' <* lookAhead alphaNum)
- )
+ <|> manyChar ( noneOf ":\n"
+ <|> try (char '\n' <*
+ string " " <*
+ notFollowedBy blankline)
+ <|> try (char ':' <* lookAhead alphaNum)
+ )
char ':'
return ref
first <- rn
@@ -1140,18 +1142,18 @@ regularKey = try $ do
-- .. _goodbye: url.com
refs <- referenceNames
src <- targetURI
- guard $ not (null src)
+ guard $ not (T.null src)
let keys = map toKey refs
forM_ keys $ \key ->
updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $
stateKeys s }
-anchorDef :: PandocMonad m => RSTParser m [Char]
+anchorDef :: PandocMonad m => RSTParser m Text
anchorDef = try $ do
(refs, raw) <- withRaw $ try (referenceNames <* blanklines)
forM_ refs $ \rawkey ->
updateState $ \s -> s { stateKeys =
- M.insert (toKey rawkey) (('#':rawkey,""), nullAttr) $ stateKeys s }
+ M.insert (toKey rawkey) (("#" <> rawkey,""), nullAttr) $ stateKeys s }
-- keep this for 2nd round of parsing, where we'll add the divs (anchor)
return raw
@@ -1174,12 +1176,12 @@ anchor = try $ do
-- because it hides them from promoteHeader, see #4240
_ -> return $ foldr addDiv b refs
-headerBlock :: PandocMonad m => RSTParser m [Char]
+headerBlock :: PandocMonad m => RSTParser m Text
headerBlock = do
((txt, _), raw) <- withRaw (doubleHeader' <|> singleHeader')
(ident,_,_) <- registerHeader nullAttr txt
let key = toKey (stringify txt)
- updateState $ \s -> s { stateKeys = M.insert key (('#':ident,""), nullAttr)
+ updateState $ \s -> s { stateKeys = M.insert key (("#" <> ident,""), nullAttr)
$ stateKeys s }
return raw
@@ -1201,13 +1203,13 @@ headerBlock = do
-- Grid tables TODO:
-- - column spans
-dashedLine :: Monad m => Char -> ParserT [Char] st m (Int, Int)
+dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int)
dashedLine ch = do
dashes <- many1 (char ch)
sp <- many (char ' ')
return (length dashes, length $ dashes ++ sp)
-simpleDashedLines :: Monad m => Char -> ParserT [Char] st m [(Int,Int)]
+simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)]
simpleDashedLines ch = try $ many1 (dashedLine ch)
-- Parse a table row separator
@@ -1215,17 +1217,17 @@ simpleTableSep :: Monad m => Char -> RSTParser m Char
simpleTableSep ch = try $ simpleDashedLines ch >> newline
-- Parse a table footer
-simpleTableFooter :: Monad m => RSTParser m [Char]
+simpleTableFooter :: Monad m => RSTParser m Text
simpleTableFooter = try $ simpleTableSep '=' >> blanklines
-- Parse a raw line and split it into chunks by indices.
-simpleTableRawLine :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLine :: Monad m => [Int] -> RSTParser m [Text]
simpleTableRawLine indices = simpleTableSplitLine indices <$> anyLine
-simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [String]
+simpleTableRawLineWithEmptyCell :: Monad m => [Int] -> RSTParser m [Text]
simpleTableRawLineWithEmptyCell indices = try $ do
cs <- simpleTableRawLine indices
- let isEmptyCell = all (\c -> c == ' ' || c == '\t')
+ let isEmptyCell = T.all (\c -> c == ' ' || c == '\t')
guard $ any isEmptyCell cs
return cs
@@ -1235,15 +1237,15 @@ simpleTableRow indices = do
notFollowedBy' simpleTableFooter
firstLine <- simpleTableRawLine indices
conLines <- many $ simpleTableRawLineWithEmptyCell indices
- let cols = map unlines . transpose $ firstLine : conLines ++
- [replicate (length indices) ""
- | not (null conLines)]
+ let cols = map T.unlines . transpose $ firstLine : conLines ++
+ [replicate (length indices) ""
+ | not (null conLines)]
mapM (parseFromString' parseBlocks) cols
-simpleTableSplitLine :: [Int] -> String -> [String]
+simpleTableSplitLine :: [Int] -> Text -> [Text]
simpleTableSplitLine indices line =
map trimr
- $ tail $ splitByIndices (init indices) line
+ $ tail $ splitTextByIndices (init indices) line
simpleTableHeader :: PandocMonad m
=> Bool -- ^ Headerless table
@@ -1322,35 +1324,35 @@ inlineContent = choice [ whitespace
, escapedChar
, symbol ] <?> "inline content"
-parseInlineFromString :: PandocMonad m => String -> RSTParser m Inlines
-parseInlineFromString = parseFromString' (trimInlines . mconcat <$> many inline)
+parseInlineFromText :: PandocMonad m => Text -> RSTParser m Inlines
+parseInlineFromText = parseFromString' (trimInlines . mconcat <$> many inline)
hyphens :: Monad m => RSTParser m Inlines
hyphens = do
- result <- many1 (char '-')
+ result <- many1Char (char '-')
optional endline
-- don't want to treat endline after hyphen or dash as a space
return $ B.str result
-escapedChar :: Monad m => ParserT [Char] st m Inlines
+escapedChar :: Monad m => ParserT Text st m Inlines
escapedChar = do c <- escaped anyChar
return $ if c == ' ' || c == '\n' || c == '\r'
-- '\ ' is null in RST
then mempty
- else B.str [c]
+ else B.str $ T.singleton c
symbol :: Monad m => RSTParser m Inlines
symbol = do
result <- oneOf specialChars
- return $ B.str [result]
+ return $ B.str $ T.singleton result
-- parses inline code, between codeStart and codeEnd
code :: Monad m => RSTParser m Inlines
code = try $ do
string "``"
- result <- manyTill anyChar (try (string "``"))
+ result <- manyTillChar anyChar (try (string "``"))
return $ B.code
- $ trim $ unwords $ lines result
+ $ trim $ T.unwords $ T.lines result
-- succeeds only if we're not right after a str (ie. in middle of word)
atStart :: Monad m => RSTParser m a -> RSTParser m a
@@ -1382,7 +1384,7 @@ interpretedRole = try $ do
renderRole contents Nothing role nullAttr
renderRole :: PandocMonad m
- => String -> Maybe String -> String -> Attr -> RSTParser m Inlines
+ => Text -> Maybe Text -> Text -> Attr -> RSTParser m Inlines
renderRole contents fmt role attr = case role of
"sup" -> return $ B.superscript $ treatAsText contents
"superscript" -> return $ B.superscript $ treatAsText contents
@@ -1412,36 +1414,36 @@ renderRole contents fmt role attr = case role of
contents
where
titleRef ref = return $ B.spanWith ("",["title-ref"],[]) $ treatAsText ref
- rfcLink rfcNo = B.link rfcUrl ("RFC " ++ rfcNo) $ B.str ("RFC " ++ rfcNo)
- where rfcUrl = "http://www.faqs.org/rfcs/rfc" ++ rfcNo ++ ".html"
- pepLink pepNo = B.link pepUrl ("PEP " ++ pepNo) $ B.str ("PEP " ++ pepNo)
- where padNo = replicate (4 - length pepNo) '0' ++ pepNo
- pepUrl = "http://www.python.org/dev/peps/pep-" ++ padNo ++ "/"
+ rfcLink rfcNo = B.link rfcUrl ("RFC " <> rfcNo) $ B.str ("RFC " <> rfcNo)
+ where rfcUrl = "http://www.faqs.org/rfcs/rfc" <> rfcNo <> ".html"
+ pepLink pepNo = B.link pepUrl ("PEP " <> pepNo) $ B.str ("PEP " <> pepNo)
+ where padNo = T.replicate (4 - T.length pepNo) "0" <> pepNo
+ pepUrl = "http://www.python.org/dev/peps/pep-" <> padNo <> "/"
treatAsText = B.text . handleEscapes
- handleEscapes [] = []
- handleEscapes ('\\':' ':cs) = handleEscapes cs
- handleEscapes ('\\':c:cs) = c : handleEscapes cs
- handleEscapes (c:cs) = c : handleEscapes cs
+ handleEscapes = T.concat . removeSpace . T.splitOn "\\"
+ where headSpace t = fromMaybe t $ T.stripPrefix " " t
+ removeSpace (x:xs) = x : map headSpace xs
+ removeSpace [] = []
-roleName :: PandocMonad m => RSTParser m String
-roleName = many1 (letter <|> char '-')
+roleName :: PandocMonad m => RSTParser m Text
+roleName = many1Char (letter <|> char '-')
-roleMarker :: PandocMonad m => RSTParser m String
+roleMarker :: PandocMonad m => RSTParser m Text
roleMarker = char ':' *> roleName <* char ':'
-roleBefore :: PandocMonad m => RSTParser m (String,String)
+roleBefore :: PandocMonad m => RSTParser m (Text,Text)
roleBefore = try $ do
role <- roleMarker
contents <- unmarkedInterpretedText
return (role,contents)
-roleAfter :: PandocMonad m => RSTParser m (String,String)
+roleAfter :: PandocMonad m => RSTParser m (Text,Text)
roleAfter = try $ do
contents <- unmarkedInterpretedText
role <- roleMarker <|> (stateRstDefaultRole <$> getState)
return (role,contents)
-unmarkedInterpretedText :: PandocMonad m => RSTParser m [Char]
+unmarkedInterpretedText :: PandocMonad m => RSTParser m Text
unmarkedInterpretedText = try $ do
atStart (char '`')
contents <- mconcat <$> (many1
@@ -1453,7 +1455,7 @@ unmarkedInterpretedText = try $ do
lookAhead (satisfy isAlphaNum))
))
char '`'
- return contents
+ return $ T.pack contents
whitespace :: PandocMonad m => RSTParser m Inlines
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
@@ -1461,7 +1463,7 @@ whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
str :: Monad m => RSTParser m Inlines
str = do
let strChar = noneOf ("\t\n " ++ specialChars)
- result <- many1 strChar
+ result <- many1Char strChar
updateLastStrPos
return $ B.str result
@@ -1489,7 +1491,7 @@ explicitLink = try $ do
notFollowedBy (char '`') -- `` marks start of inline code
label' <- trimInlines . mconcat <$>
manyTill (notFollowedBy (char '`') >> inlineContent) (char '<')
- src <- trim <$> manyTill (noneOf ">\n") (char '>')
+ src <- trim <$> manyTillChar (noneOf ">\n") (char '>')
skipSpaces
string "`_"
optional $ char '_' -- anonymous form
@@ -1501,22 +1503,22 @@ explicitLink = try $ do
if isURI src
then return ((src, ""), nullAttr)
else
- case reverse src of
- '_':xs -> lookupKey [] (toKey (reverse xs))
- _ -> return ((src, ""), nullAttr)
+ case T.unsnoc src of
+ Just (xs, '_') -> lookupKey [] (toKey xs)
+ _ -> return ((src, ""), nullAttr)
return $ B.linkWith attr (escapeURI src') tit label''
-citationName :: PandocMonad m => RSTParser m String
+citationName :: PandocMonad m => RSTParser m Text
citationName = do
raw <- citationMarker
- return $ "[" ++ raw ++ "]"
+ return $ "[" <> raw <> "]"
referenceLink :: PandocMonad m => RSTParser m Inlines
referenceLink = try $ do
ref <- (referenceName <|> citationName) <* char '_'
let label' = B.text ref
- let isAnonKey (Key ('_':_)) = True
- isAnonKey _ = False
+ let isAnonKey (Key (T.uncons -> Just ('_',_))) = True
+ isAnonKey _ = False
state <- getState
let keyTable = stateKeys state
key <- option (toKey ref) $
@@ -1533,7 +1535,7 @@ referenceLink = try $ do
-- We keep a list of oldkeys so we can detect lookup loops.
lookupKey :: PandocMonad m
- => [Key] -> Key -> RSTParser m ((String, String), Attr)
+ => [Key] -> Key -> RSTParser m ((Text, Text), Attr)
lookupKey oldkeys key = do
pos <- getPosition
state <- getState
@@ -1544,8 +1546,8 @@ lookupKey oldkeys key = do
logMessage $ ReferenceNotFound key' pos
return (("",""),nullAttr)
-- check for keys of the form link_, which need to be resolved:
- Just ((u@(c:_),""),_) | last u == '_', c /= '#' -> do
- let rawkey = init u
+ Just ((u, ""),_) | T.length u > 1, T.last u == '_', T.head u /= '#' -> do
+ let rawkey = T.init u
let newkey = toKey rawkey
if newkey `elem` oldkeys
then do
@@ -1576,7 +1578,7 @@ subst = try $ do
case M.lookup key substTable of
Nothing -> do
pos <- getPosition
- logMessage $ ReferenceNotFound (show key) pos
+ logMessage $ ReferenceNotFound (tshow key) pos
return mempty
Just target -> return target
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 6519587c6..73122cc14 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -1,5 +1,7 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Roff
Copyright : Copyright (C) 2018-2019 Yan Pashkovsky and John MacFarlane
@@ -21,7 +23,7 @@ module Text.Pandoc.Readers.Roff
, TableRow
, RoffToken(..)
, RoffTokens(..)
- , linePartsToString
+ , linePartsToText
, lexRoff
)
where
@@ -40,7 +42,7 @@ import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing
-import Text.Pandoc.Shared (safeRead, substitute)
+import Text.Pandoc.Shared (safeRead)
import Text.Parsec hiding (tokenPrim)
import Text.Pandoc.RoffChar (characterCodes, combiningAccents)
import qualified Data.Sequence as Seq
@@ -60,28 +62,28 @@ data FontSpec = FontSpec{ fontBold :: Bool
defaultFontSpec :: FontSpec
defaultFontSpec = FontSpec False False False
-data LinePart = RoffStr String
+data LinePart = RoffStr T.Text
| Font FontSpec
| MacroArg Int
deriving Show
type Arg = [LinePart]
-type TableOption = (String, String)
+type TableOption = (T.Text, T.Text)
data CellFormat =
CellFormat
{ columnType :: Char
, pipePrefix :: Bool
, pipeSuffix :: Bool
- , columnSuffixes :: [String]
+ , columnSuffixes :: [T.Text]
} deriving (Show, Eq, Ord)
type TableRow = ([CellFormat], [RoffTokens])
data RoffToken = TextLine [LinePart]
| EmptyLine
- | ControlLine String [Arg] SourcePos
+ | ControlLine T.Text [Arg] SourcePos
| Tbl [TableOption] [TableRow] SourcePos
deriving Show
@@ -95,7 +97,7 @@ data RoffMode = NormalMode
| CopyMode
deriving Show
-data RoffState = RoffState { customMacros :: M.Map String RoffTokens
+data RoffState = RoffState { customMacros :: M.Map T.Text RoffTokens
, prevFont :: FontSpec
, currentFont :: FontSpec
, tableTabChar :: Char
@@ -121,10 +123,10 @@ instance Default RoffState where
, afterConditional = False
}
-type RoffLexer m = ParserT [Char] RoffState m
+type RoffLexer m = ParserT T.Text RoffState m
--
--- Lexer: String -> RoffToken
+-- Lexer: T.Text -> RoffToken
--
eofline :: Stream s m Char => ParsecT s u m ()
@@ -133,11 +135,11 @@ eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}")
spacetab :: Stream s m Char => ParsecT s u m Char
spacetab = char ' ' <|> char '\t'
-characterCodeMap :: M.Map String Char
+characterCodeMap :: M.Map T.Text Char
characterCodeMap =
M.fromList $ map (\(x,y) -> (y,x)) characterCodes
-combiningAccentsMap :: M.Map String Char
+combiningAccentsMap :: M.Map T.Text Char
combiningAccentsMap =
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
@@ -151,43 +153,40 @@ escapeGlyph = do
c <- lookAhead (oneOf ['[','('])
escapeArg >>= resolveGlyph c
-resolveGlyph :: PandocMonad m => Char -> String -> RoffLexer m [LinePart]
+resolveGlyph :: PandocMonad m => Char -> T.Text -> RoffLexer m [LinePart]
resolveGlyph delimChar glyph = do
- let cs = substitute "_u" " u" glyph -- unicode glyphs separated by _
- (case words cs of
+ let cs = T.replace "_u" " u" glyph -- unicode glyphs separated by _
+ (case T.words cs of
[] -> mzero
[s] -> case M.lookup s characterCodeMap `mplus` readUnicodeChar s of
Nothing -> mzero
- Just c -> return [RoffStr [c]]
+ Just c -> return [RoffStr $ T.singleton c]
(s:ss) -> do
basechar <- case M.lookup s characterCodeMap `mplus`
readUnicodeChar s of
Nothing ->
- case s of
+ case T.unpack s of
[ch] | isAscii ch && isAlphaNum ch ->
return ch
_ -> mzero
Just c -> return c
- let addAccents [] xs = return $ T.unpack .
- Normalize.normalize Normalize.NFC .
- T.pack $ reverse xs
+ let addAccents [] xs = return $ Normalize.normalize Normalize.NFC $
+ T.reverse xs
addAccents (a:as) xs =
case M.lookup a combiningAccentsMap `mplus` readUnicodeChar a of
- Just x -> addAccents as (x:xs)
+ Just x -> addAccents as $ T.cons x xs
Nothing -> mzero
- addAccents ss [basechar] >>= \xs -> return [RoffStr xs])
+ addAccents ss (T.singleton basechar) >>= \xs -> return [RoffStr xs])
<|> case delimChar of
- '[' -> escUnknown ("\\[" ++ glyph ++ "]")
- '(' -> escUnknown ("\\(" ++ glyph)
- '\'' -> escUnknown ("\\C'" ++ glyph ++ "'")
+ '[' -> escUnknown ("\\[" <> glyph <> "]")
+ '(' -> escUnknown ("\\(" <> glyph)
+ '\'' -> escUnknown ("\\C'" <> glyph <> "'")
_ -> Prelude.fail "resolveGlyph: unknown glyph delimiter"
-readUnicodeChar :: String -> Maybe Char
-readUnicodeChar ('u':cs@(_:_:_:_:_)) =
- case safeRead ('0':'x':cs) of
- Just i -> Just (chr i)
- Nothing -> Nothing
-readUnicodeChar _ = Nothing
+readUnicodeChar :: T.Text -> Maybe Char
+readUnicodeChar t = case T.uncons t of
+ Just ('u', cs) | T.length cs > 3 -> chr <$> safeRead ("0x" <> cs)
+ _ -> Nothing
escapeNormal :: PandocMonad m => RoffLexer m [LinePart]
escapeNormal = do
@@ -218,14 +217,14 @@ escapeNormal = do
NormalMode -> return [RoffStr "\\"]
'H' -> escIgnore 'H' [quoteArg]
'L' -> escIgnore 'L' [quoteArg]
- 'M' -> escIgnore 'M' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'M' -> escIgnore 'M' [escapeArg, countChar 1 (satisfy (/='\n'))]
'N' -> escIgnore 'N' [quoteArg]
- 'O' -> escIgnore 'O' [count 1 (oneOf ['0','1'])]
+ 'O' -> escIgnore 'O' [countChar 1 (oneOf ['0','1'])]
'R' -> escIgnore 'R' [quoteArg]
'S' -> escIgnore 'S' [quoteArg]
- 'V' -> escIgnore 'V' [escapeArg, count 1 alphaNum]
+ 'V' -> escIgnore 'V' [escapeArg, countChar 1 alphaNum]
'X' -> escIgnore 'X' [quoteArg]
- 'Y' -> escIgnore 'Y' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'Y' -> escIgnore 'Y' [escapeArg, countChar 1 (satisfy (/='\n'))]
'Z' -> escIgnore 'Z' [quoteArg]
'\'' -> return [RoffStr "`"]
'\n' -> return mempty -- line continuation
@@ -238,12 +237,12 @@ escapeNormal = do
'd' -> escIgnore 'd' [] -- forward down 1/2em
'e' -> return [RoffStr "\\"]
'f' -> escFont
- 'g' -> escIgnore 'g' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'g' -> escIgnore 'g' [escapeArg, countChar 1 (satisfy (/='\n'))]
'h' -> escIgnore 'h' [quoteArg]
- 'k' -> escIgnore 'k' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'k' -> escIgnore 'k' [escapeArg, countChar 1 (satisfy (/='\n'))]
'l' -> escIgnore 'l' [quoteArg]
- 'm' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))]
- 'n' -> escIgnore 'm' [escapeArg, count 1 (satisfy (/='\n'))]
+ 'm' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))]
+ 'n' -> escIgnore 'm' [escapeArg, countChar 1 (satisfy (/='\n'))]
'o' -> escIgnore 'o' [quoteArg]
'p' -> escIgnore 'p' []
'r' -> escIgnore 'r' []
@@ -253,7 +252,7 @@ escapeNormal = do
'v' -> escIgnore 'v' [quoteArg]
'w' -> escIgnore 'w' [quoteArg]
'x' -> escIgnore 'x' [quoteArg]
- 'z' -> escIgnore 'z' [count 1 anyChar]
+ 'z' -> escIgnore 'z' [countChar 1 anyChar]
'|' -> return [RoffStr "\x2006"] --1/6 em space
'~' -> return [RoffStr "\160"] -- nonbreaking space
'\\' -> do
@@ -262,40 +261,40 @@ escapeNormal = do
CopyMode -> char '\\'
NormalMode -> return '\\'
return [RoffStr "\\"]
- _ -> return [RoffStr [c]]
+ _ -> return [RoffStr $ T.singleton c]
-- man 7 groff: "If a backslash is followed by a character that
-- does not constitute a defined escape sequence, the backslash
-- is silently ignored and the character maps to itself."
escIgnore :: PandocMonad m
=> Char
- -> [RoffLexer m String]
+ -> [RoffLexer m T.Text]
-> RoffLexer m [LinePart]
escIgnore c argparsers = do
pos <- getPosition
arg <- snd <$> withRaw (choice argparsers) <|> return ""
- report $ SkippedContent ('\\':c:arg) pos
+ report $ SkippedContent ("\\" <> T.cons c arg) pos
return mempty
-escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart]
+escUnknown :: PandocMonad m => T.Text -> RoffLexer m [LinePart]
escUnknown s = do
pos <- getPosition
report $ SkippedContent s pos
return [RoffStr "\xFFFD"]
-signedNumber :: PandocMonad m => RoffLexer m String
+signedNumber :: PandocMonad m => RoffLexer m T.Text
signedNumber = try $ do
sign <- option "" ("-" <$ char '-' <|> "" <$ char '+')
- ds <- many1 digit
- return (sign ++ ds)
+ ds <- many1Char digit
+ return (sign <> ds)
-- Parses: [..] or (..
-escapeArg :: PandocMonad m => RoffLexer m String
+escapeArg :: PandocMonad m => RoffLexer m T.Text
escapeArg = choice
[ char '[' *> optional expandString *>
- manyTill (noneOf ['\n',']']) (char ']')
+ manyTillChar (noneOf ['\n',']']) (char ']')
, char '(' *> optional expandString *>
- count 2 (satisfy (/='\n'))
+ countChar 2 (satisfy (/='\n'))
]
expandString :: PandocMonad m => RoffLexer m ()
@@ -303,21 +302,21 @@ expandString = try $ do
pos <- getPosition
char '\\'
char '*'
- cs <- escapeArg <|> count 1 anyChar
- s <- linePartsToString <$> resolveString cs pos
- getInput >>= setInput . (s ++)
+ cs <- escapeArg <|> countChar 1 anyChar
+ s <- linePartsToText <$> resolveText cs pos
+ getInput >>= setInput . (s <>)
return ()
-- Parses: '..'
-quoteArg :: PandocMonad m => RoffLexer m String
-quoteArg = char '\'' *> manyTill (noneOf ['\n','\'']) (char '\'')
+quoteArg :: PandocMonad m => RoffLexer m T.Text
+quoteArg = char '\'' *> manyTillChar (noneOf ['\n','\'']) (char '\'')
escFont :: PandocMonad m => RoffLexer m [LinePart]
escFont = do
- font <- escapeArg <|> count 1 alphaNum
- font' <- if null font || font == "P"
+ font <- escapeArg <|> countChar 1 alphaNum
+ font' <- if T.null font || font == "P"
then prevFont <$> getState
- else return $ foldr processFontLetter defaultFontSpec font
+ else return $ foldr processFontLetter defaultFontSpec $ T.unpack font
modifyState $ \st -> st{ prevFont = currentFont st
, currentFont = font' }
return [Font font']
@@ -345,7 +344,7 @@ lexMacro = do
guard $ sourceColumn pos == 1 || afterConditional st
char '.' <|> char '\''
skipMany spacetab
- macroName <- many (satisfy isAlphaNum)
+ macroName <- manyChar (satisfy isAlphaNum)
case macroName of
"nop" -> return mempty
"ie" -> lexConditional "ie"
@@ -374,8 +373,8 @@ lexTable pos = do
spaces
opts <- try tableOptions <|> [] <$ optional (char ';')
case lookup "tab" opts of
- Just (c:_) -> modifyState $ \st -> st{ tableTabChar = c }
- _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
+ Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c }
+ _ -> modifyState $ \st -> st{ tableTabChar = '\t' }
spaces
skipMany lexComment
spaces
@@ -388,7 +387,7 @@ lexTable pos = do
string ".TE"
skipMany spacetab
eofline
- return $ singleTok $ Tbl opts (rows ++ concat morerows) pos
+ return $ singleTok $ Tbl opts (rows <> concat morerows) pos
lexTableRows :: PandocMonad m => RoffLexer m [TableRow]
lexTableRows = do
@@ -428,11 +427,11 @@ tableOptions = many1 tableOption <* spaces <* char ';'
tableOption :: PandocMonad m => RoffLexer m TableOption
tableOption = do
- k <- many1 letter
+ k <- many1Char letter
v <- option "" $ try $ do
skipMany spacetab
char '('
- manyTill anyChar (char ')')
+ manyTillChar anyChar (char ')')
skipMany spacetab
optional (char ',' >> skipMany spacetab)
return (k,v)
@@ -444,7 +443,7 @@ tableFormatSpec = do
let speclines = first:rest
spaces
char '.'
- return $ speclines ++ repeat (lastDef [] speclines) -- last line is default
+ return $ speclines <> repeat (lastDef [] speclines) -- last line is default
tableFormatSpecLine :: PandocMonad m => RoffLexer m [CellFormat]
tableFormatSpecLine =
@@ -456,19 +455,19 @@ tableColFormat = do
$ True <$ try (string "|" <* notFollowedBy spacetab)
c <- oneOf ['a','A','c','C','l','L','n','N','r','R','s','S','^','_','-',
'=','|']
- suffixes <- many $ try (skipMany spacetab *> count 1 digit) <|>
+ suffixes <- many $ try (skipMany spacetab *> countChar 1 digit) <|>
(do x <- oneOf ['b','B','d','D','e','E','f','F','i','I','m','M',
'p','P','t','T','u','U','v','V','w','W','x','X', 'z','Z']
num <- case toLower x of
'w' -> many1 digit <|>
(do char '('
xs <- manyTill anyChar (char ')')
- return ("(" ++ xs ++ ")")) <|>
+ return ("(" <> xs <> ")")) <|>
return ""
'f' -> count 1 alphaNum <* skipMany spacetab
'm' -> count 1 alphaNum <* skipMany spacetab
_ -> return ""
- return $ x : num)
+ return $ T.pack $ x : num)
pipeSuffix' <- option False $ True <$ string "|"
return $ CellFormat
{ columnType = c
@@ -479,7 +478,7 @@ tableColFormat = do
-- We don't fully handle the conditional. But we do
-- include everything under '.ie n', which occurs commonly
-- in man pages.
-lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens
+lexConditional :: PandocMonad m => T.Text -> RoffLexer m RoffTokens
lexConditional mname = do
pos <- getPosition
skipMany spacetab
@@ -498,7 +497,7 @@ lexConditional mname = do
case mbtest of
Nothing -> do
putState st -- reset state, so we don't record macros in skipped section
- report $ SkippedContent ('.':mname) pos
+ report $ SkippedContent (T.cons '.' mname) pos
return mempty
Just True -> return ifPart
Just False -> do
@@ -508,7 +507,7 @@ lexConditional mname = do
expression :: PandocMonad m => RoffLexer m (Maybe Bool)
expression = do
raw <- charsInBalanced '(' ')' (satisfy (/= '\n'))
- <|> many1 nonspaceChar
+ <|> many1Char nonspaceChar
returnValue $
case raw of
"1" -> Just True
@@ -533,17 +532,17 @@ lexIncludeFile args = do
pos <- getPosition
case args of
(f:_) -> do
- let fp = linePartsToString f
+ let fp = linePartsToText f
dirs <- getResourcePath
- result <- readFileFromDirs dirs fp
+ result <- readFileFromDirs dirs $ T.unpack fp
case result of
Nothing -> report $ CouldNotLoadIncludeFile fp pos
- Just s -> getInput >>= setInput . (s ++)
+ Just s -> getInput >>= setInput . (s <>)
return mempty
[] -> return mempty
resolveMacro :: PandocMonad m
- => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens
+ => T.Text -> [Arg] -> SourcePos -> RoffLexer m RoffTokens
resolveMacro macroName args pos = do
macros <- customMacros <$> getState
case M.lookup macroName macros of
@@ -552,7 +551,7 @@ resolveMacro macroName args pos = do
let fillLP (MacroArg i) zs =
case drop (i - 1) args of
[] -> zs
- (ys:_) -> ys ++ zs
+ (ys:_) -> ys <> zs
fillLP z zs = z : zs
let fillMacroArg (TextLine lineparts) =
TextLine (foldr fillLP [] lineparts)
@@ -565,7 +564,7 @@ lexStringDef args = do -- string definition
[] -> Prelude.fail "No argument to .ds"
(x:ys) -> do
let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys)
- let stringName = linePartsToString x
+ let stringName = linePartsToText x
modifyState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
return mempty
@@ -575,14 +574,14 @@ lexMacroDef args = do -- macro definition
modifyState $ \st -> st{ roffMode = CopyMode }
(macroName, stopMacro) <-
case args of
- (x : y : _) -> return (linePartsToString x, linePartsToString y)
+ (x : y : _) -> return (linePartsToText x, linePartsToText y)
-- optional second arg
- (x:_) -> return (linePartsToString x, ".")
+ (x:_) -> return (linePartsToText x, ".")
[] -> Prelude.fail "No argument to .de"
let stop = try $ do
char '.' <|> char '\''
skipMany spacetab
- string stopMacro
+ textStr stopMacro
_ <- lexArgs
return ()
ts <- mconcat <$> manyTill manToken stop
@@ -628,7 +627,7 @@ lexArgs = do
char '"'
return [RoffStr "\""]
-checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart]
+checkDefined :: PandocMonad m => T.Text -> RoffLexer m [LinePart]
checkDefined name = do
macros <- customMacros <$> getState
case M.lookup name macros of
@@ -638,19 +637,19 @@ checkDefined name = do
escString :: PandocMonad m => RoffLexer m [LinePart]
escString = try $ do
pos <- getPosition
- (do cs <- escapeArg <|> count 1 anyChar
- resolveString cs pos)
+ (do cs <- escapeArg <|> countChar 1 anyChar
+ resolveText cs pos)
<|> mempty <$ char 'S'
-- strings and macros share namespace
-resolveString :: PandocMonad m
- => String -> SourcePos -> RoffLexer m [LinePart]
-resolveString stringname pos = do
+resolveText :: PandocMonad m
+ => T.Text -> SourcePos -> RoffLexer m [LinePart]
+resolveText stringname pos = do
RoffTokens ts <- resolveMacro stringname [] pos
case Foldable.toList ts of
[TextLine xs] -> return xs
_ -> do
- report $ SkippedContent ("unknown string " ++ stringname) pos
+ report $ SkippedContent ("unknown string " <> stringname) pos
return mempty
lexLine :: PandocMonad m => RoffLexer m RoffTokens
@@ -688,16 +687,16 @@ macroArg = try $ do
pos <- getPosition
backslash
char '$'
- x <- escapeArg <|> count 1 digit
+ x <- escapeArg <|> countChar 1 digit
case safeRead x of
Just i -> return [MacroArg i]
Nothing -> do
- report $ SkippedContent ("illegal macro argument " ++ x) pos
+ report $ SkippedContent ("illegal macro argument " <> x) pos
return []
regularText :: PandocMonad m => RoffLexer m [LinePart]
regularText = do
- s <- many1 $ noneOf "\n\r\t \\\""
+ s <- many1Char $ noneOf "\n\r\t \\\""
return [RoffStr s]
quoteChar :: PandocMonad m => RoffLexer m [LinePart]
@@ -708,7 +707,7 @@ quoteChar = do
spaceTabChar :: PandocMonad m => RoffLexer m [LinePart]
spaceTabChar = do
c <- spacetab
- return [RoffStr [c]]
+ return [RoffStr $ T.singleton c]
lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
lexEmptyLine = newline >> return (singleTok EmptyLine)
@@ -716,8 +715,8 @@ lexEmptyLine = newline >> return (singleTok EmptyLine)
manToken :: PandocMonad m => RoffLexer m RoffTokens
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
-linePartsToString :: [LinePart] -> String
-linePartsToString = mconcat . map go
+linePartsToText :: [LinePart] -> T.Text
+linePartsToText = mconcat . map go
where
go (RoffStr s) = s
go _ = mempty
@@ -726,7 +725,7 @@ linePartsToString = mconcat . map go
lexRoff :: PandocMonad m => SourcePos -> T.Text -> m RoffTokens
lexRoff pos txt = do
eithertokens <- readWithM (do setPosition pos
- mconcat <$> many manToken) def (T.unpack txt)
+ mconcat <$> many manToken) def txt
case eithertokens of
Left e -> throwError e
Right tokenz -> return tokenz
diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs
index 9796de4b9..d587bc41b 100644
--- a/src/Text/Pandoc/Readers/TWiki.hs
+++ b/src/Text/Pandoc/Readers/TWiki.hs
@@ -1,5 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RelaxedPolyRec #-}
-- RelaxedPolyRec needed for inlinesBetween on GHC < 7
{- |
@@ -31,7 +32,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed, nested)
import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag)
-import Text.Pandoc.Shared (crFilter)
+import Text.Pandoc.Shared (crFilter, tshow)
import Text.Pandoc.XML (fromEntities)
-- | Read twiki from an input string and return a Pandoc document.
@@ -41,19 +42,19 @@ readTWiki :: PandocMonad m
-> m Pandoc
readTWiki opts s = do
res <- readWithM parseTWiki def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case res of
Left e -> throwError e
Right d -> return d
-type TWParser = ParserT [Char] ParserState
+type TWParser = ParserT Text ParserState
--
-- utility functions
--
-tryMsg :: String -> TWParser m a -> TWParser m a
-tryMsg msg p = try p <?> msg
+tryMsg :: Text -> TWParser m a -> TWParser m a
+tryMsg msg p = try p <?> T.unpack msg
nested :: PandocMonad m => TWParser m a -> TWParser m a
nested p = do
@@ -64,25 +65,25 @@ nested p = do
updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
return res
-htmlElement :: PandocMonad m => String -> TWParser m (Attr, String)
+htmlElement :: PandocMonad m => Text -> TWParser m (Attr, Text)
htmlElement tag = tryMsg tag $ do
(TagOpen _ attr, _) <- htmlTag (~== TagOpen tag [])
- content <- manyTill anyChar (endtag <|> endofinput)
+ content <- T.pack <$> manyTill anyChar (endtag <|> endofinput)
return (htmlAttrToPandoc attr, trim content)
where
endtag = void $ htmlTag (~== TagClose tag)
endofinput = lookAhead $ try $ skipMany blankline >> skipSpaces >> eof
- trim = dropWhile (=='\n') . reverse . dropWhile (=='\n') . reverse
+ trim = T.dropAround (=='\n')
-htmlAttrToPandoc :: [Attribute String] -> Attr
+htmlAttrToPandoc :: [Attribute Text] -> Attr
htmlAttrToPandoc attrs = (ident, classes, keyvals)
where
ident = fromMaybe "" $ lookup "id" attrs
- classes = maybe [] words $ lookup "class" attrs
+ classes = maybe [] T.words $ lookup "class" attrs
keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
parseHtmlContentWithAttrs :: PandocMonad m
- => String -> TWParser m a -> TWParser m (Attr, [a])
+ => Text -> TWParser m a -> TWParser m (Attr, [a])
parseHtmlContentWithAttrs tag parser = do
(attr, content) <- htmlElement tag
parsedContent <- try $ parseContent content
@@ -91,7 +92,13 @@ parseHtmlContentWithAttrs tag parser = do
parseContent = parseFromString' $ nested $ manyTill parser endOfContent
endOfContent = try $ skipMany blankline >> skipSpaces >> eof
-parseHtmlContent :: PandocMonad m => String -> TWParser m a -> TWParser m [a]
+parseCharHtmlContentWithAttrs :: PandocMonad m
+ => Text -> TWParser m Char -> TWParser m (Attr, Text)
+parseCharHtmlContentWithAttrs tag = fmap go . parseHtmlContentWithAttrs tag
+ where
+ go (x, y) = (x, T.pack y)
+
+parseHtmlContent :: PandocMonad m => Text -> TWParser m a -> TWParser m [a]
parseHtmlContent tag p = snd <$> parseHtmlContentWithAttrs tag p
--
@@ -113,7 +120,7 @@ block = do
<|> blockElements
<|> para
skipMany blankline
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
blockElements :: PandocMonad m => TWParser m B.Blocks
@@ -150,38 +157,38 @@ literal = rawBlock <$> htmlElement "literal"
format (_, _, kvs) = fromMaybe "html" $ lookup "format" kvs
rawBlock (attrs, content) = B.rawBlock (format attrs) content
-list :: PandocMonad m => String -> TWParser m B.Blocks
+list :: PandocMonad m => Text -> TWParser m B.Blocks
list prefix = choice [ bulletList prefix
, orderedList prefix
, definitionList prefix]
-definitionList :: PandocMonad m => String -> TWParser m B.Blocks
+definitionList :: PandocMonad m => Text -> TWParser m B.Blocks
definitionList prefix = tryMsg "definitionList" $ do
- indent <- lookAhead $ string prefix *> many1 (string " ") <* string "$ "
- elements <- many $ parseDefinitionListItem (prefix ++ concat indent)
+ indent <- lookAhead $ textStr prefix *> many1 (textStr " ") <* textStr "$ "
+ elements <- many $ parseDefinitionListItem (prefix <> T.concat indent)
return $ B.definitionList elements
where
parseDefinitionListItem :: PandocMonad m
- => String -> TWParser m (B.Inlines, [B.Blocks])
+ => Text -> TWParser m (B.Inlines, [B.Blocks])
parseDefinitionListItem indent = do
- string (indent ++ "$ ") >> skipSpaces
+ textStr (indent <> "$ ") >> skipSpaces
term <- many1Till inline $ string ": "
line <- listItemLine indent $ string "$ "
return (mconcat term, [line])
-bulletList :: PandocMonad m => String -> TWParser m B.Blocks
+bulletList :: PandocMonad m => Text -> TWParser m B.Blocks
bulletList prefix = tryMsg "bulletList" $
parseList prefix (char '*') (char ' ')
-orderedList :: PandocMonad m => String -> TWParser m B.Blocks
+orderedList :: PandocMonad m => Text -> TWParser m B.Blocks
orderedList prefix = tryMsg "orderedList" $
parseList prefix (oneOf "1iIaA") (string ". ")
parseList :: PandocMonad m
- => String -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
+ => Text -> TWParser m Char -> TWParser m a -> TWParser m B.Blocks
parseList prefix marker delim = do
- (indent, style) <- lookAhead $ string prefix *> listStyle <* delim
- blocks <- many $ parseListItem (prefix ++ indent) (char style <* delim)
+ (indent, style) <- lookAhead $ textStr prefix *> listStyle <* delim
+ blocks <- many $ parseListItem (prefix <> indent) (char style <* delim)
return $ case style of
'1' -> B.orderedListWith (1, DefaultStyle, DefaultDelim) blocks
'i' -> B.orderedListWith (1, LowerRoman, DefaultDelim) blocks
@@ -191,24 +198,24 @@ parseList prefix marker delim = do
_ -> B.bulletList blocks
where
listStyle = do
- indent <- many1 $ string " "
+ indent <- many1 $ textStr " "
style <- marker
- return (concat indent, style)
+ return (T.concat indent, style)
parseListItem :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
-parseListItem prefix marker = string prefix >> marker >> listItemLine prefix marker
+ => Text -> TWParser m a -> TWParser m B.Blocks
+parseListItem prefix marker = textStr prefix >> marker >> listItemLine prefix marker
listItemLine :: (PandocMonad m, Show a)
- => String -> TWParser m a -> TWParser m B.Blocks
+ => Text -> TWParser m a -> TWParser m B.Blocks
listItemLine prefix marker = mconcat <$> (lineContent >>= parseContent)
where
lineContent = do
content <- anyLine
continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ maybe "" (" " ++) continuation
- filterSpaces = reverse . dropWhile (== ' ') . reverse
- listContinuation = notFollowedBy (string prefix >> marker) >>
+ return $ filterSpaces content <> "\n" <> maybe "" (" " <>) continuation
+ filterSpaces = T.dropWhileEnd (== ' ')
+ listContinuation = notFollowedBy (textStr prefix >> marker) >>
string " " >> lineContent
parseContent = parseFromString' $ many1 $ nestedList <|> parseInline
parseInline = (B.plain . mconcat) <$> many1Till inline (lastNewline <|> newlineBeforeNestedList)
@@ -352,29 +359,29 @@ macroWithParameters = try $ do
char '%'
return $ buildSpan name kvs $ B.str content
-buildSpan :: String -> [(String, String)] -> B.Inlines -> B.Inlines
+buildSpan :: Text -> [(Text, Text)] -> B.Inlines -> B.Inlines
buildSpan className kvs = B.spanWith attrs
where
attrs = ("", ["twiki-macro", className] ++ additionalClasses, kvsWithoutClasses)
- additionalClasses = maybe [] words $ lookup "class" kvs
+ additionalClasses = maybe [] T.words $ lookup "class" kvs
kvsWithoutClasses = [(k,v) | (k,v) <- kvs, k /= "class"]
-macroName :: PandocMonad m => TWParser m String
+macroName :: PandocMonad m => TWParser m Text
macroName = do
first <- letter
rest <- many $ alphaNum <|> char '_'
- return (first:rest)
+ return $ T.pack $ first:rest
-attributes :: PandocMonad m => TWParser m (String, [(String, String)])
-attributes = foldr (either mkContent mkKvs) ([], [])
+attributes :: PandocMonad m => TWParser m (Text, [(Text, Text)])
+attributes = foldr (either mkContent mkKvs) ("", [])
<$> (char '{' *> spnl *> many (attribute <* spnl) <* char '}')
where
spnl = skipMany (spaceChar <|> newline)
- mkContent c ([], kvs) = (c, kvs)
- mkContent c (rest, kvs) = (c ++ " " ++ rest, kvs)
+ mkContent c ("", kvs) = (c, kvs)
+ mkContent c (rest, kvs) = (c <> " " <> rest, kvs)
mkKvs kv (cont, rest) = (cont, kv : rest)
-attribute :: PandocMonad m => TWParser m (Either String (String, String))
+attribute :: PandocMonad m => TWParser m (Either Text (Text, Text))
attribute = withKey <|> withoutKey
where
withKey = try $ do
@@ -383,10 +390,10 @@ attribute = withKey <|> withoutKey
curry Right key <$> parseValue False
withoutKey = try $ Left <$> parseValue True
parseValue allowSpaces = fromEntities <$> (withQuotes <|> withoutQuotes allowSpaces)
- withQuotes = between (char '"') (char '"') (\_ -> count 1 $ noneOf ['"'])
+ withQuotes = between (char '"') (char '"') (\_ -> countChar 1 $ noneOf ['"'])
withoutQuotes allowSpaces
- | allowSpaces = many1 $ noneOf "}"
- | otherwise = many1 $ noneOf " }"
+ | allowSpaces = many1Char $ noneOf "}"
+ | otherwise = many1Char $ noneOf " }"
nestedInlines :: (Show a, PandocMonad m)
=> TWParser m a -> TWParser m B.Inlines
@@ -413,10 +420,10 @@ emphHtml :: PandocMonad m => TWParser m B.Inlines
emphHtml = B.emph . mconcat <$> (parseHtmlContent "em" inline <|> parseHtmlContent "i" inline)
nestedString :: (Show a, PandocMonad m)
- => TWParser m a -> TWParser m String
-nestedString end = innerSpace <|> count 1 nonspaceChar
+ => TWParser m a -> TWParser m Text
+nestedString end = innerSpace <|> countChar 1 nonspaceChar
where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
+ innerSpace = try $ many1Char spaceChar <* notFollowedBy end
boldCode :: PandocMonad m => TWParser m B.Inlines
boldCode = try $ (B.strong . B.code . fromEntities) <$> enclosed (string "==") nestedString
@@ -429,7 +436,7 @@ code = try $ (B.code . fromEntities) <$> enclosed (char '=') nestedString
codeHtml :: PandocMonad m => TWParser m B.Inlines
codeHtml = do
- (attrs, content) <- parseHtmlContentWithAttrs "code" anyChar
+ (attrs, content) <- parseCharHtmlContentWithAttrs "code" anyChar
return $ B.codeWith attrs $ fromEntities content
autoLink :: PandocMonad m => TWParser m B.Inlines
@@ -437,7 +444,7 @@ autoLink = try $ do
state <- getState
guard $ stateAllowLinks state
(text, url) <- parseLink
- guard $ checkLink (last url)
+ guard $ checkLink (T.last url)
return $ makeLink (text, url)
where
parseLink = notFollowedBy nop >> (uri <|> emailAddress)
@@ -447,17 +454,17 @@ autoLink = try $ do
| otherwise = isAlphaNum c
str :: PandocMonad m => TWParser m B.Inlines
-str = B.str <$> (many1 alphaNum <|> count 1 characterReference)
+str = B.str <$> (many1Char alphaNum <|> countChar 1 characterReference)
nop :: PandocMonad m => TWParser m B.Inlines
nop = try $ (void exclamation <|> void nopTag) >> followContent
where
exclamation = char '!'
nopTag = stringAnyCase "<nop>"
- followContent = B.str . fromEntities <$> many1 nonspaceChar
+ followContent = B.str . fromEntities <$> many1Char nonspaceChar
symbol :: PandocMonad m => TWParser m B.Inlines
-symbol = B.str <$> count 1 nonspaceChar
+symbol = B.str <$> countChar 1 nonspaceChar
smart :: PandocMonad m => TWParser m B.Inlines
smart = do
@@ -491,13 +498,13 @@ link = try $ do
setState $ st{ stateAllowLinks = True }
return $ B.link url title content
-linkText :: PandocMonad m => TWParser m (String, String, B.Inlines)
+linkText :: PandocMonad m => TWParser m (Text, Text, B.Inlines)
linkText = do
string "[["
- url <- many1Till anyChar (char ']')
+ url <- T.pack <$> many1Till anyChar (char ']')
content <- option (B.str url) (mconcat <$> linkContent)
char ']'
return (url, "", content)
where
- linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent
+ linkContent = char '[' >> many1Till anyChar (char ']') >>= parseLinkContent . T.pack
parseLinkContent = parseFromString' $ many1 inline
diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs
index a638fdf40..5e7aaf910 100644
--- a/src/Text/Pandoc/Readers/Textile.hs
+++ b/src/Text/Pandoc/Readers/Textile.hs
@@ -1,4 +1,6 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.Textile
Copyright : Copyright (C) 2010-2012 Paul Rivier
@@ -38,7 +40,7 @@ import Prelude
import Control.Monad (guard, liftM)
import Control.Monad.Except (throwError)
import Data.Char (digitToInt, isUpper)
-import Data.List (intercalate, intersperse, transpose)
+import Data.List (intersperse, transpose)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup (Tag (..), fromAttrib)
@@ -52,7 +54,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag)
import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline)
-import Text.Pandoc.Shared (crFilter, trim, underlineSpan)
+import Text.Pandoc.Shared (crFilter, trim, underlineSpan, tshow)
-- | Parse a Textile text and return a Pandoc document.
readTextile :: PandocMonad m
@@ -61,21 +63,21 @@ readTextile :: PandocMonad m
-> m Pandoc
readTextile opts s = do
parsed <- readWithM parseTextile def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case parsed of
Right result -> return result
Left e -> throwError e
-- | Generate a Pandoc ADT from a textile document
-parseTextile :: PandocMonad m => ParserT [Char] ParserState m Pandoc
+parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc
parseTextile = do
many blankline
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
let firstPassParser = noteBlock <|> lineClump
- manyTill firstPassParser eof >>= setInput . concat
+ manyTill firstPassParser eof >>= setInput . T.concat
setPosition startPos
st' <- getState
let reversedNotes = stateNotes st'
@@ -84,29 +86,29 @@ parseTextile = do
blocks <- parseBlocks
return $ Pandoc nullMeta (B.toList blocks) -- FIXME
-noteMarker :: PandocMonad m => ParserT [Char] ParserState m [Char]
-noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
+noteMarker :: PandocMonad m => ParserT Text ParserState m Text
+noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.')
-noteBlock :: PandocMonad m => ParserT [Char] ParserState m [Char]
+noteBlock :: PandocMonad m => ParserT Text ParserState m Text
noteBlock = try $ do
startPos <- getPosition
ref <- noteMarker
optional blankline
- contents <- unlines <$> many1Till anyLine (blanklines <|> noteBlock)
+ contents <- T.unlines <$> many1Till anyLine (blanklines <|> noteBlock)
endPos <- getPosition
- let newnote = (ref, contents ++ "\n")
+ let newnote = (ref, contents <> "\n")
st <- getState
let oldnotes = stateNotes st
updateState $ \s -> s { stateNotes = newnote : oldnotes }
-- return blanks so line count isn't affected
- return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
+ return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n"
-- | Parse document blocks
-parseBlocks :: PandocMonad m => ParserT [Char] ParserState m Blocks
+parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks
parseBlocks = mconcat <$> manyTill block eof
-- | Block parsers list tried in definition order
-blockParsers :: PandocMonad m => [ParserT [Char] ParserState m Blocks]
+blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks]
blockParsers = [ codeBlock
, header
, blockQuote
@@ -121,22 +123,22 @@ blockParsers = [ codeBlock
]
-- | Any block in the order of definition of blockParsers
-block :: PandocMonad m => ParserT [Char] ParserState m Blocks
+block :: PandocMonad m => ParserT Text ParserState m Blocks
block = do
res <- choice blockParsers <?> "block"
- trace (take 60 $ show $ B.toList res)
+ trace (T.take 60 $ tshow $ B.toList res)
return res
-commentBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks
commentBlock = try $ do
string "###."
manyTill anyLine blanklines
return mempty
-codeBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlock = codeBlockBc <|> codeBlockPre
-codeBlockBc :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlockBc :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockBc = try $ do
string "bc."
extended <- option False (True <$ char '.')
@@ -150,31 +152,31 @@ codeBlockBc = try $ do
rest <- many (notFollowedBy ender *> anyLine)
return (f:rest)
else manyTill anyLine blanklines
- return $ B.codeBlock (trimTrailingNewlines (unlines contents))
+ return $ B.codeBlock (trimTrailingNewlines (T.unlines contents))
-trimTrailingNewlines :: String -> String
-trimTrailingNewlines = reverse . dropWhile (=='\n') . reverse
+trimTrailingNewlines :: Text -> Text
+trimTrailingNewlines = T.dropWhileEnd (=='\n')
-- | Code Blocks in Textile are between <pre> and </pre>
-codeBlockPre :: PandocMonad m => ParserT [Char] ParserState m Blocks
+codeBlockPre :: PandocMonad m => ParserT Text ParserState m Blocks
codeBlockPre = try $ do
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
- result' <- manyTill anyChar (htmlTag (tagClose (=="pre")))
+ result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre")))
-- drop leading newline if any
- let result'' = case result' of
- '\n':xs -> xs
- _ -> result'
+ let result'' = case T.uncons result' of
+ Just ('\n', xs) -> xs
+ _ -> result'
-- drop trailing newline if any
- let result''' = case reverse result'' of
- '\n':_ -> init result''
- _ -> result''
- let classes = words $ fromAttrib "class" t
+ let result''' = case T.unsnoc result'' of
+ Just (xs, '\n') -> xs
+ _ -> result''
+ let classes = T.words $ fromAttrib "class" t
let ident = fromAttrib "id" t
let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
return $ B.codeBlockWith (ident,classes,kvs) result'''
-- | Header of the form "hN. content" with N in 1..6
-header :: PandocMonad m => ParserT [Char] ParserState m Blocks
+header :: PandocMonad m => ParserT Text ParserState m Blocks
header = try $ do
char 'h'
level <- digitToInt <$> oneOf "123456"
@@ -186,14 +188,14 @@ header = try $ do
return $ B.headerWith attr' level name
-- | Blockquote of the form "bq. content"
-blockQuote :: PandocMonad m => ParserT [Char] ParserState m Blocks
+blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks
blockQuote = try $ do
string "bq" >> attributes >> char '.' >> whitespace
B.blockQuote <$> para
-- Horizontal rule
-hrule :: PandocMonad m => ParserT [Char] st m Blocks
+hrule :: PandocMonad m => ParserT Text st m Blocks
hrule = try $ do
skipSpaces
start <- oneOf "-*"
@@ -208,39 +210,39 @@ hrule = try $ do
-- | Can be a bullet list or an ordered list. This implementation is
-- strict in the nesting, sublist must start at exactly "parent depth
-- plus one"
-anyList :: PandocMonad m => ParserT [Char] ParserState m Blocks
+anyList :: PandocMonad m => ParserT Text ParserState m Blocks
anyList = try $ anyListAtDepth 1 <* blanklines
-- | This allow one type of list to be nested into an other type,
-- provided correct nesting
-anyListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
anyListAtDepth depth = choice [ bulletListAtDepth depth,
orderedListAtDepth depth,
definitionList ]
-- | Bullet List of given depth, depth being the number of leading '*'
-bulletListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
-- | Bullet List Item of given depth, depth being the number of
-- leading '*'
-bulletListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
bulletListItemAtDepth = genericListItemAtDepth '*'
-- | Ordered List of given depth, depth being the number of
-- leading '#'
-orderedListAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListAtDepth depth = try $ do
items <- many1 (orderedListItemAtDepth depth)
return $ B.orderedList items
-- | Ordered List Item of given depth, depth being the number of
-- leading '#'
-orderedListItemAtDepth :: PandocMonad m => Int -> ParserT [Char] ParserState m Blocks
+orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks
orderedListItemAtDepth = genericListItemAtDepth '#'
-- | Common implementation of list items
-genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT [Char] ParserState m Blocks
+genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks
genericListItemAtDepth c depth = try $ do
count depth (char c) >> attributes >> whitespace
contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|>
@@ -250,25 +252,25 @@ genericListItemAtDepth c depth = try $ do
return $ contents <> sublist
-- | A definition list is a set of consecutive definition items
-definitionList :: PandocMonad m => ParserT [Char] ParserState m Blocks
+definitionList :: PandocMonad m => ParserT Text ParserState m Blocks
definitionList = try $ B.definitionList <$> many1 definitionListItem
-- | List start character.
-listStart :: PandocMonad m => ParserT [Char] ParserState m ()
+listStart :: PandocMonad m => ParserT Text ParserState m ()
listStart = genericListStart '*'
<|> () <$ genericListStart '#'
<|> () <$ definitionListStart
-genericListStart :: PandocMonad m => Char -> ParserT [Char] st m ()
+genericListStart :: PandocMonad m => Char -> ParserT Text st m ()
genericListStart c = () <$ try (many1 (char c) >> whitespace)
-basicDLStart :: PandocMonad m => ParserT [Char] ParserState m ()
+basicDLStart :: PandocMonad m => ParserT Text ParserState m ()
basicDLStart = do
char '-'
whitespace
notFollowedBy newline
-definitionListStart :: PandocMonad m => ParserT [Char] ParserState m Inlines
+definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines
definitionListStart = try $ do
basicDLStart
trimInlines . mconcat <$>
@@ -281,26 +283,26 @@ definitionListStart = try $ do
-- the term defined, then spaces and ":=". The definition follows, on
-- the same single line, or spaned on multiple line, after a line
-- break.
-definitionListItem :: PandocMonad m => ParserT [Char] ParserState m (Inlines, [Blocks])
+definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks])
definitionListItem = try $ do
term <- (mconcat . intersperse B.linebreak) <$> many1 definitionListStart
def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef)
return (term, def')
- where inlineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
+ where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
inlineDef = liftM (\d -> [B.plain d])
$ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline
- multilineDef :: PandocMonad m => ParserT [Char] ParserState m [Blocks]
+ multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks]
multilineDef = try $ do
optional whitespace >> newline
- s <- many1Till anyChar (try (string "=:" >> newline))
- -- this ++ "\n\n" does not look very good
- ds <- parseFromString' parseBlocks (s ++ "\n\n")
+ s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline))
+ -- this <> "\n\n" does not look very good
+ ds <- parseFromString' parseBlocks (s <> "\n\n")
return [ds]
-- raw content
-- | A raw Html Block, optionally followed by blanklines
-rawHtmlBlock :: PandocMonad m => ParserT [Char] ParserState m Blocks
+rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks
rawHtmlBlock = try $ do
skipMany spaceChar
(_,b) <- htmlTag isBlockTag
@@ -308,14 +310,14 @@ rawHtmlBlock = try $ do
return $ B.rawBlock "html" b
-- | Raw block of LaTeX content
-rawLaTeXBlock' :: PandocMonad m => ParserT [Char] ParserState m Blocks
+rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks
rawLaTeXBlock' = do
guardEnabled Ext_raw_tex
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
-- | In textile, paragraphs are separated by blank lines.
-para :: PandocMonad m => ParserT [Char] ParserState m Blocks
+para :: PandocMonad m => ParserT Text ParserState m Blocks
para = B.para . trimInlines . mconcat <$> many1 inline
-- Tables
@@ -326,7 +328,7 @@ toAlignment '>' = AlignRight
toAlignment '=' = AlignCenter
toAlignment _ = AlignDefault
-cellAttributes :: PandocMonad m => ParserT [Char] ParserState m (Bool, Alignment)
+cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment)
cellAttributes = try $ do
isHeader <- option False (True <$ char '_')
-- we just ignore colspan and rowspan markers:
@@ -339,18 +341,18 @@ cellAttributes = try $ do
return (isHeader, alignment)
-- | A table cell spans until a pipe |
-tableCell :: PandocMonad m => ParserT [Char] ParserState m ((Bool, Alignment), Blocks)
+tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks)
tableCell = try $ do
char '|'
(isHeader, alignment) <- option (False, AlignDefault) cellAttributes
notFollowedBy blankline
- raw <- trim <$>
+ raw <- trim . T.pack <$>
many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
content <- mconcat <$> parseFromString' (many inline) raw
return ((isHeader, alignment), B.plain content)
-- | A table row is made of many table cells
-tableRow :: PandocMonad m => ParserT [Char] ParserState m [((Bool, Alignment), Blocks)]
+tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)]
tableRow = try $ do
-- skip optional row attributes
optional $ try $ do
@@ -360,7 +362,7 @@ tableRow = try $ do
many1 tableCell <* char '|' <* blankline
-- | A table with an optional header.
-table :: PandocMonad m => ParserT [Char] ParserState m Blocks
+table :: PandocMonad m => ParserT Text ParserState m Blocks
table = try $ do
-- ignore table attributes
caption <- option mempty $ try $ do
@@ -384,7 +386,7 @@ table = try $ do
(map (map snd) rows)
-- | Ignore markers for cols, thead, tfoot.
-ignorableRow :: PandocMonad m => ParserT [Char] ParserState m ()
+ignorableRow :: PandocMonad m => ParserT Text ParserState m ()
ignorableRow = try $ do
char '|'
oneOf ":^-~"
@@ -393,9 +395,9 @@ ignorableRow = try $ do
_ <- anyLine
return ()
-explicitBlockStart :: PandocMonad m => String -> ParserT [Char] ParserState m ()
+explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m ()
explicitBlockStart name = try $ do
- string name
+ string (T.unpack name)
attributes
char '.'
optional whitespace
@@ -404,9 +406,9 @@ explicitBlockStart name = try $ do
-- | Blocks like 'p' and 'table' do not need explicit block tag.
-- However, they can be used to set HTML/CSS attributes when needed.
maybeExplicitBlock :: PandocMonad m
- => String -- ^ block tag name
- -> ParserT [Char] ParserState m Blocks -- ^ implicit block
- -> ParserT [Char] ParserState m Blocks
+ => Text -- ^ block tag name
+ -> ParserT Text ParserState m Blocks -- ^ implicit block
+ -> ParserT Text ParserState m Blocks
maybeExplicitBlock name blk = try $ do
optional $ explicitBlockStart name
blk
@@ -419,11 +421,11 @@ maybeExplicitBlock name blk = try $ do
-- | Any inline element
-inline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+inline :: PandocMonad m => ParserT Text ParserState m Inlines
inline = choice inlineParsers <?> "inline"
-- | Inline parsers tried in order
-inlineParsers :: PandocMonad m => [ParserT [Char] ParserState m Inlines]
+inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines]
inlineParsers = [ str
, whitespace
, endline
@@ -437,13 +439,13 @@ inlineParsers = [ str
, link
, image
, mark
- , (B.str . (:[])) <$> characterReference
+ , (B.str . T.singleton) <$> characterReference
, smartPunctuation inline
, symbol
]
-- | Inline markups
-inlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
+inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
, simpleInline (string "**") B.strong
, simpleInline (string "__") B.emph
@@ -457,33 +459,33 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
]
-- | Trademark, registered, copyright
-mark :: PandocMonad m => ParserT [Char] st m Inlines
+mark :: PandocMonad m => ParserT Text st m Inlines
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
-reg :: PandocMonad m => ParserT [Char] st m Inlines
+reg :: PandocMonad m => ParserT Text st m Inlines
reg = do
oneOf "Rr"
char ')'
return $ B.str "\174"
-tm :: PandocMonad m => ParserT [Char] st m Inlines
+tm :: PandocMonad m => ParserT Text st m Inlines
tm = do
oneOf "Tt"
oneOf "Mm"
char ')'
return $ B.str "\8482"
-copy :: PandocMonad m => ParserT [Char] st m Inlines
+copy :: PandocMonad m => ParserT Text st m Inlines
copy = do
oneOf "Cc"
char ')'
return $ B.str "\169"
-note :: PandocMonad m => ParserT [Char] ParserState m Inlines
+note :: PandocMonad m => ParserT Text ParserState m Inlines
note = try $ do
ref <- char '[' *> many1 digit <* char ']'
notes <- stateNotes <$> getState
- case lookup ref notes of
+ case lookup (T.pack ref) notes of
Nothing -> Prelude.fail "note not found"
Just raw -> B.note <$> parseFromString' parseBlocks raw
@@ -500,42 +502,42 @@ stringBreakers :: [Char]
stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
wordBoundaries :: [Char]
-wordBoundaries = markupChars ++ stringBreakers
+wordBoundaries = markupChars <> stringBreakers
-- | Parse a hyphened sequence of words
-hyphenedWords :: PandocMonad m => ParserT [Char] ParserState m String
+hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text
hyphenedWords = do
x <- wordChunk
xs <- many (try $ char '-' >> wordChunk)
- return $ intercalate "-" (x:xs)
+ return $ T.intercalate "-" (x:xs)
-wordChunk :: PandocMonad m => ParserT [Char] ParserState m String
+wordChunk :: PandocMonad m => ParserT Text ParserState m Text
wordChunk = try $ do
hd <- noneOf wordBoundaries
tl <- many ( noneOf wordBoundaries <|>
try (notFollowedBy' note *> oneOf markupChars
<* lookAhead (noneOf wordBoundaries) ) )
- return $ hd:tl
+ return $ T.pack $ hd:tl
-- | Any string
-str :: PandocMonad m => ParserT [Char] ParserState m Inlines
+str :: PandocMonad m => ParserT Text ParserState m Inlines
str = do
baseStr <- hyphenedWords
-- RedCloth compliance : if parsed word is uppercase and immediately
-- followed by parens, parens content is unconditionally word acronym
fullStr <- option baseStr $ try $ do
- guard $ all isUpper baseStr
- acro <- enclosed (char '(') (char ')') anyChar'
- return $ concat [baseStr, " (", acro, ")"]
+ guard $ T.all isUpper baseStr
+ acro <- T.pack <$> enclosed (char '(') (char ')') anyChar'
+ return $ T.concat [baseStr, " (", acro, ")"]
updateLastStrPos
return $ B.str fullStr
-- | Some number of space chars
-whitespace :: PandocMonad m => ParserT [Char] st m Inlines
+whitespace :: PandocMonad m => ParserT Text st m Inlines
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
-- | In Textile, an isolated endline character is a line break
-endline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+endline :: PandocMonad m => ParserT Text ParserState m Inlines
endline = try $ do
newline
notFollowedBy blankline
@@ -543,18 +545,18 @@ endline = try $ do
notFollowedBy rawHtmlBlock
return B.linebreak
-rawHtmlInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
-- | Raw LaTeX Inline
-rawLaTeXInline' :: PandocMonad m => ParserT [Char] ParserState m Inlines
+rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines
rawLaTeXInline' = try $ do
guardEnabled Ext_raw_tex
B.rawInline "latex" <$> rawLaTeXInline
-- | Textile standard link syntax is "label":target. But we
-- can also have ["label":target].
-link :: PandocMonad m => ParserT [Char] ParserState m Inlines
+link :: PandocMonad m => ParserT Text ParserState m Inlines
link = try $ do
bracketed <- (True <$ char '[') <|> return False
char '"' *> notFollowedBy (oneOf " \t\n\r")
@@ -567,121 +569,122 @@ link = try $ do
else lookAhead $ space <|> eof' <|>
try (oneOf "!.,;:" *>
(space <|> newline <|> eof'))
- url <- many1Till nonspaceChar stop
+ url <- T.pack <$> many1Till nonspaceChar stop
let name' = if B.toList name == [Str "$"] then B.str url else name
return $ if attr == nullAttr
then B.link url "" name'
else B.spanWith attr $ B.link url "" name'
-- | image embedding
-image :: PandocMonad m => ParserT [Char] ParserState m Inlines
+image :: PandocMonad m => ParserT Text ParserState m Inlines
image = try $ do
char '!' >> notFollowedBy space
(ident, cls, kvs) <- attributes
let attr = case lookup "style" kvs of
Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls)
Nothing -> (ident, cls, kvs)
- src <- many1 (noneOf " \t\n\r!(")
- alt <- option "" $ try $ char '(' *> manyTill anyChar (char ')')
+ src <- T.pack <$> many1 (noneOf " \t\n\r!(")
+ alt <- fmap T.pack $ option "" $ try $ char '(' *> manyTill anyChar (char ')')
char '!'
return $ B.imageWith attr src alt (B.str alt)
-escapedInline :: PandocMonad m => ParserT [Char] ParserState m Inlines
+escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines
escapedInline = escapedEqs <|> escapedTag
-escapedEqs :: PandocMonad m => ParserT [Char] ParserState m Inlines
-escapedEqs = B.str <$>
+escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedEqs = B.str . T.pack <$>
try (string "==" *> manyTill anyChar' (try $ string "=="))
-- | literal text escaped btw <notextile> tags
-escapedTag :: PandocMonad m => ParserT [Char] ParserState m Inlines
-escapedTag = B.str <$>
+escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines
+escapedTag = B.str . T.pack <$>
try (string "<notextile>" *>
manyTill anyChar' (try $ string "</notextile>"))
-- | Any special symbol defined in wordBoundaries
-symbol :: PandocMonad m => ParserT [Char] ParserState m Inlines
-symbol = B.str . singleton <$> (notFollowedBy newline *>
- notFollowedBy rawHtmlBlock *>
- oneOf wordBoundaries)
+symbol :: PandocMonad m => ParserT Text ParserState m Inlines
+symbol = B.str . T.singleton <$> (notFollowedBy newline *>
+ notFollowedBy rawHtmlBlock *>
+ oneOf wordBoundaries)
-- | Inline code
-code :: PandocMonad m => ParserT [Char] ParserState m Inlines
+code :: PandocMonad m => ParserT Text ParserState m Inlines
code = code1 <|> code2
-- any character except a newline before a blank line
-anyChar' :: PandocMonad m => ParserT [Char] ParserState m Char
+anyChar' :: PandocMonad m => ParserT Text ParserState m Char
anyChar' =
satisfy (/='\n') <|>
try (char '\n' <* notFollowedBy blankline)
-code1 :: PandocMonad m => ParserT [Char] ParserState m Inlines
-code1 = B.code <$> surrounded (char '@') anyChar'
+code1 :: PandocMonad m => ParserT Text ParserState m Inlines
+code1 = B.code . T.pack <$> surrounded (char '@') anyChar'
-code2 :: PandocMonad m => ParserT [Char] ParserState m Inlines
+code2 :: PandocMonad m => ParserT Text ParserState m Inlines
code2 = do
htmlTag (tagOpen (=="tt") null)
- B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
+ B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
-- | Html / CSS attributes
-attributes :: PandocMonad m => ParserT [Char] ParserState m Attr
+attributes :: PandocMonad m => ParserT Text ParserState m Attr
attributes = foldl (flip ($)) ("",[],[]) <$>
try (do special <- option id specialAttribute
attrs <- many attribute
return (special : attrs))
-specialAttribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
specialAttribute = do
alignStr <- ("center" <$ char '=') <|>
("justify" <$ try (string "<>")) <|>
("right" <$ char '>') <|>
("left" <$ char '<')
notFollowedBy spaceChar
- return $ addStyle ("text-align:" ++ alignStr)
+ return $ addStyle $ T.pack $ "text-align:" ++ alignStr
-attribute :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
attribute = try $
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
-classIdAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
classIdAttr = try $ do -- (class class #id)
char '('
- ws <- words `fmap` manyTill anyChar' (char ')')
+ ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')')
case reverse ws of
- [] -> return $ \(_,_,keyvals) -> ("",[],keyvals)
- (('#':ident'):classes') -> return $ \(_,_,keyvals) ->
- (ident',classes',keyvals)
- classes' -> return $ \(_,_,keyvals) ->
- ("",classes',keyvals)
-
-styleAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+ []
+ -> return $ \(_,_,keyvals) -> ("",[],keyvals)
+ ((T.uncons -> Just ('#', ident')):classes')
+ -> return $ \(_,_,keyvals) -> (ident',classes',keyvals)
+ classes'
+ -> return $ \(_,_,keyvals) -> ("",classes',keyvals)
+
+styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
styleAttr = do
style <- try $ enclosed (char '{') (char '}') anyChar'
- return $ addStyle style
+ return $ addStyle $ T.pack style
-addStyle :: String -> Attr -> Attr
+addStyle :: Text -> Attr -> Attr
addStyle style (id',classes,keyvals) =
(id',classes,keyvals')
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
- style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
+ style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals]
-langAttr :: PandocMonad m => ParserT [Char] ParserState m (Attr -> Attr)
+langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr)
langAttr = do
lang <- try $ enclosed (char '[') (char ']') alphaNum
- return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
+ return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals)
-- | Parses material surrounded by a parser.
surrounded :: (PandocMonad m, Show t)
- => ParserT [Char] st m t -- ^ surrounding parser
- -> ParserT [Char] st m a -- ^ content parser (to be used repeatedly)
- -> ParserT [Char] st m [a]
+ => ParserT Text st m t -- ^ surrounding parser
+ -> ParserT Text st m a -- ^ content parser (to be used repeatedly)
+ -> ParserT Text st m [a]
surrounded border =
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
simpleInline :: PandocMonad m
- => ParserT [Char] ParserState m t -- ^ surrounding parser
+ => ParserT Text ParserState m t -- ^ surrounding parser
-> (Inlines -> Inlines) -- ^ Inline constructor
- -> ParserT [Char] ParserState m Inlines -- ^ content parser (to be used repeatedly)
+ -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly)
simpleInline border construct = try $ do
notAfterString
border *> notFollowedBy (oneOf " \t\n\r")
@@ -695,7 +698,7 @@ simpleInline border construct = try $ do
then body
else B.spanWith attr body
-groupedInlineMarkup :: PandocMonad m => ParserT [Char] ParserState m Inlines
+groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines
groupedInlineMarkup = try $ do
char '['
sp1 <- option mempty $ B.space <$ whitespace
@@ -704,9 +707,5 @@ groupedInlineMarkup = try $ do
char ']'
return $ sp1 <> result <> sp2
--- | Create a singleton list
-singleton :: a -> [a]
-singleton x = [x]
-
-eof' :: Monad m => ParserT [Char] s m Char
+eof' :: Monad m => ParserT Text s m Char
eof' = '\n' <$ eof
diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs
index 5daf6b0bb..501c204f5 100644
--- a/src/Text/Pandoc/Readers/TikiWiki.hs
+++ b/src/Text/Pandoc/Readers/TikiWiki.hs
@@ -43,19 +43,19 @@ readTikiWiki :: PandocMonad m
-> m Pandoc
readTikiWiki opts s = do
res <- readWithM parseTikiWiki def{ stateOptions = opts }
- (T.unpack (crFilter s) ++ "\n\n")
+ (crFilter s <> "\n\n")
case res of
Left e -> throwError e
Right d -> return d
-type TikiWikiParser = ParserT [Char] ParserState
+type TikiWikiParser = ParserT Text ParserState
--
-- utility functions
--
-tryMsg :: String -> TikiWikiParser m a -> TikiWikiParser m a
-tryMsg msg p = try p <?> msg
+tryMsg :: Text -> TikiWikiParser m a -> TikiWikiParser m a
+tryMsg msg p = try p <?> (T.unpack msg)
skip :: TikiWikiParser m a -> TikiWikiParser m ()
skip parser = Control.Monad.void parser
@@ -89,7 +89,7 @@ block = do
<|> para
skipMany blankline
when (verbosity >= INFO) $
- trace (printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res))
+ trace (T.pack $ printf "line %d: %s" (sourceLine pos) (take 60 $ show $ B.toList res))
return res
blockElements :: PandocMonad m => TikiWikiParser m B.Blocks
@@ -133,7 +133,7 @@ tableRow :: PandocMonad m => TikiWikiParser m [B.Blocks]
tableRow = try $ do
-- row <- sepBy1 (many1Till inline $ oneOf "\n|") (try $ string "|" <* notFollowedBy (oneOf "|\n"))
-- return $ map (B.plain . mconcat) row
- row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn) (try $ string "|" <* notFollowedBy (oneOf "|\n"))
+ row <- sepBy1 (many1 (noneOf "\n|") >>= parseColumn . T.pack) (try $ string "|" <* notFollowedBy (oneOf "|\n"))
return $ map B.plain row
where
parseColumn x = do
@@ -342,15 +342,15 @@ listItemLine nest = lineContent >>= parseContent
lineContent = do
content <- anyLine
continuation <- optionMaybe listContinuation
- return $ filterSpaces content ++ "\n" ++ Data.Maybe.fromMaybe "" continuation
- filterSpaces = reverse . dropWhile (== ' ') . reverse
+ return $ filterSpaces content <> "\n" <> Data.Maybe.fromMaybe "" continuation
+ filterSpaces = T.dropWhileEnd (== ' ')
listContinuation = string (replicate nest '+') >> lineContent
parseContent x = do
parsed <- parseFromString (many1 inline) x
return $ mconcat $ dropWhileEnd (== B.space) parsed
-- Turn the CODE macro attributes into Pandoc code block attributes.
-mungeAttrs :: [(String, String)] -> (String, [String], [(String, String)])
+mungeAttrs :: [(Text, Text)] -> (Text, [Text], [(Text, Text)])
mungeAttrs rawAttrs = ("", classes, rawAttrs)
where
-- "colors" is TikiWiki CODE macro for "name of language to do
@@ -370,7 +370,7 @@ codeMacro = try $ do
string "{CODE("
rawAttrs <- macroAttrs
string ")}"
- body <- manyTill anyChar (try (string "{CODE}"))
+ body <- T.pack <$> manyTill anyChar (try (string "{CODE}"))
newline
if not (null rawAttrs)
then
@@ -428,9 +428,9 @@ nbsp = try $ do
htmlComment :: PandocMonad m => TikiWikiParser m B.Inlines
htmlComment = try $ do
string "~hc~"
- inner <- many1 $ noneOf "~"
+ inner <- fmap T.pack $ many1 $ noneOf "~"
string "~/hc~"
- return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " ++ inner ++ " ~/hc~ :END "
+ return $ B.str $ " NOT SUPPORTED: ~hc~ (html comment opener) BEGIN: " <> inner <> " ~/hc~ :END "
linebreak :: PandocMonad m => TikiWikiParser m B.Inlines
linebreak = newline >> notFollowedBy newline >> (lastNewline <|> innerNewline)
@@ -469,15 +469,15 @@ image = try $ do
let title = fromMaybe src $ lookup "desc" rawAttrs
let alt = fromMaybe title $ lookup "alt" rawAttrs
let classes = map fst $ filter (\(_,b) -> b == "" || b == "y") rawAttrs
- if not (null src)
+ if not (T.null src)
then
return $ B.imageWith ("", classes, rawAttrs) src title (B.str alt)
else
- return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " ++ printAttrs rawAttrs ++ "} :END "
+ return $ B.str $ " NOT SUPPORTED: image without src attribute BEGIN: {img " <> printAttrs rawAttrs <> "} :END "
where
- printAttrs attrs = unwords $ map (\(a, b) -> a ++ "=\"" ++ b ++ "\"") attrs
+ printAttrs attrs = T.unwords $ map (\(a, b) -> a <> "=\"" <> b <> "\"") attrs
-imageAttr :: PandocMonad m => TikiWikiParser m (String, String)
+imageAttr :: PandocMonad m => TikiWikiParser m (Text, Text)
imageAttr = try $ do
key <- many1 (noneOf "=} \t\n")
char '='
@@ -485,7 +485,7 @@ imageAttr = try $ do
value <- many1 (noneOf "}\"\n")
optional $ char '"'
optional $ char ','
- return (key, value)
+ return (T.pack key, T.pack value)
-- __strong__
@@ -500,57 +500,57 @@ emph = try $ fmap B.emph (enclosed (string "''") nestedInlines)
escapedChar :: PandocMonad m => TikiWikiParser m B.Inlines
escapedChar = try $ do
string "~"
- mNumber <- safeRead <$> many1 digit
+ mNumber <- safeRead . T.pack <$> many1 digit
string "~"
return $ B.str $
case mNumber of
- Just number -> [toEnum (number :: Int) :: Char]
- Nothing -> []
+ Just number -> T.singleton $ toEnum (number :: Int)
+ Nothing -> ""
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
centered :: PandocMonad m => TikiWikiParser m B.Inlines
centered = try $ do
string "::"
- inner <- many1 $ noneOf ":\n"
+ inner <- fmap T.pack $ many1 $ noneOf ":\n"
string "::"
- return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" ++ inner ++ ":: :END "
+ return $ B.str $ " NOT SUPPORTED: :: (centered) BEGIN: ::" <> inner <> ":: :END "
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
colored :: PandocMonad m => TikiWikiParser m B.Inlines
colored = try $ do
string "~~"
- inner <- many1 $ noneOf "~\n"
+ inner <- fmap T.pack $ many1 $ noneOf "~\n"
string "~~"
- return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" ++ inner ++ "~~ :END "
+ return $ B.str $ " NOT SUPPORTED: ~~ (colored) BEGIN: ~~" <> inner <> "~~ :END "
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
underlined :: PandocMonad m => TikiWikiParser m B.Inlines
underlined = try $ do
string "==="
- inner <- many1 $ noneOf "=\n"
+ inner <- fmap T.pack $ many1 $ noneOf "=\n"
string "==="
- return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" ++ inner ++ "=== :END "
+ return $ B.str $ " NOT SUPPORTED: ==== (underlined) BEGIN: ===" <> inner <> "=== :END "
-- UNSUPPORTED, as there doesn't seem to be any facility in calibre
-- for this
boxed :: PandocMonad m => TikiWikiParser m B.Inlines
boxed = try $ do
string "^"
- inner <- many1 $ noneOf "^\n"
+ inner <- fmap T.pack $ many1 $ noneOf "^\n"
string "^"
- return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" ++ inner ++ "^ :END "
+ return $ B.str $ " NOT SUPPORTED: ^ (boxed) BEGIN: ^" <> inner <> "^ :END "
-- --text--
strikeout :: PandocMonad m => TikiWikiParser m B.Inlines
strikeout = try $ fmap B.strikeout (enclosed (string "--") nestedInlines)
-nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m String
-nestedString end = innerSpace <|> count 1 nonspaceChar
+nestedString :: (Show a, PandocMonad m) => TikiWikiParser m a -> TikiWikiParser m Text
+nestedString end = innerSpace <|> countChar 1 nonspaceChar
where
- innerSpace = try $ many1 spaceChar <* notFollowedBy end
+ innerSpace = try $ T.pack <$> many1 spaceChar <* notFollowedBy end
breakChars :: PandocMonad m => TikiWikiParser m B.Inlines
breakChars = try $ string "%%%" >> return B.linebreak
@@ -564,7 +564,7 @@ superMacro = try $ do
string "{SUP("
manyTill anyChar (string ")}")
body <- manyTill anyChar (string "{SUP}")
- return $ B.superscript $ B.text body
+ return $ B.superscript $ B.text $ T.pack body
-- subscript: baz{TAG(tag=>sub)}sub{TAG}qux / qux{SUB()}sub2{SUB}qux
subTag :: PandocMonad m => TikiWikiParser m B.Inlines
@@ -575,22 +575,22 @@ subMacro = try $ do
string "{SUB("
manyTill anyChar (string ")}")
body <- manyTill anyChar (string "{SUB}")
- return $ B.subscript $ B.text body
+ return $ B.subscript $ B.text $ T.pack body
-- -+text+-
code :: PandocMonad m => TikiWikiParser m B.Inlines
code = try $ fmap (B.code . fromEntities) ( between (string "-+") (string "+-") nestedString)
-macroAttr :: PandocMonad m => TikiWikiParser m (String, String)
+macroAttr :: PandocMonad m => TikiWikiParser m (Text, Text)
macroAttr = try $ do
key <- many1 (noneOf "=)")
char '='
optional $ char '"'
value <- many1 (noneOf " )\"")
optional $ char '"'
- return (key, value)
+ return (T.pack key, T.pack value)
-macroAttrs :: PandocMonad m => TikiWikiParser m [(String, String)]
+macroAttrs :: PandocMonad m => TikiWikiParser m [(Text, Text)]
macroAttrs = try $ sepEndBy macroAttr spaces
-- ~np~ __not bold__ ~/np~
@@ -598,13 +598,13 @@ noparse :: PandocMonad m => TikiWikiParser m B.Inlines
noparse = try $ do
string "~np~"
body <- manyTill anyChar (string "~/np~")
- return $ B.str body
+ return $ B.str $ T.pack body
str :: PandocMonad m => TikiWikiParser m B.Inlines
-str = fmap B.str (many1 alphaNum <|> count 1 characterReference)
+str = fmap B.str (T.pack <$> many1 alphaNum <|> countChar 1 characterReference)
symbol :: PandocMonad m => TikiWikiParser m B.Inlines
-symbol = fmap B.str (count 1 nonspaceChar)
+symbol = fmap B.str (countChar 1 nonspaceChar)
-- [[not a link]
notExternalLink :: PandocMonad m => TikiWikiParser m B.Inlines
@@ -612,14 +612,14 @@ notExternalLink = try $ do
start <- string "[["
body <- many (noneOf "\n[]")
end <- string "]"
- return $ B.text (start ++ body ++ end)
+ return $ B.text $ T.pack $ start ++ body ++ end
-- [http://www.somesite.org url|Some Site title]
-- ((internal link))
--
-- The ((...)) wiki links and [...] external links are handled
-- exactly the same; this abstracts that out
-makeLink :: PandocMonad m => String -> String -> String -> TikiWikiParser m B.Inlines
+makeLink :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m B.Inlines
makeLink start middle end = try $ do
st <- getState
guard $ stateAllowLinks st
@@ -627,15 +627,15 @@ makeLink start middle end = try $ do
(url, title, anchor) <- wikiLinkText start middle end
parsedTitle <- parseFromString (many1 inline) title
setState $ st{ stateAllowLinks = True }
- return $ B.link (url++anchor) "" $mconcat parsedTitle
+ return $ B.link (url <> anchor) "" $ mconcat parsedTitle
-wikiLinkText :: PandocMonad m => String -> String -> String -> TikiWikiParser m (String, String, String)
+wikiLinkText :: PandocMonad m => Text -> Text -> Text -> TikiWikiParser m (Text, Text, Text)
wikiLinkText start middle end = do
- string start
- url <- many1 (noneOf $ middle ++ "\n")
+ string (T.unpack start)
+ url <- T.pack <$> many1 (noneOf $ T.unpack middle ++ "\n")
seg1 <- option url linkContent
seg2 <- option "" linkContent
- string end
+ string (T.unpack end)
if seg2 /= ""
then
return (url, seg2, seg1)
@@ -644,7 +644,7 @@ wikiLinkText start middle end = do
where
linkContent = do
char '|'
- many (noneOf middle)
+ T.pack <$> many (noneOf $ T.unpack middle)
externalLink :: PandocMonad m => TikiWikiParser m B.Inlines
externalLink = makeLink "[" "]|" "]"
diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs
index 0af52e046..996a818fd 100644
--- a/src/Text/Pandoc/Readers/Txt2Tags.hs
+++ b/src/Text/Pandoc/Readers/Txt2Tags.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Txt2Tags
Copyright : Copyright (C) 2014 Matthew Pickering
@@ -18,7 +19,6 @@ import Prelude
import Control.Monad (guard, void, when)
import Control.Monad.Except (catchError, throwError)
import Control.Monad.Reader (Reader, asks, runReader)
-import Data.Char (toLower)
import Data.Default
import Data.List (intercalate, transpose)
import Data.Maybe (fromMaybe)
@@ -36,13 +36,13 @@ import Text.Pandoc.Parsing hiding (space, spaces, uri)
import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI,
underlineSpan)
-type T2T = ParserT String ParserState (Reader T2TMeta)
+type T2T = ParserT Text ParserState (Reader T2TMeta)
-- | An object for the T2T macros meta information
-- the contents of each field is simply substituted verbatim into the file
data T2TMeta = T2TMeta {
- date :: String -- ^ Current date
- , mtime :: String -- ^ Last modification time of infile
+ date :: Text -- ^ Current date
+ , mtime :: Text -- ^ Last modification time of infile
, infile :: FilePath -- ^ Input file
, outfile :: FilePath -- ^ Output file
} deriving Show
@@ -63,7 +63,7 @@ getT2TMeta = do
_ -> catchError
(maximum <$> mapM getModTime inps)
(const (return ""))
- return $ T2TMeta curDate curMtime (intercalate ", " inps) outp
+ return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp
-- | Read Txt2Tags from an input string returning a Pandoc document
readTxt2Tags :: PandocMonad m
@@ -74,14 +74,14 @@ readTxt2Tags opts s = do
meta <- getT2TMeta
let parsed = flip runReader meta $
readWithM parseT2T (def {stateOptions = opts}) $
- T.unpack (crFilter s) ++ "\n\n"
+ crFilter s <> "\n\n"
case parsed of
Right result -> return result
Left e -> throwError e
-- | Read Txt2Tags (ignoring all macros) from an input string returning
-- a Pandoc document
--- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> String -> m Pandoc
+-- readTxt2TagsNoMacros :: PandocMonad m => ReaderOptions -> Text -> m Pandoc
-- readTxt2TagsNoMacros = readTxt2Tags
parseT2T :: T2T Pandoc
@@ -106,7 +106,7 @@ parseHeader = do
header :: T2T ()
header = titleline >> authorline >> dateline
-headerline :: B.ToMetaValue a => String -> T2T a -> T2T ()
+headerline :: B.ToMetaValue a => Text -> T2T a -> T2T ()
headerline field p = (() <$ try blankline)
<|> (p >>= updateState . B.setMeta field)
@@ -123,15 +123,15 @@ authorline =
dateline :: T2T ()
dateline = headerline "date" (trimInlines . mconcat <$> manyTill inline newline)
-type Keyword = String
-type Value = String
+type Keyword = Text
+type Value = Text
setting :: T2T (Keyword, Value)
setting = do
string "%!"
- keyword <- ignoreSpacesCap (many1 alphaNum)
+ keyword <- ignoreSpacesCap (many1Char alphaNum)
char ':'
- value <- ignoreSpacesCap (manyTill anyChar newline)
+ value <- ignoreSpacesCap (manyTillChar anyChar newline)
return (keyword, value)
-- Blocks
@@ -163,10 +163,10 @@ balancedTitle c = try $ do
spaces
level <- length <$> many1 (char c)
guard (level <= 5) -- Max header level 5
- heading <- manyTill (noneOf "\n\r") (count level (char c))
+ heading <- manyTillChar (noneOf "\n\r") (count level (char c))
label <- optionMaybe (enclosed (char '[') (char ']') (alphaNum <|> oneOf "_-"))
many spaceChar *> newline
- let attr = maybe nullAttr (\x -> (x, [], [])) label
+ let attr = maybe nullAttr (\x -> (T.pack x, [], [])) label
return $ B.headerWith attr level (trimInlines $ B.text heading)
para :: T2T Blocks
@@ -192,7 +192,7 @@ quote :: T2T Blocks
quote = try $ do
lookAhead tab
rawQuote <- many1 (tab *> optional spaces *> anyLine)
- contents <- parseFromString' parseBlocks (intercalate "\n" rawQuote ++ "\n\n")
+ contents <- parseFromString' parseBlocks (T.intercalate "\n" rawQuote <> "\n\n")
return $ B.blockQuote contents
commentLine :: T2T Inlines
@@ -243,17 +243,17 @@ listItem start end = try $ do
markerLength <- try start
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString' end $ firstLine ++ blank ++ rest
+ rest <- T.concat <$> many (listContinuation markerLength)
+ parseFromString' end $ firstLine <> blank <> rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
listContinuation :: Int
- -> T2T String
+ -> T2T Text
listContinuation markerLength = try $
notFollowedBy' (blankline >> blankline)
- *> (mappend <$> (concat <$> many1 listLine)
- <*> many blankline)
+ *> (mappend <$> (T.concat <$> many1 listLine)
+ <*> manyChar blankline)
where listLine = try $ indentWith markerLength *> anyLineNewline
-- Table
@@ -327,16 +327,16 @@ taggedBlock = do
-- Generic
-genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+genericBlock :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks
genericBlock p f s = blockMarkupArea p f s <|> blockMarkupLine p f s
-blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> String -> T2T Blocks
+blockMarkupArea :: Monoid a => T2T a -> (a -> Blocks) -> Text -> T2T Blocks
blockMarkupArea p f s = try (do
- string s *> blankline
- f . mconcat <$> manyTill p (eof <|> void (string s *> blankline)))
+ textStr s *> blankline
+ f . mconcat <$> manyTill p (eof <|> void (textStr s *> blankline)))
-blockMarkupLine :: T2T a -> (a -> Blocks) -> String -> T2T Blocks
-blockMarkupLine p f s = try (f <$> (string s *> space *> p))
+blockMarkupLine :: T2T a -> (a -> Blocks) -> Text -> T2T Blocks
+blockMarkupLine p f s = try (f <$> (textStr s *> space *> p))
-- Can be in either block or inline position
comment :: Monoid a => T2T a
@@ -385,15 +385,15 @@ italic :: T2T Inlines
italic = inlineMarkup inline B.emph '/' B.str
code :: T2T Inlines
-code = inlineMarkup ((:[]) <$> anyChar) B.code '`' id
+code = inlineMarkup (T.singleton <$> anyChar) B.code '`' id
raw :: T2T Inlines
-raw = inlineMarkup ((:[]) <$> anyChar) B.text '"' id
+raw = inlineMarkup (T.singleton <$> anyChar) B.text '"' id
tagged :: T2T Inlines
tagged = do
target <- getTarget
- inlineMarkup ((:[]) <$> anyChar) (B.rawInline target) '\'' id
+ inlineMarkup (T.singleton <$> anyChar) (B.rawInline target) '\'' id
-- Parser for markup indicated by a double character.
-- Inline markup is greedy and glued
@@ -404,33 +404,33 @@ inlineMarkup :: Monoid a
=> T2T a -- Content parser
-> (a -> Inlines) -- Constructor
-> Char -- Fence
- -> (String -> a) -- Special Case to handle ******
+ -> (Text -> a) -- Special Case to handle ******
-> T2T Inlines
inlineMarkup p f c special = try $ do
- start <- many1 (char c)
- let l = length start
+ start <- many1Char (char c)
+ let l = T.length start
guard (l >= 2)
when (l == 2) (void $ notFollowedBy space)
-- We must make sure that there is no space before the start of the
-- closing tags
- body <- optionMaybe (try $ manyTill (noneOf "\n\r")
+ body <- optionMaybe (try $ manyTillChar (noneOf "\n\r")
(try $ lookAhead (noneOf " " >> string [c,c] )))
case body of
Just middle -> do
lastChar <- anyChar
- end <- many1 (char c)
+ end <- many1Char (char c)
let parser inp = parseFromString' (mconcat <$> many p) inp
- let start' = case drop 2 start of
+ let start' = case T.drop 2 start of
"" -> mempty
xs -> special xs
- body' <- parser (middle ++ [lastChar])
- let end' = case drop 2 end of
+ body' <- parser (middle <> T.singleton lastChar)
+ let end' = case T.drop 2 end of
"" -> mempty
xs -> special xs
return $ f (start' `mappend` body' `mappend` end')
Nothing -> do -- Either bad or case such as *****
guard (l >= 5)
- let body' = replicate (l - 4) c
+ let body' = T.replicate (l - 4) $ T.singleton c
return $ f (special body')
link :: T2T Inlines
@@ -441,12 +441,12 @@ titleLink :: T2T Inlines
titleLink = try $ do
char '['
notFollowedBy space
- tokens <- sepBy1 (many $ noneOf " ]") space
+ tokens <- sepBy1 (manyChar $ noneOf " ]") space
guard (length tokens >= 2)
char ']'
let link' = last tokens
- guard $ not $ null link'
- let tit = unwords (init tokens)
+ guard $ not $ T.null link'
+ let tit = T.unwords (init tokens)
return $ B.link link' "" (B.text tit)
-- Link with image
@@ -455,7 +455,7 @@ imageLink = try $ do
char '['
body <- image
many1 space
- l <- manyTill (noneOf "\n\r ") (char ']')
+ l <- manyTillChar (noneOf "\n\r ") (char ']')
return (B.link l "" body)
macro :: T2T Inlines
@@ -466,7 +466,7 @@ macro = try $ do
maybe (return mempty) (\f -> B.str <$> asks f) (lookup name commands)
where
commands = [ ("date", date), ("mtime", mtime)
- , ("infile", infile), ("outfile", outfile)]
+ , ("infile", T.pack . infile), ("outfile", T.pack . outfile)]
-- raw URLs in text are automatically linked
url :: T2T Inlines
@@ -474,7 +474,7 @@ url = try $ do
(rawUrl, escapedUrl) <- try uri <|> emailAddress
return $ B.link rawUrl "" (B.str escapedUrl)
-uri :: T2T (String, String)
+uri :: T2T (Text, Text)
uri = try $ do
address <- t2tURI
return (address, escapeURI address)
@@ -486,25 +486,25 @@ uri = try $ do
--isT2TURI (parse t2tURI "" -> Right _) = True
--isT2TURI _ = False
-t2tURI :: T2T String
+t2tURI :: T2T Text
t2tURI = do
- start <- try ((++) <$> proto <*> urlLogin) <|> guess
- domain <- many1 chars
- sep <- many (char '/')
- form' <- option mempty ((:) <$> char '?' <*> many1 form)
- anchor' <- option mempty ((:) <$> char '#' <*> many anchor)
- return (start ++ domain ++ sep ++ form' ++ anchor')
+ start <- try ((<>) <$> proto <*> urlLogin) <|> guess
+ domain <- many1Char chars
+ sep <- manyChar (char '/')
+ form' <- option mempty (T.cons <$> char '?' <*> many1Char form)
+ anchor' <- option mempty (T.cons <$> char '#' <*> manyChar anchor)
+ return (start <> domain <> sep <> form' <> anchor')
where
protos = ["http", "https", "ftp", "telnet", "gopher", "wais"]
- proto = (++) <$> oneOfStrings protos <*> string "://"
- guess = (++) <$> (((++) <$> stringAnyCase "www" <*> option mempty ((:[]) <$> oneOf "23"))
- <|> stringAnyCase "ftp") <*> ((:[]) <$> char '.')
+ proto = (<>) <$> oneOfStrings protos <*> textStr "://"
+ guess = (<>) <$> (((<>) <$> stringAnyCase "www" <*> option mempty (T.singleton <$> oneOf "23"))
+ <|> stringAnyCase "ftp") <*> (T.singleton <$> char '.')
login = alphaNum <|> oneOf "_.-"
- pass = many (noneOf " @")
+ pass = manyChar (noneOf " @")
chars = alphaNum <|> oneOf "%._/~:,=$@&+-"
anchor = alphaNum <|> oneOf "%._0"
form = chars <|> oneOf ";*"
- urlLogin = option mempty $ try ((\x y z -> x ++ y ++ [z]) <$> many1 login <*> option mempty ((:) <$> char ':' <*> pass) <*> char '@')
+ urlLogin = option mempty $ try ((\x y z -> x <> y <> T.singleton z) <$> many1Char login <*> option mempty (T.cons <$> char ':' <*> pass) <*> char '@')
image :: T2T Inlines
@@ -512,12 +512,12 @@ image = try $ do
-- List taken from txt2tags source
let extensions = [".jpg", ".jpeg", ".gif", ".png", ".eps", ".bmp"]
char '['
- (path, ext) <- manyUntil (noneOf "\n\t\r ") (oneOfStrings extensions)
+ (path, ext) <- manyUntilChar (noneOf "\n\t\r ") (oneOfStrings extensions)
char ']'
- return $ B.image (path ++ ext) "" mempty
+ return $ B.image (path <> ext) "" mempty
-- Characters used in markup
-specialChars :: String
+specialChars :: [Char]
specialChars = "%*-_/|:+;"
tab :: T2T Char
@@ -526,8 +526,8 @@ tab = char '\t'
space :: T2T Char
space = char ' '
-spaces :: T2T String
-spaces = many space
+spaces :: T2T Text
+spaces = manyChar space
endline :: T2T Inlines
endline = try $ do
@@ -544,17 +544,17 @@ endline = try $ do
return B.softbreak
str :: T2T Inlines
-str = try $ B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str = try $ B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ")
whitespace :: T2T Inlines
whitespace = try $ B.space <$ spaceChar
symbol :: T2T Inlines
-symbol = B.str . (:[]) <$> oneOf specialChars
+symbol = B.str . T.singleton <$> oneOf specialChars
-- Utility
-getTarget :: T2T String
+getTarget :: T2T Text
getTarget = do
mv <- lookupMeta "target" . stateMeta <$> getState
return $ case mv of
@@ -565,5 +565,5 @@ getTarget = do
atStart :: T2T ()
atStart = (sourceColumn <$> getPosition) >>= guard . (== 1)
-ignoreSpacesCap :: T2T String -> T2T String
-ignoreSpacesCap p = map toLower <$> (spaces *> p <* spaces)
+ignoreSpacesCap :: T2T Text -> T2T Text
+ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces)
diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs
index 27b7d7245..f7edabc48 100644
--- a/src/Text/Pandoc/Readers/Vimwiki.hs
+++ b/src/Text/Pandoc/Readers/Vimwiki.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
{- |
Module : Text.Pandoc.Readers.Vimwiki
@@ -51,9 +52,10 @@ import Prelude
import Control.Monad (guard)
import Control.Monad.Except (throwError)
import Data.Default
-import Data.List (isInfixOf, isPrefixOf)
+import Data.List (isInfixOf)
import Data.Maybe
import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Builder (Blocks, Inlines, fromList, toList, trimInlines)
import qualified Text.Pandoc.Builder as B (blockQuote, bulletList, code,
codeBlockWith, definitionList,
@@ -73,12 +75,13 @@ import Text.Pandoc.Options (ReaderOptions)
import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress,
many1Till, orderedListMarker, readWithM,
registerHeader, spaceChar, stateMeta,
- stateOptions, uri)
-import Text.Pandoc.Shared (crFilter, splitBy, stringify, stripFirstAndLast,
- isURI)
+ stateOptions, uri, manyTillChar, manyChar, textStr,
+ many1Char, countChar, many1TillChar)
+import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast,
+ isURI, tshow)
import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space,
spaces, string)
-import Text.Parsec.Combinator (between, choice, count, eof, lookAhead, many1,
+import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1,
manyTill, notFollowedBy, option, skipMany1)
import Text.Parsec.Prim (getState, many, try, updateState, (<|>))
@@ -128,7 +131,7 @@ block = do
, definitionList
, para
]
- trace (take 60 $ show $ toList res)
+ trace (T.take 60 $ tshow $ toList res)
return res
blockML :: PandocMonad m => VwParser m Blocks
@@ -218,32 +221,32 @@ defMarkerM = string "::" >> spaceChar
defMarkerE :: PandocMonad m => VwParser m Char
defMarkerE = string "::" >> newline
-hasDefMarkerM :: PandocMonad m => VwParser m String
-hasDefMarkerM = manyTill (noneOf "\n") (try defMarkerM)
+hasDefMarkerM :: PandocMonad m => VwParser m Text
+hasDefMarkerM = manyTillChar (noneOf "\n") (try defMarkerM)
preformatted :: PandocMonad m => VwParser m Blocks
preformatted = try $ do
many spaceChar >> string "{{{"
- attrText <- many (noneOf "\n")
+ attrText <- manyChar (noneOf "\n")
lookAhead newline
- contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}}"
+ contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}}"
>> many spaceChar >> newline))
- if (contents /= "") && (head contents == '\n')
- then return $ B.codeBlockWith (makeAttr attrText) (tail contents)
+ if (contents /= "") && (T.head contents == '\n')
+ then return $ B.codeBlockWith (makeAttr attrText) (T.tail contents)
else return $ B.codeBlockWith (makeAttr attrText) contents
-makeAttr :: String -> Attr
+makeAttr :: Text -> Attr
makeAttr s =
- let xs = splitBy (`elem` " \t") s in
+ let xs = splitTextBy (`elem` (" \t" :: String)) s in
("", [], mapMaybe nameValue xs)
-nameValue :: String -> Maybe (String, String)
+nameValue :: Text -> Maybe (Text, Text)
nameValue s =
- let t = splitBy (== '=') s in
+ let t = splitTextBy (== '=') s in
if length t /= 2
then Nothing
else let (a, b) = (head t, last t) in
- if (length b < 2) || ((head b, last b) /= ('"', '"'))
+ if (T.length b < 2) || ((T.head b, T.last b) /= ('"', '"'))
then Nothing
else Just (a, stripFirstAndLast b)
@@ -253,16 +256,16 @@ displayMath = try $ do
many spaceChar >> string "{{$"
mathTag <- option "" mathTagParser
many space
- contents <- manyTill anyChar (try (char '\n' >> many spaceChar >> string "}}$"
+ contents <- manyTillChar anyChar (try (char '\n' >> many spaceChar >> string "}}$"
>> many spaceChar >> newline))
let contentsWithTags
| mathTag == "" = contents
- | otherwise = "\\begin{" ++ mathTag ++ "}\n" ++ contents
- ++ "\n\\end{" ++ mathTag ++ "}"
+ | otherwise = "\\begin{" <> mathTag <> "}\n" <> contents
+ <> "\n\\end{" <> mathTag <> "}"
return $ B.para $ B.displayMath contentsWithTags
-mathTagLaTeX :: String -> String
+mathTagLaTeX :: Text -> Text
mathTagLaTeX s = case s of
"equation" -> ""
"equation*" -> ""
@@ -360,17 +363,17 @@ combineList x [y] = case toList y of
_ -> x:[y]
combineList x xs = x:xs
-listStart :: PandocMonad m => VwParser m (Int, String)
+listStart :: PandocMonad m => VwParser m (Int, Text)
listStart = try $ do
s <- many spaceChar
listType <- bulletListMarkers <|> orderedListMarkers
spaceChar
return (length s, listType)
-bulletListMarkers :: PandocMonad m => VwParser m String
+bulletListMarkers :: PandocMonad m => VwParser m Text
bulletListMarkers = "ul" <$ (char '*' <|> char '-')
-orderedListMarkers :: PandocMonad m => VwParser m String
+orderedListMarkers :: PandocMonad m => VwParser m Text
orderedListMarkers =
("ol" <$choice (orderedListMarker Decimal Period:(($OneParen) . orderedListMarker <$> [Decimal, LowerRoman, UpperRoman, LowerAlpha, UpperAlpha])))
<|> ("ol" <$ char '#')
@@ -421,9 +424,9 @@ placeholder :: PandocMonad m => VwParser m ()
placeholder = try $
choice (ph <$> ["title", "date"]) <|> noHtmlPh <|> templatePh
-ph :: PandocMonad m => String -> VwParser m ()
+ph :: PandocMonad m => Text -> VwParser m ()
ph s = try $ do
- many spaceChar >>string ('%':s) >> spaceChar
+ many spaceChar >> textStr (T.cons '%' s) >> spaceChar
contents <- trimInlines . mconcat <$> manyTill inline (lookAhead newline)
--use lookAhead because of placeholder in the whitespace parser
let meta' = B.setMeta s contents nullMeta
@@ -476,7 +479,7 @@ inlineML :: PandocMonad m => VwParser m Inlines
inlineML = choice $ whitespace endlineML:inlineList
str :: PandocMonad m => VwParser m Inlines
-str = B.str <$>many1 (noneOf $ spaceChars ++ specialChars)
+str = B.str <$> many1Char (noneOf $ spaceChars ++ specialChars)
whitespace :: PandocMonad m => VwParser m () -> VwParser m Inlines
whitespace endline = B.space <$ (skipMany1 spaceChar <|>
@@ -487,7 +490,7 @@ whitespace' :: PandocMonad m => VwParser m Inlines
whitespace' = B.space <$ skipMany1 spaceChar
special :: PandocMonad m => VwParser m Inlines
-special = B.str <$> count 1 (oneOf specialChars)
+special = B.str <$> countChar 1 (oneOf specialChars)
bareURL :: PandocMonad m => VwParser m Inlines
bareURL = try $ do
@@ -505,8 +508,8 @@ strong = try $ do
return $ B.spanWith (makeId contents, [], []) mempty
<> B.strong contents
-makeId :: Inlines -> String
-makeId i = concat (stringify <$> toList i)
+makeId :: Inlines -> Text
+makeId i = T.concat (stringify <$> toList i)
emph :: PandocMonad m => VwParser m Inlines
emph = try $ do
@@ -527,7 +530,7 @@ strikeout = try $ do
code :: PandocMonad m => VwParser m Inlines
code = try $ do
char '`'
- contents <- many1Till (noneOf "\n") (char '`')
+ contents <- many1TillChar (noneOf "\n") (char '`')
return $ B.code contents
superscript :: PandocMonad m => VwParser m Inlines
@@ -542,8 +545,8 @@ subscript = try $
link :: PandocMonad m => VwParser m Inlines
link = try $ do
string "[["
- contents <- lookAhead $ manyTill anyChar (string "]]")
- case '|' `elem` contents of
+ contents <- lookAhead $ manyTillChar anyChar (string "]]")
+ case T.any (== '|') contents of
False -> do
manyTill anyChar (string "]]")
-- not using try here because [[hell]o]] is not rendered as a link in vimwiki
@@ -552,7 +555,7 @@ link = try $ do
else "wikilink"
return $ B.link (procLink contents) tit (B.str contents)
True -> do
- url <- manyTill anyChar $ char '|'
+ url <- manyTillChar anyChar $ char '|'
lab <- mconcat <$> manyTill inline (string "]]")
let tit = if isURI url
then ""
@@ -568,52 +571,52 @@ image = try $ do
images :: PandocMonad m => Int -> VwParser m Inlines
images k
| k == 0 = do
- imgurl <- manyTill anyChar (try $ string "}}")
+ imgurl <- manyTillChar anyChar (try $ string "}}")
return $ B.image (procImgurl imgurl) "" (B.str "")
| k == 1 = do
- imgurl <- manyTill anyChar (char '|')
+ imgurl <- manyTillChar anyChar (char '|')
alt <- mconcat <$> manyTill inline (try $ string "}}")
return $ B.image (procImgurl imgurl) "" alt
| k == 2 = do
- imgurl <- manyTill anyChar (char '|')
- alt <- mconcat <$>manyTill inline (char '|')
- attrText <- manyTill anyChar (try $ string "}}")
+ imgurl <- manyTillChar anyChar (char '|')
+ alt <- mconcat <$> manyTill inline (char '|')
+ attrText <- manyTillChar anyChar (try $ string "}}")
return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
| otherwise = do
- imgurl <- manyTill anyChar (char '|')
- alt <- mconcat <$>manyTill inline (char '|')
- attrText <- manyTill anyChar (char '|')
+ imgurl <- manyTillChar anyChar (char '|')
+ alt <- mconcat <$> manyTill inline (char '|')
+ attrText <- manyTillChar anyChar (char '|')
manyTill anyChar (try $ string "}}")
return $ B.imageWith (makeAttr attrText) (procImgurl imgurl) "" alt
-procLink' :: String -> String
+procLink' :: Text -> Text
procLink' s
- | take 6 s == "local:" = "file" ++ drop 5 s
- | take 6 s == "diary:" = "diary/" ++ drop 6 s
- | or ((`isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
+ | T.take 6 s == "local:" = "file" <> T.drop 5 s
+ | T.take 6 s == "diary:" = "diary/" <> T.drop 6 s
+ | or ((`T.isPrefixOf` s) <$> [ "http:", "https:", "ftp:", "file:", "mailto:",
"news:", "telnet:" ])
= s
| s == "" = ""
- | last s == '/' = s
+ | T.last s == '/' = s
| otherwise = s
-procLink :: String -> String
-procLink s = procLink' x ++ y
- where (x, y) = break (=='#') s
+procLink :: Text -> Text
+procLink s = procLink' x <> y
+ where (x, y) = T.break (=='#') s
-procImgurl :: String -> String
-procImgurl s = if take 6 s == "local:" then "file" ++ drop 5 s else s
+procImgurl :: Text -> Text
+procImgurl s = if T.take 6 s == "local:" then "file" <> T.drop 5 s else s
inlineMath :: PandocMonad m => VwParser m Inlines
inlineMath = try $
- B.math <$ char '$' <*> many1Till (noneOf "\n") (char '$')
+ B.math <$ char '$' <*> many1TillChar (noneOf "\n") (char '$')
tag :: PandocMonad m => VwParser m Inlines
tag = try $ do
char ':'
- s <- manyTill (noneOf spaceChars) (try (char ':' >> lookAhead space))
- guard $ not $ "::" `isInfixOf` (":" ++ s ++ ":")
- let ss = splitBy (==':') s
+ s <- manyTillChar (noneOf spaceChars) (try (char ':' >> lookAhead space))
+ guard $ not $ "::" `T.isInfixOf` (":" <> s <> ":")
+ let ss = splitTextBy (==':') s
return $ mconcat $ makeTagSpan' (head ss):(makeTagSpan <$> tail ss)
todoMark :: PandocMonad m => VwParser m Inlines
@@ -646,16 +649,16 @@ nFBTTBSB =
hasDefMarker :: PandocMonad m => VwParser m ()
hasDefMarker = () <$ manyTill (noneOf "\n") (string "::" >> oneOf spaceChars)
-makeTagSpan' :: String -> Inlines
-makeTagSpan' s = B.spanWith ('-' : s, [], []) (B.str "") <>
+makeTagSpan' :: Text -> Inlines
+makeTagSpan' s = B.spanWith (T.cons '-' s, [], []) (B.str "") <>
B.spanWith (s, ["tag"], []) (B.str s)
-makeTagSpan :: String -> Inlines
+makeTagSpan :: Text -> Inlines
makeTagSpan s = B.space <> makeTagSpan' s
-mathTagParser :: PandocMonad m => VwParser m String
+mathTagParser :: PandocMonad m => VwParser m Text
mathTagParser = do
- s <- try $ lookAhead (char '%' >> manyTill (noneOf spaceChars)
+ s <- try $ lookAhead (char '%' >> manyTillChar (noneOf spaceChars)
(try $ char '%' >> many (noneOf $ '%':spaceChars) >> space))
- char '%' >> string s >> char '%'
+ char '%' >> textStr s >> char '%'
return $ mathTagLaTeX s