aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-14 22:29:21 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-16 16:55:20 -0800
commit967e7f5fb990b29de48b37be1db40fb149a8cf55 (patch)
treeb9f903a5f2af14f20e769903e80659b9bffd59ff /src/Text/Pandoc/Readers
parentb5b576184c3c1668aad0c904e186136b81a0dd54 (diff)
downloadpandoc-967e7f5fb990b29de48b37be1db40fb149a8cf55.tar.gz
Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light...
..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms |
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs76
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse.hs163
-rw-r--r--src/Text/Pandoc/Readers/Docx/Parse/Styles.hs31
-rw-r--r--src/Text/Pandoc/Readers/Docx/Util.hs27
-rw-r--r--src/Text/Pandoc/Readers/EPUB.hs65
-rw-r--r--src/Text/Pandoc/Readers/FB2.hs93
-rw-r--r--src/Text/Pandoc/Readers/JATS.hs58
-rw-r--r--src/Text/Pandoc/Readers/OPML.hs29
-rw-r--r--src/Text/Pandoc/Readers/Odt.hs5
-rw-r--r--src/Text/Pandoc/Readers/Odt/ContentReader.hs13
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs3
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/Utils.hs33
-rw-r--r--src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs23
-rw-r--r--src/Text/Pandoc/Readers/Odt/Namespaces.hs11
-rw-r--r--src/Text/Pandoc/Readers/Odt/StyleReader.hs23
15 files changed, 309 insertions, 344 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ad0108843..e201b54fe 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -31,8 +31,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
{-
@@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go)
where go :: [Content] -> [Content]
go (Text (CData CDataRaw _ _):xs) = xs
go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
+ Text (CData CDataText (s1 <> s2) z):xs
go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ Text (CData CDataText (s1 <> convertEntity r) z):xs
go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
+ Text (CData CDataText (convertEntity r <> s1) z):xs
go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ Text (CData CDataText (convertEntity r1 <>
+ convertEntity r2) Nothing):xs
go xs = xs
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
attrValue attr elt =
- maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- convenience function
named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
--
@@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool
isBlockElement (Elem e) = qName (elName e) `elem` blockTags
isBlockElement _ = False
-blockTags :: [String]
+blockTags :: [Text]
blockTags =
[ "abstract"
, "ackno"
@@ -721,7 +721,7 @@ blockTags =
, "variablelist"
] ++ admonitionTags
-admonitionTags :: [String]
+admonitionTags :: [Text]
admonitionTags = ["important","caution","note","tip","warning"]
-- Trim leading and trailing newline characters
@@ -779,10 +779,10 @@ getBlocks e = mconcat <$>
parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+ else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
parseBlock (Elem e) =
case qName (elName e) of
"toc" -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -837,7 +837,7 @@ parseBlock (Elem e) =
"refsect2" -> sect 2
"refsect3" -> sect 3
"refsection" -> gets dbSectionLevel >>= sect . (+1)
- l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+ l | l `elem` admonitionTags -> parseAdmonition l
"area" -> skip
"areaset" -> skip
"areaspec" -> skip
@@ -899,7 +899,7 @@ parseBlock (Elem e) =
"subtitle" -> return mempty -- handled in parent element
_ -> skip >> getBlocks e
where skip = do
- let qn = T.pack $ qName $ elName e
+ let qn = qName $ elName e
let name = if "pi-" `T.isPrefixOf` qn
then "<?" <> qn <> "?>"
else qn
@@ -911,7 +911,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ T.pack $ strContentRecursive e
+ $ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -965,7 +965,7 @@ parseBlock (Elem e) =
w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x ->
(x >= '0' && x <= '9')
- || x == '.') (T.pack w)
+ || x == '.') w
if n > 0 then Just n else Nothing
let numrows = case bodyrows of
[] -> 0
@@ -1048,12 +1048,12 @@ parseMixed container conts = do
x <- parseMixed container rs
return $ p <> b <> x
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
parseRow cn = do
let isEntry x = named "entry" x || named "td" x || named "th" x
mapM (parseEntry cn) . filterChildren isEntry
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
parseEntry cn el = do
let colDistance sa ea = do
let iStrt = elemIndex sa cn
@@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
getInlines e' = trimInlines . mconcat <$>
mapM parseInline (elContent e')
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
- return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+ return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
parseInline (Elem e) =
case qName (elName e) of
"anchor" -> do
@@ -1138,7 +1138,7 @@ parseInline (Elem e) =
"userinput" -> codeWithLang
"systemitem" -> codeWithLang
"varargs" -> return $ code "(...)"
- "keycap" -> return (str $ T.pack $ strContent e)
+ "keycap" -> return (str $ strContent e)
"keycombo" -> keycombo <$>
mapM parseInline (elContent e)
"menuchoice" -> menuchoice <$>
@@ -1150,17 +1150,17 @@ parseInline (Elem e) =
let title = case attrValue "endterm" e of
"" -> maybe "???" xrefTitleByElem
(findElementById linkend content)
- endterm -> maybe "???" (T.pack . strContent)
+ endterm -> maybe "???" strContent
(findElementById endterm content)
return $ link ("#" <> linkend) "" (text title)
- "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
- $ str $ T.pack $ strContent e
- "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+ "email" -> return $ link ("mailto:" <> strContent e) ""
+ $ str $ strContent e
+ "uri" -> return $ link (strContent e) "" $ str $ strContent e
"ulink" -> innerInlines (link (attrValue "url" e) "")
"link" -> do
ils <- innerInlines id
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> T.pack h
+ Just h -> h
_ -> "#" <> attrValue "linkend" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1180,7 +1180,7 @@ parseInline (Elem e) =
"pi-asciidoc-br" -> return linebreak
_ -> skip >> innerInlines id
where skip = do
- let qn = T.pack $ qName $ elName e
+ let qn = qName $ elName e
let name = if "pi-" `T.isPrefixOf` qn
then "<?" <> qn <> "?>"
else qn
@@ -1193,7 +1193,7 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
(filterChildren (named "member") e)
segmentedList = do
@@ -1234,10 +1234,10 @@ parseInline (Elem e) =
"sect5" -> descendantContent "title" el
"cmdsynopsis" -> descendantContent "command" el
"funcsynopsis" -> descendantContent "function" el
- _ -> T.pack $ qName (elName el) ++ "_title"
+ _ -> qName (elName el) <> "_title"
where
xrefLabel = attrValue "xreflabel" el
- descendantContent name = maybe "???" (T.pack . strContent)
+ descendantContent name = maybe "???" strContent
. filterElementName (\n -> qName n == name)
-- | Extract a math equation from an element
@@ -1258,7 +1258,7 @@ equation e constructor =
mathMLEquations :: [Text]
mathMLEquations = map writeTeX $ rights $ readMath
(\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
- (readMathML . T.pack . showElement)
+ (readMathML . showElement)
latexEquations :: [Text]
latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1272,8 +1272,8 @@ equation e constructor =
-- | Get the actual text stored in a CData block. 'showContent'
-- returns the text still surrounded by the [[CDATA]] tags.
showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
-- | Set the prefix of a name to 'Nothing'
diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs
index 056dab6c2..c76f3c171 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse.hs
@@ -63,6 +63,7 @@ import Data.Char (chr, ord, readLitChar)
import Data.List
import qualified Data.Map as M
import qualified Data.Text as T
+import Data.Text (Text)
import Data.Maybe
import System.FilePath
import Text.Pandoc.Readers.Docx.Util
@@ -72,9 +73,7 @@ import qualified Text.Pandoc.UTF8 as UTF8
import Text.TeXMath (Exp)
import Text.TeXMath.Readers.OMML (readOMML)
import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont)
-import Text.XML.Light
-import qualified Text.XML.Light.Cursor as XMLC
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
data ReaderEnv = ReaderEnv { envNotes :: Notes
, envComments :: Comments
@@ -128,37 +127,23 @@ mapD f xs =
in
concatMapM handler xs
-unwrap :: NameSpaces -> Content -> [Content]
-unwrap ns (Elem element)
+unwrapElement :: NameSpaces -> Element -> [Element]
+unwrapElement ns element
| isElem ns "w" "sdt" element
, Just sdtContent <- findChildByName ns "w" "sdtContent" element
- = concatMap (unwrap ns . Elem) (elChildren sdtContent)
+ = concatMap (unwrapElement ns) (elChildren sdtContent)
| isElem ns "w" "smartTag" element
- = concatMap (unwrap ns . Elem) (elChildren element)
-unwrap _ content = [content]
+ = concatMap (unwrapElement ns) (elChildren element)
+ | otherwise
+ = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }]
-unwrapChild :: NameSpaces -> Content -> Content
-unwrapChild ns (Elem element) =
- Elem $ element { elContent = concatMap (unwrap ns) (elContent element) }
-unwrapChild _ content = content
+unwrapContent :: NameSpaces -> Content -> [Content]
+unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element
+unwrapContent _ content = [content]
-walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor
-walkDocument' ns cur =
- let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur
- in
- case XMLC.nextDF modifiedCur of
- Just cur' -> walkDocument' ns cur'
- Nothing -> XMLC.root modifiedCur
-
-walkDocument :: NameSpaces -> Element -> Maybe Element
+walkDocument :: NameSpaces -> Element -> Element
walkDocument ns element =
- let cur = XMLC.fromContent (Elem element)
- cur' = walkDocument' ns cur
- in
- case XMLC.toTree cur' of
- Elem element' -> Just element'
- _ -> Nothing
-
+ element{ elContent = concatMap (unwrapContent ns) (elContent element) }
newtype Docx = Docx Document
deriving Show
@@ -361,9 +346,9 @@ getDocumentXmlPath zf = do
fp <- findAttr (QName "Target" Nothing Nothing) rel
-- sometimes there will be a leading slash, which windows seems to
-- have trouble with.
- return $ case fp of
+ return $ case T.unpack fp of
'/' : fp' -> fp'
- _ -> fp
+ fp' -> fp'
archiveToDocument :: Archive -> D Document
archiveToDocument zf = do
@@ -372,7 +357,7 @@ archiveToDocument zf = do
docElem <- maybeToD $ parseXMLFromEntry entry
let namespaces = elemToNameSpaces docElem
bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem
- let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem)
+ let bodyElem' = walkDocument namespaces bodyElem
body <- elemToBody namespaces bodyElem'
return $ Document namespaces body
@@ -414,8 +399,8 @@ archiveToNotes zf =
fn_namespaces = maybe [] elemToNameSpaces fnElem
en_namespaces = maybe [] elemToNameSpaces enElem
ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces
- fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote"
- en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote"
+ fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns
+ en = enElem >>= elemToNotes ns "endnote" . walkDocument ns
in
Notes ns fn en
@@ -424,7 +409,8 @@ archiveToComments zf =
let cmtsElem = findEntryByPath "word/comments.xml" zf
>>= parseXMLFromEntry
cmts_namespaces = maybe [] elemToNameSpaces cmtsElem
- cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces)
+ cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$>
+ cmtsElem
in
case cmts of
Just c -> Comments cmts_namespaces c
@@ -443,8 +429,8 @@ filePathToRelType path docXmlPath =
relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship
relElemToRelationship relType element | qName (elName element) == "Relationship" =
do
- relId <- findAttrText (QName "Id" Nothing Nothing) element
- target <- findAttrText (QName "Target" Nothing Nothing) element
+ relId <- findAttr (QName "Id" Nothing Nothing) element
+ target <- findAttr (QName "Target" Nothing Nothing) element
return $ Relationship relType relId target
relElemToRelationship _ _ = Nothing
@@ -485,10 +471,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do
loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride
loElemToLevelOverride ns element
| isElem ns "w" "lvlOverride" element = do
- ilvl <- findAttrTextByName ns "w" "ilvl" element
+ ilvl <- findAttrByName ns "w" "ilvl" element
let startOverride = findChildByName ns "w" "startOverride" element
>>= findAttrByName ns "w" "val"
- >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+ >>= stringToInteger
lvl = findChildByName ns "w" "lvl" element
>>= levelElemToLevel ns
return $ LevelOverride ilvl startOverride lvl
@@ -497,9 +483,9 @@ loElemToLevelOverride _ _ = Nothing
numElemToNum :: NameSpaces -> Element -> Maybe Numb
numElemToNum ns element
| isElem ns "w" "num" element = do
- numId <- findAttrTextByName ns "w" "numId" element
+ numId <- findAttrByName ns "w" "numId" element
absNumId <- findChildByName ns "w" "abstractNumId" element
- >>= findAttrTextByName ns "w" "val"
+ >>= findAttrByName ns "w" "val"
let lvlOverrides = mapMaybe
(loElemToLevelOverride ns)
(findChildrenByName ns "w" "lvlOverride" element)
@@ -509,7 +495,7 @@ numElemToNum _ _ = Nothing
absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb
absNumElemToAbsNum ns element
| isElem ns "w" "abstractNum" element = do
- absNumId <- findAttrTextByName ns "w" "abstractNumId" element
+ absNumId <- findAttrByName ns "w" "abstractNumId" element
let levelElems = findChildrenByName ns "w" "lvl" element
levels = mapMaybe (levelElemToLevel ns) levelElems
return $ AbstractNumb absNumId levels
@@ -518,14 +504,14 @@ absNumElemToAbsNum _ _ = Nothing
levelElemToLevel :: NameSpaces -> Element -> Maybe Level
levelElemToLevel ns element
| isElem ns "w" "lvl" element = do
- ilvl <- findAttrTextByName ns "w" "ilvl" element
+ ilvl <- findAttrByName ns "w" "ilvl" element
fmt <- findChildByName ns "w" "numFmt" element
- >>= findAttrTextByName ns "w" "val"
+ >>= findAttrByName ns "w" "val"
txt <- findChildByName ns "w" "lvlText" element
- >>= findAttrTextByName ns "w" "val"
+ >>= findAttrByName ns "w" "val"
let start = findChildByName ns "w" "start" element
>>= findAttrByName ns "w" "val"
- >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)])))
+ >>= stringToInteger
return (Level ilvl fmt txt start)
levelElemToLevel _ _ = Nothing
@@ -546,11 +532,11 @@ archiveToNumbering :: Archive -> Numbering
archiveToNumbering archive =
fromMaybe (Numbering [] [] []) (archiveToNumbering' archive)
-elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element)
+elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element)
elemToNotes ns notetype element
| isElem ns "w" (notetype <> "s") element =
let pairs = mapMaybe
- (\e -> findAttrTextByName ns "w" "id" e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" notetype element)
in
@@ -562,7 +548,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element
elemToComments ns element
| isElem ns "w" "comments" element =
let pairs = mapMaybe
- (\e -> findAttrTextByName ns "w" "id" e >>=
+ (\e -> findAttrByName ns "w" "id" e >>=
(\a -> Just (a, e)))
(findChildrenByName ns "w" "comment" element)
in
@@ -622,12 +608,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element =
stringToInteger
, hangingParIndent =
findAttrByName ns "w" "hanging" element >>=
- stringToInteger}
+ stringToInteger }
elemToParIndentation _ _ = Nothing
-testBitMask :: String -> Int -> Bool
+testBitMask :: Text -> Int -> Bool
testBitMask bitMaskS n =
- case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of
+ case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of
[] -> False
((n', _) : _) -> (n' .|. n) /= 0
@@ -642,7 +628,7 @@ elemToBodyPart ns element
| isElem ns "w" "p" element
, (c:_) <- findChildrenByName ns "m" "oMathPara" element =
do
- expsLst <- eitherToD $ readOMML $ T.pack $ showElement c
+ expsLst <- eitherToD $ readOMML $ showElement c
return $ OMathPara expsLst
elemToBodyPart ns element
| isElem ns "w" "p" element
@@ -666,7 +652,7 @@ elemToBodyPart ns element
| isElem ns "w" "tbl" element = do
let caption' = findChildByName ns "w" "tblPr" element
>>= findChildByName ns "w" "tblCaption"
- >>= findAttrTextByName ns "w" "val"
+ >>= findAttrByName ns "w" "val"
caption = fromMaybe "" caption'
grid' = case findChildByName ns "w" "tblGrid" element of
Just g -> elemToTblGrid ns g
@@ -705,8 +691,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text)
getTitleAndAlt ns element =
let mbDocPr = findChildByName ns "wp" "inline" element >>=
findChildByName ns "wp" "docPr"
- title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title")
- alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr")
+ title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title")
+ alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr")
in (title, alt)
elemToParPart :: NameSpaces -> Element -> D ParPart
@@ -718,7 +704,7 @@ elemToParPart ns element
= let (title, alt) = getTitleAndAlt ns drawingElem
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrTextByName ns "r" "embed"
+ >>= findAttrByName ns "r" "embed"
in
case drawing of
Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem)
@@ -728,7 +714,7 @@ elemToParPart ns element
| isElem ns "w" "r" element
, Just _ <- findChildByName ns "w" "pict" element =
let drawing = findElement (elemName ns "v" "imagedata") element
- >>= findAttrTextByName ns "r" "id"
+ >>= findAttrByName ns "r" "id"
in
case drawing of
-- Todo: check out title and attr for deprecated format.
@@ -797,7 +783,7 @@ elemToParPart ns element
fldCharState <- gets stateFldCharState
case fldCharState of
FldCharOpen -> do
- info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText
+ info <- eitherToD $ parseFieldInfo $ strContent instrText
modify $ \st -> st{stateFldCharState = FldCharFieldInfo info}
return NullParPart
_ -> return NullParPart
@@ -818,48 +804,48 @@ elemToParPart ns element
return $ ChangedRuns change runs
elemToParPart ns element
| isElem ns "w" "bookmarkStart" element
- , Just bmId <- findAttrTextByName ns "w" "id" element
- , Just bmName <- findAttrTextByName ns "w" "name" element =
+ , Just bmId <- findAttrByName ns "w" "id" element
+ , Just bmName <- findAttrByName ns "w" "name" element =
return $ BookMark bmId bmName
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just relId <- findAttrTextByName ns "r" "id" element = do
+ , Just relId <- findAttrByName ns "r" "id" element = do
location <- asks envLocation
runs <- mapD (elemToRun ns) (elChildren element)
rels <- asks envRelationships
case lookupRelationship location relId rels of
Just target ->
- case findAttrTextByName ns "w" "anchor" element of
+ case findAttrByName ns "w" "anchor" element of
Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs
Nothing -> return $ ExternalHyperLink target runs
Nothing -> return $ ExternalHyperLink "" runs
elemToParPart ns element
| isElem ns "w" "hyperlink" element
- , Just anchor <- findAttrTextByName ns "w" "anchor" element = do
+ , Just anchor <- findAttrByName ns "w" "anchor" element = do
runs <- mapD (elemToRun ns) (elChildren element)
return $ InternalHyperLink anchor runs
elemToParPart ns element
| isElem ns "w" "commentRangeStart" element
- , Just cmtId <- findAttrTextByName ns "w" "id" element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element = do
(Comments _ commentMap) <- asks envComments
case M.lookup cmtId commentMap of
Just cmtElem -> elemToCommentStart ns cmtElem
Nothing -> throwError WrongElem
elemToParPart ns element
| isElem ns "w" "commentRangeEnd" element
- , Just cmtId <- findAttrTextByName ns "w" "id" element =
+ , Just cmtId <- findAttrByName ns "w" "id" element =
return $ CommentEnd cmtId
elemToParPart ns element
| isElem ns "m" "oMath" element =
- fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element)
+ fmap PlainOMath (eitherToD $ readOMML $ showElement element)
elemToParPart _ _ = throwError WrongElem
elemToCommentStart :: NameSpaces -> Element -> D ParPart
elemToCommentStart ns element
| isElem ns "w" "comment" element
- , Just cmtId <- findAttrTextByName ns "w" "id" element
- , Just cmtAuthor <- findAttrTextByName ns "w" "author" element
- , cmtDate <- findAttrTextByName ns "w" "date" element = do
+ , Just cmtId <- findAttrByName ns "w" "id" element
+ , Just cmtAuthor <- findAttrByName ns "w" "author" element
+ , cmtDate <- findAttrByName ns "w" "date" element = do
bps <- mapD (elemToBodyPart ns) (elChildren element)
return $ CommentStart cmtId cmtAuthor cmtDate bps
elemToCommentStart _ _ = throwError WrongElem
@@ -878,7 +864,7 @@ elemToExtent drawingElem =
where
wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem
- >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack
+ >>= findAttr (QName at Nothing Nothing) >>= safeRead
childElemToRun :: NameSpaces -> Element -> D Run
@@ -889,7 +875,7 @@ childElemToRun ns element
= let (title, alt) = getTitleAndAlt ns element
a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem
- >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r"))
+ >>= findAttr (QName "embed" (lookup "r" ns) (Just "r"))
in
case drawing of
Just s -> expandDrawingId s >>=
@@ -902,7 +888,7 @@ childElemToRun ns element
= return InlineChart
childElemToRun ns element
| isElem ns "w" "footnoteReference" element
- , Just fnId <- findAttrTextByName ns "w" "id" element = do
+ , Just fnId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupFootnote fnId notes of
Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -910,7 +896,7 @@ childElemToRun ns element
Nothing -> return $ Footnote []
childElemToRun ns element
| isElem ns "w" "endnoteReference" element
- , Just enId <- findAttrTextByName ns "w" "id" element = do
+ , Just enId <- findAttrByName ns "w" "id" element = do
notes <- asks envNotes
case lookupEndnote enId notes of
Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e)
@@ -963,15 +949,15 @@ getParStyleField _ _ = Nothing
getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange
getTrackedChange ns element
| isElem ns "w" "ins" element || isElem ns "w" "moveTo" element
- , Just cId <- findAttrTextByName ns "w" "id" element
- , Just cAuthor <- findAttrTextByName ns "w" "author" element
- , mcDate <- findAttrTextByName ns "w" "date" element =
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , mcDate <- findAttrByName ns "w" "date" element =
Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate)
getTrackedChange ns element
| isElem ns "w" "del" element || isElem ns "w" "moveFrom" element
- , Just cId <- findAttrTextByName ns "w" "id" element
- , Just cAuthor <- findAttrTextByName ns "w" "author" element
- , mcDate <- findAttrTextByName ns "w" "date" element =
+ , Just cId <- findAttrByName ns "w" "id" element
+ , Just cAuthor <- findAttrByName ns "w" "author" element
+ , mcDate <- findAttrByName ns "w" "date" element =
Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate)
getTrackedChange _ _ = Nothing
@@ -980,7 +966,7 @@ elemToParagraphStyle ns element sty
| Just pPr <- findChildByName ns "w" "pPr" element =
let style =
mapMaybe
- (fmap ParaStyleId . findAttrTextByName ns "w" "val")
+ (fmap ParaStyleId . findAttrByName ns "w" "val")
(findChildrenByName ns "w" "pStyle" pPr)
in ParagraphStyle
{pStyle = mapMaybe (`M.lookup` sty) style
@@ -1012,7 +998,7 @@ elemToRunStyleD ns element
charStyles <- asks envCharStyles
let parentSty =
findChildByName ns "w" "rStyle" rPr >>=
- findAttrTextByName ns "w" "val" >>=
+ findAttrByName ns "w" "val" >>=
flip M.lookup charStyles . CharStyleId
return $ elemToRunStyle ns element parentSty
elemToRunStyleD _ _ = return defaultRunStyle
@@ -1022,7 +1008,7 @@ elemToRunElem ns element
| isElem ns "w" "t" element
|| isElem ns "w" "delText" element
|| isElem ns "m" "t" element = do
- let str = T.pack $ strContent element
+ let str = strContent element
font <- asks envFont
case font of
Nothing -> return $ TextRun str
@@ -1044,14 +1030,14 @@ getSymChar :: NameSpaces -> Element -> RunElem
getSymChar ns element
| Just s <- lowerFromPrivate <$> getCodepoint
, Just font <- getFont =
- case readLitChar ("\\x" ++ s) of
+ case readLitChar ("\\x" ++ T.unpack s) of
[(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char
_ -> TextRun ""
where
getCodepoint = findAttrByName ns "w" "char" element
- getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element
- lowerFromPrivate ('F':xs) = '0':xs
- lowerFromPrivate xs = xs
+ getFont = textToFont =<< findAttrByName ns "w" "font" element
+ lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t
+ | otherwise = t
getSymChar _ _ = TextRun ""
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
@@ -1061,8 +1047,9 @@ elemToRunElems ns element
let qualName = elemName ns "w"
let font = do
fontElem <- findElement (qualName "rFonts") element
- textToFont . T.pack =<<
- foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"]
+ textToFont =<<
+ foldr ((<|>) . (flip findAttr fontElem . qualName))
+ Nothing ["ascii", "hAnsi"]
local (setFont font) (mapD (elemToRunElem ns) (elChildren element))
elemToRunElems _ _ = throwError WrongElem
diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
index edade8654..0d7271d6a 100644
--- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
+++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs
@@ -48,12 +48,13 @@ import Data.Function (on)
import Data.String (IsString(..))
import qualified Data.Map as M
import qualified Data.Text as T
+import qualified Data.Text.Read
+import Data.Text (Text)
import Data.Maybe
import Data.Coerce
import Text.Pandoc.Readers.Docx.Util
import qualified Text.Pandoc.UTF8 as UTF8
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
newtype CharStyleId = CharStyleId T.Text
deriving (Show, Eq, Ord, IsString, FromStyleId)
@@ -109,7 +110,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool
, isRTL :: Maybe Bool
, isForceCTL :: Maybe Bool
, rVertAlign :: Maybe VertAlign
- , rUnderline :: Maybe String
+ , rUnderline :: Maybe Text
, rParentStyle :: Maybe CharStyle
}
deriving Show
@@ -159,7 +160,7 @@ isBasedOnStyle ns element parentStyle
, Just styleType <- findAttrByName ns "w" "type" element
, styleType == cStyleType parentStyle
, Just basedOnVal <- findChildByName ns "w" "basedOn" element >>=
- findAttrTextByName ns "w" "val"
+ findAttrByName ns "w" "val"
, Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps)
| isElem ns "w" "style" element
, Just styleType <- findAttrByName ns "w" "type" element
@@ -169,7 +170,7 @@ isBasedOnStyle ns element parentStyle
| otherwise = False
class HasStyleId a => ElemToStyle a where
- cStyleType :: Maybe a -> String
+ cStyleType :: Maybe a -> Text
elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a
class FromStyleId (StyleId a) => HasStyleId a where
@@ -226,8 +227,10 @@ buildBasedOnList ns element rootStyle =
stys -> stys ++
concatMap (buildBasedOnList ns element . Just) stys
-stringToInteger :: String -> Maybe Integer
-stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)])
+stringToInteger :: Text -> Maybe Integer
+stringToInteger s = case Data.Text.Read.decimal s of
+ Right (x,_) -> Just x
+ Left _ -> Nothing
checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool
checkOnOff ns rPr tag
@@ -247,7 +250,7 @@ checkOnOff _ _ _ = Nothing
elemToCharStyle :: NameSpaces
-> Element -> Maybe CharStyle -> Maybe CharStyle
elemToCharStyle ns element parentStyle
- = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element)
+ = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element)
<*> getElementStyleName ns element
<*> Just (elemToRunStyle ns element parentStyle)
@@ -281,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle
getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int)
getHeaderLevel ns element
| Just styleName <- getElementStyleName ns element
- , Just n <- stringToInteger . T.unpack =<<
+ , Just n <- stringToInteger =<<
(T.stripPrefix "heading " . T.toLower $
fromStyleName styleName)
, n > 0 = Just (styleName, fromInteger n)
@@ -289,8 +292,8 @@ getHeaderLevel _ _ = Nothing
getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a
getElementStyleName ns el = coerce <$>
- ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val")
- <|> findAttrTextByName ns "w" "styleId" el)
+ ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val")
+ <|> findAttrByName ns "w" "styleId" el)
getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text)
getNumInfo ns element = do
@@ -298,15 +301,15 @@ getNumInfo ns element = do
findChildByName ns "w" "numPr"
lvl = fromMaybe "0" (numPr >>=
findChildByName ns "w" "ilvl" >>=
- findAttrTextByName ns "w" "val")
+ findAttrByName ns "w" "val")
numId <- numPr >>=
findChildByName ns "w" "numId" >>=
- findAttrTextByName ns "w" "val"
+ findAttrByName ns "w" "val"
return (numId, lvl)
elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle
elemToParStyleData ns element parentStyle
- | Just styleId <- findAttrTextByName ns "w" "styleId" element
+ | Just styleId <- findAttrByName ns "w" "styleId" element
, Just styleName <- getElementStyleName ns element
= Just $ ParStyle
{
diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs
index f9c9a8e26..21df03d9e 100644
--- a/src/Text/Pandoc/Readers/Docx/Util.hs
+++ b/src/Text/Pandoc/Readers/Docx/Util.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.StyleMaps
Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>,
@@ -18,51 +19,45 @@ module Text.Pandoc.Readers.Docx.Util (
, elemToNameSpaces
, findChildByName
, findChildrenByName
- , findAttrText
, findAttrByName
- , findAttrTextByName
) where
import Data.Maybe (mapMaybe)
import qualified Data.Text as T
-import Text.XML.Light
+import Data.Text (Text)
+import Text.Pandoc.XML.Light
-type NameSpaces = [(String, String)]
+type NameSpaces = [(Text, Text)]
elemToNameSpaces :: Element -> NameSpaces
elemToNameSpaces = mapMaybe attrToNSPair . elAttribs
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val)
attrToNSPair _ = Nothing
-elemName :: NameSpaces -> String -> String -> QName
+elemName :: NameSpaces -> Text -> Text -> QName
elemName ns prefix name =
- QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix)
+ QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix)
-isElem :: NameSpaces -> String -> String -> Element -> Bool
+isElem :: NameSpaces -> Text -> Text -> Element -> Bool
isElem ns prefix name element =
let ns' = ns ++ elemToNameSpaces element
in qName (elName element) == name &&
qURI (elName element) == lookup prefix ns'
-findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element
+findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element
findChildByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findChild (elemName ns' pref name) el
-findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element]
+findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element]
findChildrenByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findChildren (elemName ns' pref name) el
-findAttrText :: QName -> Element -> Maybe T.Text
-findAttrText x = fmap T.pack . findAttr x
-
-findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String
+findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text
findAttrByName ns pref name el =
let ns' = ns ++ elemToNameSpaces el
in findAttr (elemName ns' pref name) el
-findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text
-findAttrTextByName a b c = fmap T.pack . findAttrByName a b c
diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs
index 369c4f0c9..eb8d2405d 100644
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@@ -23,8 +23,8 @@ import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
-import Data.List (isInfixOf)
import qualified Data.Text as T
+import Data.Text (Text)
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (mapMaybe)
import qualified Data.Text.Lazy as TL
@@ -40,13 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.Options (ReaderOptions (..))
import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI)
+import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow)
import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
import Text.Pandoc.Walk (query, walk)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
-type Items = M.Map String (FilePath, MimeType)
+type Items = M.Map Text (FilePath, MimeType)
readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
readEPUB opts bytes = case toArchiveOrFail bytes of
@@ -126,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty
imageMimes :: [MimeType]
imageMimes = ["image/gif", "image/jpeg", "image/png"]
-type CoverId = String
+type CoverId = Text
type CoverImage = FilePath
-parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
+parseManifest :: (PandocMonad m)
+ => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
parseManifest content coverId = do
manifest <- findElementE (dfName "manifest") content
let items = findChildren (dfName "item") manifest
r <- mapM parseItem items
let cover = findAttr (emptyName "href") =<< filterChild findCover manifest
- return (cover `mplus` coverId, M.fromList r)
+ return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
where
- findCover e = maybe False (isInfixOf "cover-image")
+ findCover e = maybe False (T.isInfixOf "cover-image")
(findAttr (emptyName "properties") e)
|| Just True == liftM2 (==) coverId (findAttr (emptyName "id") e)
parseItem e = do
uid <- findAttrE (emptyName "id") e
href <- findAttrE (emptyName "href") e
mime <- findAttrE (emptyName "media-type") e
- return (uid, (href, T.pack mime))
+ return (uid, (T.unpack href, mime))
parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine is e = do
@@ -173,11 +173,11 @@ parseMeta content = do
-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e@(stripNamespace . elName -> field) meta =
- addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta
+ addMetaField (renameMeta field) (B.str $ strContent e) meta
-renameMeta :: String -> T.Text
+renameMeta :: Text -> Text
renameMeta "creator" = "author"
-renameMeta s = T.pack s
+renameMeta s = s
getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest archive = do
@@ -187,7 +187,7 @@ getManifest archive = do
ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
as <- fmap (map attrToPair . elAttribs)
(findElementE (QName "rootfile" (Just ns) Nothing) docElem)
- manifestFile <- mkE "Root not found" (lookup "full-path" as)
+ manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
let rootdir = dropFileName manifestFile
--mime <- lookup "media-type" as
manifest <- findEntryByPathE manifestFile archive
@@ -201,7 +201,8 @@ fixInternalReferences pathToFile =
. walk (fixBlockIRs filename)
. walk (fixInlineIRs filename)
where
- (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile
+ (root, T.unpack . escapeURI . T.pack -> filename) =
+ splitFileName pathToFile
fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs s (Span as v) =
@@ -214,7 +215,7 @@ fixInlineIRs s (Link as is t) =
Link (fixAttrs s as) is t
fixInlineIRs _ v = v
-prependHash :: [T.Text] -> Inline -> Inline
+prependHash :: [Text] -> Inline -> Inline
prependHash ps l@(Link attr is (url, tit))
| or [s `T.isPrefixOf` url | s <- ps] =
Link attr is ("#" <> url, tit)
@@ -231,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) =
fixBlockIRs _ b = b
fixAttrs :: FilePath -> B.Attr -> B.Attr
-fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
+fixAttrs s (ident, cs, kvs) =
+ (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs)
-addHash :: String -> T.Text -> T.Text
+addHash :: FilePath -> Text -> Text
addHash _ "" = ""
addHash s ident = T.pack (takeFileName s) <> "#" <> ident
-removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)]
+removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs
-isEPUBAttr :: (T.Text, a) -> Bool
+isEPUBAttr :: (Text, a) -> Bool
isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k
-- Library
@@ -257,33 +259,33 @@ uncurry3 f (a, b, c) = f a b c
-- Utility
-stripNamespace :: QName -> String
+stripNamespace :: QName -> Text
stripNamespace (QName v _ _) = v
-attrToNSPair :: Attr -> Maybe (String, String)
+attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val)
attrToNSPair _ = Nothing
-attrToPair :: Attr -> (String, String)
+attrToPair :: Attr -> (Text, Text)
attrToPair (Attr (QName name _ _) val) = (name, val)
-defaultNameSpace :: Maybe String
+defaultNameSpace :: Maybe Text
defaultNameSpace = Just "http://www.idpf.org/2007/opf"
-dfName :: String -> QName
+dfName :: Text -> QName
dfName s = QName s defaultNameSpace Nothing
-emptyName :: String -> QName
+emptyName :: Text -> QName
emptyName s = QName s Nothing Nothing
-- Convert Maybe interface to Either
-findAttrE :: PandocMonad m => QName -> Element -> m String
+findAttrE :: PandocMonad m => QName -> Element -> m Text
findAttrE q e = mkE "findAttr" $ findAttr q e
findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
findEntryByPathE (normalise . unEscapeString -> path) a =
- mkE ("No entry on path: " ++ path) $ findEntryByPath path a
+ mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a
parseXMLDocE :: PandocMonad m => Entry -> m Element
parseXMLDocE entry =
@@ -293,7 +295,8 @@ parseXMLDocE entry =
fp = T.pack $ eRelativePath entry
findElementE :: PandocMonad m => QName -> Element -> m Element
-findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x
+findElementE e x =
+ mkE ("Unable to find element: " <> tshow e) $ findElement e x
-mkE :: PandocMonad m => String -> Maybe a -> m a
-mkE s = maybe (throwError . PandocParseError $ T.pack s) return
+mkE :: PandocMonad m => Text -> Maybe a -> m a
+mkE s = maybe (throwError . PandocParseError $ s) return
diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs
index b804eab4f..66e390bd7 100644
--- a/src/Text/Pandoc/Readers/FB2.hs
+++ b/src/Text/Pandoc/Readers/FB2.hs
@@ -25,7 +25,6 @@ TODO:
module Text.Pandoc.Readers.FB2 ( readFB2 ) where
import Control.Monad.Except (throwError)
import Control.Monad.State.Strict
-import Data.ByteString.Lazy.Char8 ( pack )
import Data.ByteString.Base64.Lazy
import Data.Functor
import Data.List (intersperse)
@@ -42,8 +41,8 @@ import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
+import qualified Text.Pandoc.UTF8 as UTF8
type FB2 m = StateT FB2State m
@@ -85,12 +84,12 @@ removeHash t = case T.uncons t of
Just ('#', xs) -> xs
_ -> t
-convertEntity :: String -> Text
-convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e)
parseInline :: PandocMonad m => Content -> FB2 m Inlines
parseInline (Elem e) =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -98,12 +97,12 @@ parseInline (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ T.pack $ strContent e
+ "code" -> pure $ code $ strContent e
"image" -> parseInlineImageElement e
name -> do
report $ IgnoredElement name
pure mempty
-parseInline (Text x) = pure $ text $ T.pack $ cdData x
+parseInline (Text x) = pure $ text $ cdData x
parseInline (CRef r) = pure $ str $ convertEntity r
parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks
@@ -113,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel <
parseRootElement :: PandocMonad m => Element -> FB2 m Blocks
parseRootElement e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"FictionBook" -> do
-- Parse notes before parsing the rest of the content.
case filterChild isNotesBody e of
@@ -146,7 +145,7 @@ parseNote e =
Just sectionId -> do
content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e)
oldNotes <- gets fb2Notes
- modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes }
+ modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes }
pure ()
where
isTitle x = qName (elName x) == "title"
@@ -158,7 +157,7 @@ parseNote e =
-- | Parse a child of @\<FictionBook>@ element.
parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks
parseFictionBookChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"stylesheet" -> pure mempty -- stylesheet is ignored
"description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e)
"body" -> if isNotesBody e
@@ -170,7 +169,7 @@ parseFictionBookChild e =
-- | Parse a child of @\<description>@ element.
parseDescriptionChild :: PandocMonad m => Element -> FB2 m ()
parseDescriptionChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title-info" -> mapM_ parseTitleInfoChild (elChildren e)
"src-title-info" -> pure () -- ignore
"document-info" -> pure ()
@@ -184,7 +183,7 @@ parseDescriptionChild e =
-- | Parse a child of @\<body>@ element.
parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks
parseBodyChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"image" -> parseImageElement e
"title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e)
"epigraph" -> parseEpigraph e
@@ -198,7 +197,10 @@ parseBinaryElement e =
(Nothing, _) -> report $ IgnoredElement "binary without id attribute"
(Just _, Nothing) ->
report $ IgnoredElement "binary without content-type attribute"
- (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e)))
+ (Just filename, contentType) ->
+ insertMedia (T.unpack filename) contentType
+ (decodeLenient
+ (UTF8.fromTextLazy . TL.fromStrict . strContent $ e))
-- * Type parsers
@@ -208,13 +210,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e)
parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text)
parseAuthorChild e =
- case T.pack $ qName $ elName e of
- "first-name" -> pure $ Just $ T.pack $ strContent e
- "middle-name" -> pure $ Just $ T.pack $ strContent e
- "last-name" -> pure $ Just $ T.pack $ strContent e
- "nickname" -> pure $ Just $ T.pack $ strContent e
- "home-page" -> pure $ Just $ T.pack $ strContent e
- "email" -> pure $ Just $ T.pack $ strContent e
+ case qName $ elName e of
+ "first-name" -> pure $ Just $ strContent e
+ "middle-name" -> pure $ Just $ strContent e
+ "last-name" -> pure $ Just $ strContent e
+ "nickname" -> pure $ Just $ strContent e
+ "home-page" -> pure $ Just $ strContent e
+ "email" -> pure $ Just $ strContent e
name -> do
report $ IgnoredElement $ name <> " in author"
pure Nothing
@@ -238,13 +240,13 @@ parseTitleContent _ = pure Nothing
parseImageElement :: PandocMonad m => Element -> FB2 m Blocks
parseImageElement e =
case href of
- Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt
+ Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt
Nothing -> do
report $ IgnoredElement " image without href"
pure mempty
- where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
- title = maybe "" T.pack $ findAttr (unqual "title") e
- imgId = maybe "" T.pack $ findAttr (unqual "id") e
+ where alt = maybe mempty str $ findAttr (unqual "alt") e
+ title = fromMaybe "" $ findAttr (unqual "title") e
+ imgId = fromMaybe "" $ findAttr (unqual "id") e
href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
-- | Parse @pType@
@@ -258,7 +260,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e)
-- | Parse @citeType@ child
parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks
parseCiteChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"empty-line" -> pure horizontalRule
@@ -273,13 +275,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e)
parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks
parsePoemChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"epigraph" -> parseEpigraph e
"stanza" -> parseStanza e
"text-author" -> para <$> parsePType e
- "date" -> pure $ para $ text $ T.pack $ strContent e
+ "date" -> pure $ para $ text $ strContent e
name -> report (UnexpectedXmlElement name "poem") $> mempty
parseStanza :: PandocMonad m => Element -> FB2 m Blocks
@@ -292,7 +294,7 @@ joinLineBlocks [] = []
parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks
parseStanzaChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title" -> parseTitle e
"subtitle" -> parseSubtitle e
"v" -> lineBlock . (:[]) <$> parsePType e
@@ -302,11 +304,11 @@ parseStanzaChild e =
parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraph e =
divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e)
- where divId = maybe "" T.pack $ findAttr (unqual "id") e
+ where divId = fromMaybe "" $ findAttr (unqual "id") e
parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks
parseEpigraphChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -320,7 +322,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e)
parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks
parseAnnotationChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"p" -> para <$> parsePType e
"poem" -> parsePoem e
"cite" -> parseCite e
@@ -334,14 +336,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks
parseSection e = do
n <- gets fb2SectionLevel
modify $ \st -> st{ fb2SectionLevel = n + 1 }
- let sectionId = maybe "" T.pack $ findAttr (unqual "id") e
+ let sectionId = fromMaybe "" $ findAttr (unqual "id") e
bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e)
modify $ \st -> st{ fb2SectionLevel = n }
pure bs
parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks
parseSectionChild e =
- case T.pack $ qName $ elName e of
+ case qName $ elName e of
"title" -> parseBodyChild e
"epigraph" -> parseEpigraph e
"image" -> parseImageElement e
@@ -363,16 +365,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e)
parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines
parseNamedStyle e = do
content <- mconcat <$> mapM parseNamedStyleChild (elContent e)
- let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e
+ let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e
case findAttr (unqual "name") e of
- Just name -> pure $ spanWith ("", [T.pack name], lang) content
+ Just name -> pure $ spanWith ("", [name], lang) content
Nothing -> do
report $ IgnoredElement "link without required name"
pure mempty
parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines
parseNamedStyleChild (Elem e) =
- case T.pack $ qName (elName e) of
+ case qName (elName e) of
"strong" -> strong <$> parseStyleType e
"emphasis" -> emph <$> parseStyleType e
"style" -> parseNamedStyle e
@@ -380,7 +382,7 @@ parseNamedStyleChild (Elem e) =
"strikethrough" -> strikeout <$> parseStyleType e
"sub" -> subscript <$> parseStyleType e
"sup" -> superscript <$> parseStyleType e
- "code" -> pure $ code $ T.pack $ strContent e
+ "code" -> pure $ code $ strContent e
"image" -> parseInlineImageElement e
name -> do
report $ IgnoredElement $ name <> " in style"
@@ -392,7 +394,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines
parseLinkType e = do
content <- mconcat <$> mapM parseStyleLinkType (elContent e)
notes <- gets fb2Notes
- case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
+ case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
Just href -> case findAttr (QName "type" Nothing Nothing) e of
Just "note" -> case M.lookup href notes of
Nothing -> pure $ link href "" content
@@ -419,15 +421,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet
-- | Parse @title-infoType@
parseTitleInfoChild :: PandocMonad m => Element -> FB2 m ()
parseTitleInfoChild e =
- case T.pack $ qName (elName e) of
+ case qName (elName e) of
"genre" -> pure ()
"author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st})
- "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e))
+ "book-title" -> modify (setMeta "title" (text $ strContent e))
"annotation" -> parseAnnotation e >>= modify . setMeta "abstract"
"keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn ","
- $ T.pack
$ strContent e))
- "date" -> modify (setMeta "date" (text $ T.pack $ strContent e))
+ "date" -> modify (setMeta "date" (text $ strContent e))
"coverpage" -> parseCoverPage e
"lang" -> pure ()
"src-lang" -> pure ()
@@ -441,7 +442,7 @@ parseCoverPage e =
Just img -> case href of
Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src))
Nothing -> pure ()
- where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
+ where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img
Nothing -> pure ()
-- | Parse @inlineImageType@ element
@@ -454,5 +455,5 @@ parseInlineImageElement e =
Nothing -> do
report $ IgnoredElement "inline image without href"
pure mempty
- where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e
- href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ where alt = maybe mempty str $ findAttr (unqual "alt") e
+ href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e
diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs
index dfd343b7a..5353f2001 100644
--- a/src/Text/Pandoc/Readers/JATS.hs
+++ b/src/Text/Pandoc/Readers/JATS.hs
@@ -16,7 +16,7 @@ module Text.Pandoc.Readers.JATS ( readJATS ) where
import Control.Monad.State.Strict
import Control.Monad.Except (throwError)
import Text.Pandoc.Error (PandocError(..))
-import Data.Char (isDigit, isSpace, toUpper)
+import Data.Char (isDigit, isSpace)
import Data.Default
import Data.Generics
import Data.List (foldl', intersperse)
@@ -31,8 +31,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Options
import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
import qualified Data.Set as S (fromList, member)
import Data.Set ((\\))
@@ -67,29 +66,29 @@ normalizeTree = everywhere (mkT go)
where go :: [Content] -> [Content]
go (Text (CData CDataRaw _ _):xs) = xs
go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
+ Text (CData CDataText (s1 <> s2) z):xs
go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ Text (CData CDataText (s1 <> convertEntity r) z):xs
go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
+ Text (CData CDataText (convertEntity r <> s1) z):xs
go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
go xs = xs
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
attrValue attr =
fromMaybe "" . maybeAttrValue attr
-maybeAttrValue :: String -> Element -> Maybe Text
+maybeAttrValue :: Text -> Element -> Maybe Text
maybeAttrValue attr elt =
- T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
+ lookupAttrBy (\x -> qName x == attr) (elAttribs elt)
-- convenience function
-named :: String -> Element -> Bool
+named :: Text -> Element -> Bool
named s e = qName (elName e) == s
--
@@ -155,10 +154,10 @@ getBlocks e = mconcat <$>
parseBlock :: PandocMonad m => Content -> JATS m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+ else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
parseBlock (Elem e) =
case qName (elName e) of
"p" -> parseMixed para (elContent e)
@@ -207,7 +206,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ textContentRecursive e
+ $ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -271,7 +270,7 @@ parseBlock (Elem e) =
Just "center" -> AlignCenter
_ -> AlignDefault
let toWidth c = do
- w <- findAttrText (unqual "colwidth") c
+ w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w
if n > 0 then Just n else Nothing
let numrows = foldl' max 0 $ map length bodyrows
@@ -442,16 +441,10 @@ parseRef e = do
Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty
-- TODO handle mixed-citation
-findAttrText :: QName -> Element -> Maybe Text
-findAttrText x = fmap T.pack . findAttr x
-
textContent :: Element -> Text
-textContent = T.pack . strContent
-
-textContentRecursive :: Element -> Text
-textContentRecursive = T.pack . strContentRecursive
+textContent = strContent
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -460,9 +453,8 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> JATS m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
-parseInline (CRef ref) =
- return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (CRef ref) = return . text . convertEntity $ ref
parseInline (Elem e) =
case qName (elName e) of
"italic" -> innerInlines emph
@@ -507,9 +499,9 @@ parseInline (Elem e) =
else linkWith attr ("#" <> rid) "" ils
"ext-link" -> do
ils <- innerInlines id
- let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
+ let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> T.pack h
+ Just h -> h
_ -> "#" <> attrValue "rid" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, [], [])
@@ -529,7 +521,7 @@ parseInline (Elem e) =
where innerInlines f = extractSpaces f . mconcat <$>
mapM parseInline (elContent e)
mathML x =
- case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of
+ case readMathML . showElement $ everywhere (mkT removePrefix) x of
Left _ -> mempty
Right m -> writeTeX m
formula constructor = do
@@ -547,4 +539,4 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs
index bdadc4dd9..184d5a63f 100644
--- a/src/Text/Pandoc/Readers/OPML.hs
+++ b/src/Text/Pandoc/Readers/OPML.hs
@@ -13,7 +13,6 @@ Conversion of OPML to 'Pandoc' document.
module Text.Pandoc.Readers.OPML ( readOPML ) where
import Control.Monad.State.Strict
-import Data.Char (toUpper)
import Data.Default
import Data.Generics
import Data.Maybe (fromMaybe)
@@ -28,8 +27,7 @@ import Text.Pandoc.Error (PandocError(..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.Pandoc.Shared (crFilter, blocksToInlines')
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
import Control.Monad.Except (throwError)
type OPML m = StateT OPMLState m
@@ -69,25 +67,22 @@ normalizeTree = everywhere (mkT go)
where go :: [Content] -> [Content]
go (Text (CData CDataRaw _ _):xs) = xs
go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
+ Text (CData CDataText (s1 <> s2) z):xs
go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ Text (CData CDataText (s1 <> convertEntity r) z):xs
go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
+ Text (CData CDataText (convertEntity r <> s1) z):xs
go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs
go xs = xs
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e))
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
attrValue attr elt =
- maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-
-textContent :: Element -> Text
-textContent = T.pack . strContent
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- exceptT :: PandocMonad m => Either PandocError a -> OPML m a
-- exceptT = either throwError return
@@ -111,11 +106,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks
parseBlock (Elem e) =
case qName (elName e) of
"ownerName" -> mempty <$ modify (\st ->
- st{opmlDocAuthors = [text $ textContent e]})
+ st{opmlDocAuthors = [text $ strContent e]})
"dateModified" -> mempty <$ modify (\st ->
- st{opmlDocDate = text $ textContent e})
+ st{opmlDocDate = text $ strContent e})
"title" -> mempty <$ modify (\st ->
- st{opmlDocTitle = text $ textContent e})
+ st{opmlDocTitle = text $ strContent e})
"outline" -> gets opmlSectionLevel >>= sect . (+1)
"?xml" -> return mempty
_ -> getBlocks e
diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs
index 85308deb1..c274b6fd4 100644
--- a/src/Text/Pandoc/Readers/Odt.hs
+++ b/src/Text/Pandoc/Readers/Odt.hs
@@ -14,8 +14,7 @@ Entry point to the odt reader.
module Text.Pandoc.Readers.Odt ( readOdt ) where
import Codec.Archive.Zip
-import qualified Text.XML.Light as XML
-import Text.Pandoc.XMLParser (parseXMLElement)
+import Text.Pandoc.XML.Light
import qualified Data.ByteString.Lazy as B
@@ -91,7 +90,7 @@ archiveToOdt archive = do
--
-entryToXmlElem :: Entry -> Either PandocError XML.Element
+entryToXmlElem :: Entry -> Either PandocError Element
entryToXmlElem entry =
case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of
Right x -> Right x
diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
index 43c44e7e9..df90880fa 100644
--- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs
@@ -29,14 +29,14 @@ import Control.Monad ((<=<))
import qualified Data.ByteString.Lazy as B
import Data.Foldable (fold)
-import Data.List (find, stripPrefix)
+import Data.List (find)
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Maybe
import Data.Semigroup (First(..), Option(..))
import Text.TeXMath (readMathML, writeTeX)
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.Builder hiding (underline)
import Text.Pandoc.MediaBag (MediaBag, insertMedia)
@@ -557,7 +557,7 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover
>>?% mappend
--
extractText :: XML.Content -> Fallible T.Text
- extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData)
+ extractText (XML.Text cData) = succeedWith (XML.cdData cData)
extractText _ = failEmpty
read_text_seq :: InlineMatcher
@@ -777,14 +777,14 @@ read_frame_img =
"" -> returnV mempty -< ()
src' -> do
let exts = extensionsFromList [Ext_auto_identifiers]
- resource <- lookupResource -< src'
+ resource <- lookupResource -< T.unpack src'
_ <- updateMediaWithResource -< resource
w <- findAttrText' NsSVG "width" -< ()
h <- findAttrText' NsSVG "height" -< ()
titleNodes <- matchChildContent' [ read_frame_title ] -< ()
alt <- matchChildContent [] read_plain_text -< ()
arr (firstMatch . uncurry4 imageWith) -<
- (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt)
+ (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt)
read_frame_title :: InlineMatcher
read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text)
@@ -804,7 +804,8 @@ read_frame_mathml =
case fold src of
"" -> returnV mempty -< ()
src' -> do
- let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml"
+ let path = T.unpack $
+ fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml"
(_, mathml) <- lookupResource -< path
case readMathML (UTF8.toText $ B.toStrict mathml) of
Left _ -> returnV mempty -< ()
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
index 77174c793..78a7fc0b2 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs
@@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces.
module Text.Pandoc.Readers.Odt.Generic.Namespaces where
import qualified Data.Map as M
+import Data.Text (Text)
--
-type NameSpaceIRI = String
+type NameSpaceIRI = Text
--
type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
index 6dc56a0d9..edefe3c70 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs
@@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
, reverseComposition
, tryToRead
, Lookupable(..)
-, readLookupables
, readLookupable
, readPercent
, findBy
@@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils
import Control.Category (Category, (<<<), (>>>))
import qualified Control.Category as Cat (id)
-import Control.Monad (msum)
-
+import Data.Char (isSpace)
import qualified Data.Foldable as F (Foldable, foldr)
import Data.Maybe
-
+import Data.Text (Text)
+import qualified Data.Text as T
-- | Equivalent to
-- > foldr (.) id
@@ -76,8 +75,8 @@ swing = flip.(.flip id)
-- (nobody wants that) while the latter returns "to much" for simple purposes.
-- This function instead applies 'reads' and returns the first match (if any)
-- in a 'Maybe'.
-tryToRead :: (Read r) => String -> Maybe r
-tryToRead = reads >>> listToMaybe >>> fmap fst
+tryToRead :: (Read r) => Text -> Maybe r
+tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst
-- | A version of 'reads' that requires a '%' sign after the number
readPercent :: ReadS Int
@@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s
-- | Data that can be looked up.
-- This is mostly a utility to read data with kind *.
class Lookupable a where
- lookupTable :: [(String, a)]
-
--- | The idea is to use this function as if there was a declaration like
---
--- > instance (Lookupable a) => (Read a) where
--- > readsPrec _ = readLookupables
--- .
--- But including this code in this form would need UndecideableInstances.
--- That is a bad idea. Luckily 'readLookupable' (without the s at the end)
--- can be used directly in almost any case.
-readLookupables :: (Lookupable a) => String -> [(a,String)]
-readLookupables s = [ (a,rest) | (word,rest) <- lex s,
- a <- maybeToList (lookup word lookupTable)
- ]
+ lookupTable :: [(Text, a)]
-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer.
-readLookupable :: (Lookupable a) => String -> Maybe a
-readLookupable s = msum
- $ map ((`lookup` lookupTable).fst)
- $ lex s
+readLookupable :: (Lookupable a) => Text -> Maybe a
+readLookupable s =
+ lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable
uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z
uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z
diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
index 00c636a0d..0d921e23b 100644
--- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
+++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
@@ -60,11 +61,11 @@ import Control.Arrow
import Data.Bool ( bool )
import Data.Either ( rights )
import qualified Data.Map as M
-import qualified Data.Text as T
+import Data.Text (Text)
import Data.Default
import Data.Maybe
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
import Text.Pandoc.Readers.Odt.Arrows.State
import Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible
--------------------------------------------------------------------------------
--
-type ElementName = String
-type AttributeName = String
-type AttributeValue = String
-type TextAttributeValue = T.Text
+type ElementName = Text
+type AttributeName = Text
+type AttributeValue = Text
+type TextAttributeValue = Text
--
-type NameSpacePrefix = String
+type NameSpacePrefix = Text
--
type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix
@@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a)
lookupDefaultingAttr nsID attrName
= lookupAttrWithDefault nsID attrName def
--- | Return value as a (Maybe String)
+-- | Return value as a (Maybe Text)
findAttr' :: (NameSpaceID nsID)
=> nsID -> AttributeName
-> XMLConverter nsID extraState x (Maybe AttributeValue)
@@ -477,7 +478,6 @@ findAttrText' nsID attrName
= qualifyName nsID attrName
&&& getCurrentElement
>>% XML.findAttr
- >>^ fmap T.pack
-- | Return value as string or fail
findAttr :: (NameSpaceID nsID)
@@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID)
-> FallibleXMLConverter nsID extraState x TextAttributeValue
findAttrText nsID attrName
= findAttr' nsID attrName
- >>^ fmap T.pack
>>> maybeToChoice
-- | Return value as string or return provided default value
@@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID)
-> XMLConverter nsID extraState x TextAttributeValue
findAttrTextWithDefault nsID attrName deflt
= findAttr' nsID attrName
- >>^ maybe deflt T.pack
+ >>^ fromMaybe deflt
-- | Read and return value or fail
readAttr :: (NameSpaceID nsID, Read attrValue)
@@ -748,7 +747,7 @@ matchContent lookups fallback
-- Internals
--------------------------------------------------------------------------------
-stringToBool' :: String -> Maybe Bool
+stringToBool' :: Text -> Maybe Bool
stringToBool' val | val `elem` trueValues = Just True
| val `elem` falseValues = Just False
| otherwise = Nothing
diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
index 3a24a1162..70741c28d 100644
--- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs
+++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Reader.Odt.Namespaces
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -13,10 +14,10 @@ Namespaces used in odt files.
module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..)
) where
-import Data.List (isPrefixOf)
import qualified Data.Map as M (empty, insert)
import Data.Maybe (fromMaybe, listToMaybe)
-
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Readers.Odt.Generic.Namespaces
@@ -30,7 +31,7 @@ instance NameSpaceID Namespace where
findID :: NameSpaceIRI -> Maybe Namespace
-findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri]
+findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri]
nsIDmap :: NameSpaceIRIs Namespace
nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs
@@ -54,12 +55,12 @@ data Namespace = -- Open Document core
-- Core XML (basically only for the 'id'-attribute)
| NsXML
-- Fallback
- | NsOther String
+ | NsOther Text
deriving ( Eq, Ord, Show )
-- | Not the actual iri's, but large prefixes of them - this way there are
-- less versioning problems and the like.
-nsIDs :: [(String,Namespace)]
+nsIDs :: [(Text, Namespace)]
nsIDs = [
("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ),
("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ),
diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
index 46a777df1..5e10f896c 100644
--- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs
+++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE Arrows #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Odt.StyleReader
Copyright : Copyright (C) 2015 Martin Linnemann
@@ -46,11 +47,13 @@ import qualified Data.Foldable as F
import Data.List (unfoldr)
import qualified Data.Map as M
import Data.Maybe
+import Data.Text (Text)
+import qualified Data.Text as T
import qualified Data.Set as S
-import qualified Text.XML.Light as XML
+import qualified Text.Pandoc.XML.Light as XML
-import Text.Pandoc.Shared (safeRead)
+import Text.Pandoc.Shared (safeRead, tshow)
import Text.Pandoc.Readers.Odt.Arrows.Utils
@@ -90,7 +93,7 @@ instance Default FontPitch where
--
-- Thus, we want
-type FontFaceName = String
+type FontFaceName = Text
type FontPitches = M.Map FontFaceName FontPitch
@@ -151,7 +154,7 @@ findPitch = ( lookupAttr NsStyle "font-pitch"
-- Definitions of main data
--------------------------------------------------------------------------------
-type StyleName = String
+type StyleName = Text
-- | There are two types of styles: named styles with a style family and an
-- optional style parent, and default styles for each style family,
@@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} =
-- \^ simpler, but in general less efficient
data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType
- , listItemPrefix :: Maybe String
- , listItemSuffix :: Maybe String
+ , listItemPrefix :: Maybe Text
+ , listItemSuffix :: Maybe Text
, listItemFormat :: ListItemNumberFormat
, listItemStart :: Int
}
@@ -366,9 +369,9 @@ instance Show ListLevelStyle where
show ListLevelStyle{..} = "<LLS|"
++ show listLevelType
++ "|"
- ++ maybeToString listItemPrefix
+ ++ maybeToString (T.unpack <$> listItemPrefix)
++ show listItemFormat
- ++ maybeToString listItemSuffix
+ ++ maybeToString (T.unpack <$> listItemSuffix)
++ ">"
where maybeToString = fromMaybe ""
@@ -471,7 +474,7 @@ readTextProperties =
)
where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)]
isFontBold = ("normal",False):("bold",True)
- :map ((,True).show) ([100,200..900]::[Int])
+ :map ((,True) . tshow) ([100,200..900]::[Int])
readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode)
readUnderlineMode = readLineMode "text-underline-mode"
@@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode)
readStrikeThroughMode = readLineMode "text-line-through-mode"
"text-line-through-style"
-readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode)
+readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode)
readLineMode modeAttr styleAttr = proc x -> do
isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x
mode <- lookupAttr' NsStyle modeAttr -< x