aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/DocBook.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-14 22:29:21 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-16 16:55:20 -0800
commit967e7f5fb990b29de48b37be1db40fb149a8cf55 (patch)
treeb9f903a5f2af14f20e769903e80659b9bffd59ff /src/Text/Pandoc/Readers/DocBook.hs
parentb5b576184c3c1668aad0c904e186136b81a0dd54 (diff)
downloadpandoc-967e7f5fb990b29de48b37be1db40fb149a8cf55.tar.gz
Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light...
..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms |
Diffstat (limited to 'src/Text/Pandoc/Readers/DocBook.hs')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs76
1 files changed, 38 insertions, 38 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index ad0108843..e201b54fe 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -31,8 +31,7 @@ import Text.Pandoc.Options
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces)
import Text.TeXMath (readMathML, writeTeX)
-import Text.XML.Light
-import Text.Pandoc.XMLParser (parseXMLContents)
+import Text.Pandoc.XML.Light
{-
@@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go)
where go :: [Content] -> [Content]
go (Text (CData CDataRaw _ _):xs) = xs
go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
- Text (CData CDataText (s1 ++ s2) z):xs
+ Text (CData CDataText (s1 <> s2) z):xs
go (Text (CData CDataText s1 z):CRef r:xs) =
- Text (CData CDataText (s1 ++ convertEntity r) z):xs
+ Text (CData CDataText (s1 <> convertEntity r) z):xs
go (CRef r:Text (CData CDataText s1 z):xs) =
- Text (CData CDataText (convertEntity r ++ s1) z):xs
+ Text (CData CDataText (convertEntity r <> s1) z):xs
go (CRef r1:CRef r2:xs) =
- Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs
+ Text (CData CDataText (convertEntity r1 <>
+ convertEntity r2) Nothing):xs
go xs = xs
-convertEntity :: String -> String
-convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e)
+convertEntity :: Text -> Text
+convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e)
-- convenience function to get an attribute value, defaulting to ""
-attrValue :: String -> Element -> Text
+attrValue :: Text -> Element -> Text
attrValue attr elt =
- maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
+ fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt))
-- convenience function
named :: Text -> Element -> Bool
-named s e = qName (elName e) == T.unpack s
+named s e = qName (elName e) == s
--
@@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool
isBlockElement (Elem e) = qName (elName e) `elem` blockTags
isBlockElement _ = False
-blockTags :: [String]
+blockTags :: [Text]
blockTags =
[ "abstract"
, "ackno"
@@ -721,7 +721,7 @@ blockTags =
, "variablelist"
] ++ admonitionTags
-admonitionTags :: [String]
+admonitionTags :: [Text]
admonitionTags = ["important","caution","note","tip","warning"]
-- Trim leading and trailing newline characters
@@ -779,10 +779,10 @@ getBlocks e = mconcat <$>
parseBlock :: PandocMonad m => Content -> DB m Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
-parseBlock (Text (CData _ s _)) = if all isSpace s
+parseBlock (Text (CData _ s _)) = if T.all isSpace s
then return mempty
- else return $ plain $ trimInlines $ text $ T.pack s
-parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x
+ else return $ plain $ trimInlines $ text s
+parseBlock (CRef x) = return $ plain $ str $ T.toUpper x
parseBlock (Elem e) =
case qName (elName e) of
"toc" -> skip -- skip TOC, since in pandoc it's autogenerated
@@ -837,7 +837,7 @@ parseBlock (Elem e) =
"refsect2" -> sect 2
"refsect3" -> sect 3
"refsection" -> gets dbSectionLevel >>= sect . (+1)
- l | l `elem` admonitionTags -> parseAdmonition $ T.pack l
+ l | l `elem` admonitionTags -> parseAdmonition l
"area" -> skip
"areaset" -> skip
"areaspec" -> skip
@@ -899,7 +899,7 @@ parseBlock (Elem e) =
"subtitle" -> return mempty -- handled in parent element
_ -> skip >> getBlocks e
where skip = do
- let qn = T.pack $ qName $ elName e
+ let qn = qName $ elName e
let name = if "pi-" `T.isPrefixOf` qn
then "<?" <> qn <> "?>"
else qn
@@ -911,7 +911,7 @@ parseBlock (Elem e) =
"" -> []
x -> [x]
return $ codeBlockWith (attrValue "id" e, classes', [])
- $ trimNl $ T.pack $ strContentRecursive e
+ $ trimNl $ strContentRecursive e
parseBlockquote = do
attrib <- case filterChild (named "attribution") e of
Nothing -> return mempty
@@ -965,7 +965,7 @@ parseBlock (Elem e) =
w <- findAttr (unqual "colwidth") c
n <- safeRead $ "0" <> T.filter (\x ->
(x >= '0' && x <= '9')
- || x == '.') (T.pack w)
+ || x == '.') w
if n > 0 then Just n else Nothing
let numrows = case bodyrows of
[] -> 0
@@ -1048,12 +1048,12 @@ parseMixed container conts = do
x <- parseMixed container rs
return $ p <> b <> x
-parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell]
+parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell]
parseRow cn = do
let isEntry x = named "entry" x || named "td" x || named "th" x
mapM (parseEntry cn) . filterChildren isEntry
-parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell
+parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell
parseEntry cn el = do
let colDistance sa ea = do
let iStrt = elemIndex sa cn
@@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines
getInlines e' = trimInlines . mconcat <$>
mapM parseInline (elContent e')
-strContentRecursive :: Element -> String
+strContentRecursive :: Element -> Text
strContentRecursive = strContent .
(\e' -> e'{ elContent = map elementToStr $ elContent e' })
@@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing
elementToStr x = x
parseInline :: PandocMonad m => Content -> DB m Inlines
-parseInline (Text (CData _ s _)) = return $ text $ T.pack s
+parseInline (Text (CData _ s _)) = return $ text s
parseInline (CRef ref) =
- return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref
+ return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref)
parseInline (Elem e) =
case qName (elName e) of
"anchor" -> do
@@ -1138,7 +1138,7 @@ parseInline (Elem e) =
"userinput" -> codeWithLang
"systemitem" -> codeWithLang
"varargs" -> return $ code "(...)"
- "keycap" -> return (str $ T.pack $ strContent e)
+ "keycap" -> return (str $ strContent e)
"keycombo" -> keycombo <$>
mapM parseInline (elContent e)
"menuchoice" -> menuchoice <$>
@@ -1150,17 +1150,17 @@ parseInline (Elem e) =
let title = case attrValue "endterm" e of
"" -> maybe "???" xrefTitleByElem
(findElementById linkend content)
- endterm -> maybe "???" (T.pack . strContent)
+ endterm -> maybe "???" strContent
(findElementById endterm content)
return $ link ("#" <> linkend) "" (text title)
- "email" -> return $ link ("mailto:" <> T.pack (strContent e)) ""
- $ str $ T.pack $ strContent e
- "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e
+ "email" -> return $ link ("mailto:" <> strContent e) ""
+ $ str $ strContent e
+ "uri" -> return $ link (strContent e) "" $ str $ strContent e
"ulink" -> innerInlines (link (attrValue "url" e) "")
"link" -> do
ils <- innerInlines id
let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of
- Just h -> T.pack h
+ Just h -> h
_ -> "#" <> attrValue "linkend" e
let ils' = if ils == mempty then str href else ils
let attr = (attrValue "id" e, T.words $ attrValue "role" e, [])
@@ -1180,7 +1180,7 @@ parseInline (Elem e) =
"pi-asciidoc-br" -> return linebreak
_ -> skip >> innerInlines id
where skip = do
- let qn = T.pack $ qName $ elName e
+ let qn = qName $ elName e
let name = if "pi-" `T.isPrefixOf` qn
then "<?" <> qn <> "?>"
else qn
@@ -1193,7 +1193,7 @@ parseInline (Elem e) =
let classes' = case attrValue "language" e of
"" -> []
l -> [l]
- return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e
+ return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e
simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines
(filterChildren (named "member") e)
segmentedList = do
@@ -1234,10 +1234,10 @@ parseInline (Elem e) =
"sect5" -> descendantContent "title" el
"cmdsynopsis" -> descendantContent "command" el
"funcsynopsis" -> descendantContent "function" el
- _ -> T.pack $ qName (elName el) ++ "_title"
+ _ -> qName (elName el) <> "_title"
where
xrefLabel = attrValue "xreflabel" el
- descendantContent name = maybe "???" (T.pack . strContent)
+ descendantContent name = maybe "???" strContent
. filterElementName (\n -> qName n == name)
-- | Extract a math equation from an element
@@ -1258,7 +1258,7 @@ equation e constructor =
mathMLEquations :: [Text]
mathMLEquations = map writeTeX $ rights $ readMath
(\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml")
- (readMathML . T.pack . showElement)
+ (readMathML . showElement)
latexEquations :: [Text]
latexEquations = readMath (\x -> qName (elName x) == "mathphrase")
@@ -1272,8 +1272,8 @@ equation e constructor =
-- | Get the actual text stored in a CData block. 'showContent'
-- returns the text still surrounded by the [[CDATA]] tags.
showVerbatimCData :: Content -> Text
-showVerbatimCData (Text (CData _ d _)) = T.pack d
-showVerbatimCData c = T.pack $ showContent c
+showVerbatimCData (Text (CData _ d _)) = d
+showVerbatimCData c = showContent c
-- | Set the prefix of a name to 'Nothing'