aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <fiddlosopher@gmail.com>2012-05-07 10:02:48 -0700
committerJohn MacFarlane <fiddlosopher@gmail.com>2012-05-07 10:02:48 -0700
commit8d6cc370d4dda452d82e60a1c1fc44aee166eba6 (patch)
tree69f40fee09bdf79fbed4b9965b32ec28743b0579 /src
parent233c71b6a9a030d9913890ad58b0eb9b8b543385 (diff)
downloadpandoc-8d6cc370d4dda452d82e60a1c1fc44aee166eba6.tar.gz
DocBook reader: Added epigraph, fixed entities in plain contexts.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs42
1 files changed, 32 insertions, 10 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 6dbfa3192..7ddca20ff 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -5,6 +5,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.XML.Light
import Text.HTML.TagSoup.Entity (lookupEntity)
+import Data.Generics
import Data.Monoid
import Data.Char (isSpace)
import Control.Monad.State
@@ -121,7 +122,7 @@ List of all DocBook tags, with [x] indicating implemented,
[x] entry - A cell in a table
[ ] entrytbl - A subtable appearing in place of an Entry in a table
[ ] envar - A software environment variable
-[ ] epigraph - A short inscription at the beginning of a document or component
+[x] epigraph - A short inscription at the beginning of a document or component
note: also handle embedded attribution tag
[ ] equation - A displayed mathematical equation
[ ] errorcode - An error code
@@ -507,7 +508,7 @@ readDocBook st inp = setTitle (dbDocTitle st')
$ setAuthors (dbDocAuthors st')
$ setDate (dbDocDate st')
$ doc $ mconcat bs
- where (bs, st') = runState (mapM parseBlock $ parseXML inp)
+ where (bs, st') = runState (mapM parseBlock $ normalizeTree $ parseXML inp)
DBState{ dbSectionLevel = 0
, dbQuoteType = DoubleQuote
, dbDocTitle = mempty
@@ -516,6 +517,25 @@ readDocBook st inp = setTitle (dbDocTitle st')
, dbBook = False
}
+-- normalize input, consolidating adjacent Text and CRef elements
+normalizeTree :: [Content] -> [Content]
+normalizeTree = everywhere (mkT go)
+ where go :: [Content] -> [Content]
+ go (Text (CData CDataRaw _ _):xs) = xs
+ go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) =
+ Text (CData CDataText (s1 ++ s2) z):xs
+ go (Text (CData CDataText s1 z):CRef r:xs) =
+ Text (CData CDataText (s1 ++ [c]) z):xs
+ where c = maybe '?' id (lookupEntity r)
+ go (CRef r:Text (CData CDataText s1 z):xs) =
+ Text (CData CDataText ([c] ++ s1) z):xs
+ where c = maybe '?' id (lookupEntity r)
+ go (CRef r1:CRef r2:xs) =
+ Text (CData CDataText [c1,c2] Nothing):xs
+ where c1 = maybe '?' id (lookupEntity r1)
+ c2 = maybe '?' id (lookupEntity r2)
+ go xs = xs
+
-- convenience function to get an attribute value, defaulting to ""
attrValue :: String -> Element -> String
attrValue attr elt =
@@ -553,19 +573,14 @@ parseBlock :: Content -> DB Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
- else return $ plain $ text s
+ else return $ plain $ trimInlines $ text s
parseBlock (CRef _) = return mempty -- TODO need something better here
parseBlock (Elem e) =
case qName (elName e) of
"para" -> para <$> getInlines e
"ackno" -> para <$> getInlines e
- "blockquote" -> do
- attrib <- case filterChild (named "attribution") e of
- Nothing -> return mempty
- Just z -> (para . (str "— " <>) . mconcat)
- <$> (mapM parseInline $ elContent z)
- contents <- getBlocks e
- return $ blockQuote (contents <> attrib)
+ "epigraph" -> parseBlockquote
+ "blockquote" -> parseBlockquote
"attribution" -> return mempty
"titleabbrev" -> return mempty
"authorinitials" -> return mempty
@@ -618,6 +633,13 @@ parseBlock (Elem e) =
skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs
| otherwise = xs
skipWhite xs = xs
+ parseBlockquote = do
+ attrib <- case filterChild (named "attribution") e of
+ Nothing -> return mempty
+ Just z -> (para . (str "— " <>) . mconcat)
+ <$> (mapM parseInline $ elContent z)
+ contents <- getBlocks e
+ return $ blockQuote (contents <> attrib)
listitems = mapM getBlocks $ filterChildren (named "listitem") e
deflistitems = mapM parseVarListEntry $ filterChildren
(named "varlistentry") e