blob: 5cb41638a7bf9eea855e99c433bc52b0ece446d1 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Text.Pandoc.Parsing (ParserState(..))
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.XML.Light
import Data.Monoid
import Data.Char (isSpace)
import Control.Monad.State
import Control.Applicative ((<$>))
type DB = State DBState
data DBState = DBState{ dbSectionLevel :: Int
, dbDocTitle :: Inlines
, dbDocAuthors :: [Inlines]
, dbDocDate :: Inlines
} deriving Show
readDocBook :: ParserState -> String -> Pandoc
readDocBook st inp = setTitle (dbDocTitle st')
$ setAuthors (dbDocAuthors st')
$ setDate (dbDocDate st')
$ doc $ mconcat bs
where (bs, st') = runState (mapM parseBlock $ parseXML inp)
DBState{ dbSectionLevel = 0
, dbDocTitle = mempty
, dbDocAuthors = []
, dbDocDate = mempty
}
parseBlock :: Content -> DB Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
then return mempty
else return $ plain $ text s
parseBlock (Elem e) =
case qName (elName e) of
"para" -> para <$> getInlines e
"blockquote" -> blockQuote <$> getBlocks e
"sect1" -> sect 1
"sect2" -> sect 2
"sect3" -> sect 3
"sect4" -> sect 4
"sect5" -> sect 5
"sect6" -> sect 6
"section" -> gets dbSectionLevel >>= sect . (+1)
"itemizedlist" -> bulletList <$> listitems
"articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty
"programlisting" -> return $ codeBlock $ strContent e
"title" -> return mempty -- processed by sect
"?xml" -> return mempty
_ -> getBlocks e
where getBlocks e' = mconcat <$> (mapM parseBlock $ elContent e')
getInlines e' = (trimInlines . mconcat) <$>
(mapM parseInline $ elContent e')
isTitle e' = qName (elName e') == "title"
skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs
| otherwise = xs
skipWhite xs = xs
listitems = mapM getBlocks $ findChildren (unqual "listitem") e
getTitle = case findChild (unqual "title") e of
Just t -> do
tit <- getInlines t
modify $ \st -> st{dbDocTitle = tit}
Nothing -> return ()
getAuthors = do
auths <- mapM getInlines
$ findChildren (unqual "author") e
modify $ \st -> st{dbDocAuthors = auths}
getDate = case findChild (unqual "date") e of
Just t -> do
dat <- getInlines t
modify $ \st -> st{dbDocDate = dat}
Nothing -> return ()
sect n = case skipWhite (elContent e) of
((Elem t):body)
| isTitle t -> do
h <- header n <$> (getInlines t)
modify $ \st -> st{ dbSectionLevel = n }
b <- mconcat <$> (mapM parseBlock body)
modify $ \st -> st{ dbSectionLevel = n - 1 }
return $ h <> b
body -> do
let h = header n mempty
modify $ \st -> st{ dbSectionLevel = n }
b <- mconcat <$> (mapM parseBlock body)
modify $ \st -> st{ dbSectionLevel = n - 1 }
return $ h <> b
parseBlock (CRef _) = return mempty
parseInline :: Content -> DB Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (Elem e) =
case qName (elName e) of
"subscript" -> subscript <$> innerInlines
"superscript" -> superscript <$> innerInlines
"emphasis" -> case lookupAttrBy (\attr -> qName attr == "role")
(elAttribs e) of
Just "strong" -> strong <$> innerInlines
_ -> emph <$> innerInlines
"footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
_ -> innerInlines
where innerInlines = (trimInlines . mconcat) <$>
(mapM parseInline $ elContent e)
parseInline (CRef _) = return mempty
|