aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/DocBook.hs
blob: 5cb41638a7bf9eea855e99c433bc52b0ece446d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Text.Pandoc.Parsing (ParserState(..))
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.XML.Light
import Data.Monoid
import Data.Char (isSpace)
import Control.Monad.State
import Control.Applicative ((<$>))

type DB = State DBState

data DBState = DBState{ dbSectionLevel :: Int
                      , dbDocTitle     :: Inlines
                      , dbDocAuthors   :: [Inlines]
                      , dbDocDate      :: Inlines
                      } deriving Show

readDocBook :: ParserState -> String -> Pandoc
readDocBook st inp = setTitle (dbDocTitle st')
                   $ setAuthors (dbDocAuthors st')
                   $ setDate (dbDocDate st')
                   $ doc $ mconcat bs
  where (bs, st') = runState (mapM parseBlock $ parseXML inp)
                             DBState{ dbSectionLevel = 0
                                    , dbDocTitle = mempty
                                    , dbDocAuthors = []
                                    , dbDocDate = mempty
                                    }

parseBlock :: Content -> DB Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
                                     then return mempty
                                     else return $ plain $ text s
parseBlock (Elem e) =
  case qName (elName e) of
        "para"  -> para <$> getInlines e
        "blockquote" -> blockQuote <$> getBlocks e
        "sect1" -> sect 1
        "sect2" -> sect 2
        "sect3" -> sect 3
        "sect4" -> sect 4
        "sect5" -> sect 5
        "sect6" -> sect 6
        "section" -> gets dbSectionLevel >>= sect . (+1)
        "itemizedlist" -> bulletList <$> listitems
        "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty
        "programlisting" -> return $ codeBlock $ strContent e
        "title" -> return mempty -- processed by sect
        "?xml"  -> return mempty
        _       -> getBlocks e
   where getBlocks e' =  mconcat <$> (mapM parseBlock $ elContent e')
         getInlines e' = (trimInlines . mconcat) <$>
                       (mapM parseInline $ elContent e')
         isTitle e' = qName (elName e') == "title"
         skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs
                                           | otherwise     = xs
         skipWhite xs = xs
         listitems = mapM getBlocks $ findChildren (unqual "listitem") e
         getTitle = case findChild (unqual "title") e of
                         Just t  -> do
                            tit <- getInlines t
                            modify $ \st -> st{dbDocTitle = tit}
                         Nothing -> return ()
         getAuthors = do
                      auths <- mapM getInlines
                               $ findChildren (unqual "author") e
                      modify $ \st -> st{dbDocAuthors = auths}
         getDate = case findChild (unqual "date") e of
                         Just t  -> do
                            dat <- getInlines t
                            modify $ \st -> st{dbDocDate = dat}
                         Nothing -> return ()
         sect n = case skipWhite (elContent e) of
                        ((Elem t):body)
                          | isTitle t -> do
                                  h <- header n <$> (getInlines t)
                                  modify $ \st -> st{ dbSectionLevel = n }
                                  b <- mconcat <$> (mapM parseBlock body)
                                  modify $ \st -> st{ dbSectionLevel = n - 1 }
                                  return $ h <> b
                        body      -> do
                                  let h = header n mempty
                                  modify $ \st -> st{ dbSectionLevel = n }
                                  b <- mconcat <$> (mapM parseBlock body)
                                  modify $ \st -> st{ dbSectionLevel = n - 1 }
                                  return $ h <> b
parseBlock (CRef _) = return mempty

parseInline :: Content -> DB Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (Elem e) =
  case qName (elName e) of
        "subscript" -> subscript <$> innerInlines
        "superscript" -> superscript <$> innerInlines
        "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role")
                           (elAttribs e) of
                             Just "strong" -> strong <$> innerInlines
                             _             -> emph <$> innerInlines
        "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
        _          -> innerInlines
   where innerInlines = (trimInlines . mconcat) <$>
                          (mapM parseInline $ elContent e)
parseInline (CRef _) = return mempty