aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/DocBook.hs
blob: 8da933a2f554cb0f0f8d77307dde8d0e4b5a4955 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
module Text.Pandoc.Readers.DocBook ( readDocBook ) where
import Text.Pandoc.Parsing (ParserState(..))
import Text.Pandoc.Definition
import Text.Pandoc.Builder
import Text.XML.Light
import Data.Maybe (fromMaybe)
import Data.Monoid
import Data.Char (isSpace)
import Control.Monad.State
import Control.Applicative ((<$>))

type DB = State DBState

data DBState = DBState{ dbSectionLevel :: Int
                      , dbQuoteType    :: QuoteType
                      , dbDocTitle     :: Inlines
                      , dbDocAuthors   :: [Inlines]
                      , dbDocDate      :: Inlines
                      } deriving Show

readDocBook :: ParserState -> String -> Pandoc
readDocBook st inp = setTitle (dbDocTitle st')
                   $ setAuthors (dbDocAuthors st')
                   $ setDate (dbDocDate st')
                   $ doc $ mconcat bs
  where (bs, st') = runState (mapM parseBlock $ parseXML inp)
                             DBState{ dbSectionLevel = 0
                                    , dbQuoteType = DoubleQuote
                                    , dbDocTitle = mempty
                                    , dbDocAuthors = []
                                    , dbDocDate = mempty
                                    }

parseBlock :: Content -> DB Blocks
parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
parseBlock (Text (CData _ s _)) = if all isSpace s
                                     then return mempty
                                     else return $ plain $ text s
parseBlock (Elem e) =
  case qName (elName e) of
        "para"  -> para <$> getInlines e
        "blockquote" -> blockQuote <$> getBlocks e
        "sect1" -> sect 1
        "sect2" -> sect 2
        "sect3" -> sect 3
        "sect4" -> sect 4
        "sect5" -> sect 5
        "sect6" -> sect 6
        "section" -> gets dbSectionLevel >>= sect . (+1)
        "abstract" -> blockQuote <$> getBlocks e
        "itemizedlist" -> bulletList <$> listitems
        "orderedlist" -> orderedList <$> listitems -- TODO list attributes
        "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty
        "programlisting" -> return $ codeBlock $ strContent e  -- TODO attrs
        "?xml"  -> return mempty
        _       -> getBlocks e
   where getBlocks e' =  mconcat <$> (mapM parseBlock $ elContent e')
         getInlines e' = (trimInlines . mconcat) <$>
                       (mapM parseInline $ elContent e')
         isTitle e' = qName (elName e') == "title"
         skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs
                                           | otherwise     = xs
         skipWhite xs = xs
         listitems = mapM getBlocks $ findChildren (unqual "listitem") e
         getTitle = case findChild (unqual "title") e of
                         Just t  -> do
                            tit <- getInlines t
                            modify $ \st -> st{dbDocTitle = tit}
                         Nothing -> return ()
         getAuthors = do
                      auths <- mapM getInlines
                               $ findChildren (unqual "author") e
                      modify $ \st -> st{dbDocAuthors = auths}
         getDate = case findChild (unqual "date") e of
                         Just t  -> do
                            dat <- getInlines t
                            modify $ \st -> st{dbDocDate = dat}
                         Nothing -> return ()
         sect n = case skipWhite (elContent e) of
                        ((Elem t):body)
                          | isTitle t -> do
                                  h <- header n <$> (getInlines t)
                                  modify $ \st -> st{ dbSectionLevel = n }
                                  b <- mconcat <$> (mapM parseBlock body)
                                  modify $ \st -> st{ dbSectionLevel = n - 1 }
                                  return $ h <> b
                        body      -> do
                                  let h = header n mempty
                                  modify $ \st -> st{ dbSectionLevel = n }
                                  b <- mconcat <$> (mapM parseBlock body)
                                  modify $ \st -> st{ dbSectionLevel = n - 1 }
                                  return $ h <> b
parseBlock (CRef _) = return mempty

parseInline :: Content -> DB Inlines
parseInline (Text (CData _ s _)) = return $ text s
parseInline (Elem e) =
  case qName (elName e) of
        "subscript" -> subscript <$> innerInlines
        "superscript" -> superscript <$> innerInlines
        "quote" -> do
            qt <- gets dbQuoteType
            let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
            modify $ \st -> st{ dbQuoteType = qt' }
            contents <- innerInlines
            modify $ \st -> st{ dbQuoteType = qt }
            return $ if qt == SingleQuote
                        then singleQuoted contents
                        else doubleQuoted contents
        "literal" -> return $ code $ strContent e -- TODO attrs
        "varname" -> return $ codeWith ("",["varname"],[]) $ strContent e
        "function" -> return $ codeWith ("",["function"],[]) $ strContent e
        "type"    -> return $ codeWith ("",["type"],[]) $ strContent e
        "symbol"  -> return $ codeWith ("",["symbol"],[]) $ strContent e
        "constant" -> return $ codeWith ("",["constant"],[]) $ strContent e
        "userinput" -> return $ codeWith ("",["userinput"],[]) $ strContent e
        "varargs" -> return $ str "(…)"
        "ulink" -> link
            (fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url")
              (elAttribs e))) "" <$> innerInlines
        "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role")
                           (elAttribs e) of
                             Just "strong" -> strong <$> innerInlines
                             _             -> emph <$> innerInlines
        "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
        _          -> innerInlines
   where innerInlines = (trimInlines . mconcat) <$>
                          (mapM parseInline $ elContent e)
parseInline (CRef _) = return mempty