aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pandoc.cabal1
-rw-r--r--src/Text/Pandoc.hs2
-rw-r--r--src/Text/Pandoc/Readers/DocBook.hs130
-rw-r--r--src/pandoc.hs1
4 files changed, 134 insertions, 0 deletions
diff --git a/pandoc.cabal b/pandoc.cabal
index 444e737ae..9679c99ff 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -246,6 +246,7 @@ Library
Text.Pandoc.Readers.LaTeX,
Text.Pandoc.Readers.Markdown,
Text.Pandoc.Readers.RST,
+ Text.Pandoc.Readers.DocBook,
Text.Pandoc.Readers.TeXMath,
Text.Pandoc.Readers.Textile,
Text.Pandoc.Readers.Native,
diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index 597d2e07f..417362f02 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -119,6 +119,7 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.RST
+import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.HTML
import Text.Pandoc.Readers.Textile
@@ -162,6 +163,7 @@ readers = [("native" , \_ -> readNative)
,("rst" , readRST)
,("rst+lhs" , \st ->
readRST st{ stateLiterateHaskell = True})
+ ,("docbook" , readDocBook)
,("textile" , readTextile) -- TODO : textile+lhs
,("html" , readHtml)
,("latex" , readLaTeX)
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
new file mode 100644
index 000000000..8da933a2f
--- /dev/null
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -0,0 +1,130 @@
+module Text.Pandoc.Readers.DocBook ( readDocBook ) where
+import Text.Pandoc.Parsing (ParserState(..))
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder
+import Text.XML.Light
+import Data.Maybe (fromMaybe)
+import Data.Monoid
+import Data.Char (isSpace)
+import Control.Monad.State
+import Control.Applicative ((<$>))
+
+type DB = State DBState
+
+data DBState = DBState{ dbSectionLevel :: Int
+ , dbQuoteType :: QuoteType
+ , dbDocTitle :: Inlines
+ , dbDocAuthors :: [Inlines]
+ , dbDocDate :: Inlines
+ } deriving Show
+
+readDocBook :: ParserState -> String -> Pandoc
+readDocBook st inp = setTitle (dbDocTitle st')
+ $ setAuthors (dbDocAuthors st')
+ $ setDate (dbDocDate st')
+ $ doc $ mconcat bs
+ where (bs, st') = runState (mapM parseBlock $ parseXML inp)
+ DBState{ dbSectionLevel = 0
+ , dbQuoteType = DoubleQuote
+ , dbDocTitle = mempty
+ , dbDocAuthors = []
+ , dbDocDate = mempty
+ }
+
+parseBlock :: Content -> DB Blocks
+parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE
+parseBlock (Text (CData _ s _)) = if all isSpace s
+ then return mempty
+ else return $ plain $ text s
+parseBlock (Elem e) =
+ case qName (elName e) of
+ "para" -> para <$> getInlines e
+ "blockquote" -> blockQuote <$> getBlocks e
+ "sect1" -> sect 1
+ "sect2" -> sect 2
+ "sect3" -> sect 3
+ "sect4" -> sect 4
+ "sect5" -> sect 5
+ "sect6" -> sect 6
+ "section" -> gets dbSectionLevel >>= sect . (+1)
+ "abstract" -> blockQuote <$> getBlocks e
+ "itemizedlist" -> bulletList <$> listitems
+ "orderedlist" -> orderedList <$> listitems -- TODO list attributes
+ "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty
+ "programlisting" -> return $ codeBlock $ strContent e -- TODO attrs
+ "?xml" -> return mempty
+ _ -> getBlocks e
+ where getBlocks e' = mconcat <$> (mapM parseBlock $ elContent e')
+ getInlines e' = (trimInlines . mconcat) <$>
+ (mapM parseInline $ elContent e')
+ isTitle e' = qName (elName e') == "title"
+ skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs
+ | otherwise = xs
+ skipWhite xs = xs
+ listitems = mapM getBlocks $ findChildren (unqual "listitem") e
+ getTitle = case findChild (unqual "title") e of
+ Just t -> do
+ tit <- getInlines t
+ modify $ \st -> st{dbDocTitle = tit}
+ Nothing -> return ()
+ getAuthors = do
+ auths <- mapM getInlines
+ $ findChildren (unqual "author") e
+ modify $ \st -> st{dbDocAuthors = auths}
+ getDate = case findChild (unqual "date") e of
+ Just t -> do
+ dat <- getInlines t
+ modify $ \st -> st{dbDocDate = dat}
+ Nothing -> return ()
+ sect n = case skipWhite (elContent e) of
+ ((Elem t):body)
+ | isTitle t -> do
+ h <- header n <$> (getInlines t)
+ modify $ \st -> st{ dbSectionLevel = n }
+ b <- mconcat <$> (mapM parseBlock body)
+ modify $ \st -> st{ dbSectionLevel = n - 1 }
+ return $ h <> b
+ body -> do
+ let h = header n mempty
+ modify $ \st -> st{ dbSectionLevel = n }
+ b <- mconcat <$> (mapM parseBlock body)
+ modify $ \st -> st{ dbSectionLevel = n - 1 }
+ return $ h <> b
+parseBlock (CRef _) = return mempty
+
+parseInline :: Content -> DB Inlines
+parseInline (Text (CData _ s _)) = return $ text s
+parseInline (Elem e) =
+ case qName (elName e) of
+ "subscript" -> subscript <$> innerInlines
+ "superscript" -> superscript <$> innerInlines
+ "quote" -> do
+ qt <- gets dbQuoteType
+ let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote
+ modify $ \st -> st{ dbQuoteType = qt' }
+ contents <- innerInlines
+ modify $ \st -> st{ dbQuoteType = qt }
+ return $ if qt == SingleQuote
+ then singleQuoted contents
+ else doubleQuoted contents
+ "literal" -> return $ code $ strContent e -- TODO attrs
+ "varname" -> return $ codeWith ("",["varname"],[]) $ strContent e
+ "function" -> return $ codeWith ("",["function"],[]) $ strContent e
+ "type" -> return $ codeWith ("",["type"],[]) $ strContent e
+ "symbol" -> return $ codeWith ("",["symbol"],[]) $ strContent e
+ "constant" -> return $ codeWith ("",["constant"],[]) $ strContent e
+ "userinput" -> return $ codeWith ("",["userinput"],[]) $ strContent e
+ "varargs" -> return $ str "(…)"
+ "ulink" -> link
+ (fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url")
+ (elAttribs e))) "" <$> innerInlines
+ "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role")
+ (elAttribs e) of
+ Just "strong" -> strong <$> innerInlines
+ _ -> emph <$> innerInlines
+ "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e)
+ _ -> innerInlines
+ where innerInlines = (trimInlines . mconcat) <$>
+ (mapM parseInline $ elContent e)
+parseInline (CRef _) = return mempty
+
diff --git a/src/pandoc.hs b/src/pandoc.hs
index dab7b4161..a8a70a1b4 100644
--- a/src/pandoc.hs
+++ b/src/pandoc.hs
@@ -689,6 +689,7 @@ defaultReaderName fallback (x:xs) =
".ltx" -> "latex"
".rst" -> "rst"
".lhs" -> "markdown+lhs"
+ ".db" -> "docbook"
".textile" -> "textile"
".native" -> "native"
".json" -> "json"