From d339b29967878f64d5fe45d03d214476e9d88f7e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 16:44:21 -0700 Subject: Added skeleton of basic docbook reader. --- src/Text/Pandoc.hs | 2 ++ src/Text/Pandoc/Pretty.hs | 2 +- src/Text/Pandoc/Readers/DocBook.hs | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 src/Text/Pandoc/Readers/DocBook.hs (limited to 'src/Text') diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 597d2e07f..417362f02 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -119,6 +119,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Generic import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.RST +import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.HTML import Text.Pandoc.Readers.Textile @@ -162,6 +163,7 @@ readers = [("native" , \_ -> readNative) ,("rst" , readRST) ,("rst+lhs" , \st -> readRST st{ stateLiterateHaskell = True}) + ,("docbook" , readDocBook) ,("textile" , readTextile) -- TODO : textile+lhs ,("html" , readHtml) ,("latex" , readLaTeX) diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index bf78b2594..3cabcb75b 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving, DatatypeContexts #-} {- Copyright (C) 2010 John MacFarlane diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs new file mode 100644 index 000000000..73a2e6abc --- /dev/null +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -0,0 +1,36 @@ +module Text.Pandoc.Readers.DocBook ( readDocBook ) where +import Text.Pandoc.Parsing (ParserState(..), defaultParserState) +import Text.Pandoc.Definition +import Text.Pandoc.Builder +import Text.XML.Light +import Data.Monoid +import Data.Char (isSpace) + +readDocBook :: ParserState -> String -> Pandoc +readDocBook st inp = Pandoc (Meta [] [] []) $ toList blocks + where blocks = mconcat $ map (parseBlock st) $ parseXML inp + +parseBlock :: ParserState -> Content -> Blocks +parseBlock st (Text (CData _ s _)) = if all isSpace s + then mempty + else plain $ text s +parseBlock st (Elem e) = + case qName (elName e) of + "para" -> para $ trimInlines $ mconcat + $ map (parseInline st) $ elContent e + _ -> mconcat $ map (parseBlock st) $ elContent e +parseBlock st (CRef _) = mempty + +parseInline :: ParserState -> Content -> Inlines +parseInline st (Text (CData _ s _)) = text s +parseInline st (Elem e) = + case qName (elName e) of + "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role") + (elAttribs e) of + Just "strong" -> strong innerInlines + _ -> emph innerInlines + _ -> innerInlines + where innerInlines = trimInlines . mconcat . map (parseInline st) + $ elContent e +parseInline st (CRef _) = mempty + -- cgit v1.2.3