diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2012-04-14 17:33:56 -0700 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2012-04-14 17:33:56 -0700 |
commit | 9ecb9b5def145db41f954be4b6ee1ce2c23bbf6c (patch) | |
tree | 1f16bc01a70d47fda7423f3994a1a73ffbdf845f /src/Text | |
parent | d339b29967878f64d5fe45d03d214476e9d88f7e (diff) | |
download | pandoc-9ecb9b5def145db41f954be4b6ee1ce2c23bbf6c.tar.gz |
DocBook reader improvements.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Pretty.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 67 |
2 files changed, 50 insertions, 19 deletions
diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 3cabcb75b..bf78b2594 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, DatatypeContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2010 John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 73a2e6abc..b8ff3f1ff 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -5,32 +5,63 @@ import Text.Pandoc.Builder import Text.XML.Light import Data.Monoid import Data.Char (isSpace) +import Control.Monad.State +import Control.Applicative ((<$>)) + +type DB = State DBState + +data DBState = DBState{ dbSectionLevel :: Int } + deriving (Read, Show) + +defaultDBState :: DBState +defaultDBState = DBState { dbSectionLevel = 0 } readDocBook :: ParserState -> String -> Pandoc readDocBook st inp = Pandoc (Meta [] [] []) $ toList blocks - where blocks = mconcat $ map (parseBlock st) $ parseXML inp + where blocks = mconcat $ evalState (mapM parseBlock $ parseXML inp) + defaultDBState -parseBlock :: ParserState -> Content -> Blocks -parseBlock st (Text (CData _ s _)) = if all isSpace s - then mempty - else plain $ text s -parseBlock st (Elem e) = +parseBlock :: Content -> DB Blocks +parseBlock (Text (CData _ s _)) = if all isSpace s + then return mempty + else return $ plain $ text s +parseBlock (Elem e) = case qName (elName e) of - "para" -> para $ trimInlines $ mconcat - $ map (parseInline st) $ elContent e - _ -> mconcat $ map (parseBlock st) $ elContent e -parseBlock st (CRef _) = mempty + "para" -> para <$> getInlines e + "sect1" -> sect 1 + "sect2" -> sect 2 + "sect3" -> sect 3 + "sect4" -> sect 4 + "sect5" -> sect 5 + "sect6" -> sect 6 + "title" -> return $ mempty + _ -> innerBlocks + where innerBlocks = mconcat <$> (mapM parseBlock $ elContent e) + getInlines e' = (trimInlines . mconcat) <$> + (mapM parseInline $ elContent e') + isTitle e' = qName (elName e') == "title" + skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs + | otherwise = xs + skipWhite xs = xs + sect n = case skipWhite (elContent e) of + ((Elem t):body) + | isTitle t -> do + h <- header n <$> (getInlines t) + b <- mconcat <$> (mapM parseBlock body) + return $ h <> b + _ -> (header n mempty <>) <$> innerBlocks +parseBlock (CRef _) = return mempty -parseInline :: ParserState -> Content -> Inlines -parseInline st (Text (CData _ s _)) = text s -parseInline st (Elem e) = +parseInline :: Content -> DB Inlines +parseInline (Text (CData _ s _)) = return $ text s +parseInline (Elem e) = case qName (elName e) of "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role") (elAttribs e) of - Just "strong" -> strong innerInlines - _ -> emph innerInlines + Just "strong" -> strong <$> innerInlines + _ -> emph <$> innerInlines _ -> innerInlines - where innerInlines = trimInlines . mconcat . map (parseInline st) - $ elContent e -parseInline st (CRef _) = mempty + where innerInlines = (trimInlines . mconcat) <$> + (mapM parseInline $ elContent e) +parseInline (CRef _) = return mempty |