From de27dbc0f8fd31cc428468e75b129bd751e03f43 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 11 May 2012 20:08:26 -0700
Subject: DocBook reader. Handle block elements inside paragraphs.

---
 src/Text/Pandoc/Readers/DocBook.hs | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs
index 30e926f3d..60d27afc6 100644
--- a/src/Text/Pandoc/Readers/DocBook.hs
+++ b/src/Text/Pandoc/Readers/DocBook.hs
@@ -547,6 +547,24 @@ attrValue attr elt =
 named :: String -> Element -> Bool
 named s e = qName (elName e) == s
 
+-- | True if the content is an element whose tag is listed in
+-- @blocktags@; False for any non-element content.  Used by
+-- @parsePara@ to find where a paragraph's run of inlines ends.  Tag
+-- spellings should agree with the cases of @parseBlock@ (e.g. "glosssee").
+isBlockElement :: Content -> Bool
+isBlockElement (Elem e) = qName (elName e) `elem` blocktags
+  where blocktags = ["toc","index","para","formalpara","simpara",
+           "ackno","epigraph","blockquote","bibliography","bibliodiv",
+           "biblioentry","glosssee","glossseealso","glossary",
+           "glossdiv","glosslist","chapter","appendix","preface",
+           "sect1","sect2","sect3","sect4","sect5","section",
+           "refsect1","refsect2","refsect3","refsection",
+           "important","caution","note","tip","warning","qandadiv",
+           "question","answer","abstract","itemizedlist","orderedlist",
+           "variablelist","article","book","table","informaltable",
+           "computeroutput","screen","programlisting"]
+isBlockElement _ = False
+
 -- Trim leading and trailing newline characters
 trimNl :: String -> String
 trimNl = reverse . go . reverse . go
@@ -586,15 +604,15 @@ parseBlock (Elem e) =
   case qName (elName e) of
         "toc" -> return mempty -- skip TOC, since in pandoc it's autogenerated
         "index" -> return mempty -- skip index, since page numbers meaningless
-        "para" -> para <$> getInlines e
+        "para" -> parsePara (elContent e)
         "formalpara" -> do
            tit <- case filterChild (named "title") e of
                         Just t -> (<> str "." <> linebreak) <$> emph
                                      <$> getInlines t
                         Nothing -> return mempty
-           addToStart tit <$> para <$> getInlines e
-        "simpara" -> para <$> getInlines e
-        "ackno" -> para <$> getInlines e
+           addToStart tit <$> parsePara (elContent e)
+        "simpara" -> parsePara (elContent e)
+        "ackno" -> parsePara (elContent e)
         "epigraph" -> parseBlockquote
         "blockquote" -> parseBlockquote
         "attribution" -> return mempty
@@ -603,7 +621,7 @@ parseBlock (Elem e) =
         "title" -> return mempty -- handled by getTitle or sect
         "bibliography" -> sect 0
         "bibliodiv" -> sect 1
-        "biblioentry" -> para <$> getInlines e
+        "biblioentry" -> parsePara (elContent e)
         "glosssee" -> para . (\ils -> text "See " <> ils <> str ".")
                          <$> getInlines e
         "glossseealso" -> para . (\ils -> text "See also " <> ils <> str ".")
@@ -689,6 +707,20 @@ parseBlock (Elem e) =
         "?xml" -> return mempty
         _ -> getBlocks e
    where getBlocks e' = mconcat <$> (mapM parseBlock $ elContent e')
+         -- Split the content at the first block-level child: the inlines
+         -- before it form a paragraph (omitted when empty), the child is
+         -- parsed with parseBlock, and the remaining content is handled
+         -- the same way recursively.
+         parsePara conts = do
+           let (ils,rest) = break isBlockElement conts
+           ils' <- (trimInlines . mconcat) <$> mapM parseInline ils
+           let p = if ils' == mempty then mempty else para ils'
+           case rest of
+                 [] -> return p
+                 (r:rs) -> do
+                    b <- parseBlock r
+                    x <- parsePara rs
+                    return $ p <> b <> x
          codeBlockWithLang classes = do
            let classes' = case attrValue "language" e of
                                 "" -> classes
-- 
cgit v1.2.3