From d339b29967878f64d5fe45d03d214476e9d88f7e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 16:44:21 -0700 Subject: Added skeleton of basic docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/Text/Pandoc/Readers/DocBook.hs (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs new file mode 100644 index 000000000..73a2e6abc --- /dev/null +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -0,0 +1,36 @@ +module Text.Pandoc.Readers.DocBook ( readDocBook ) where +import Text.Pandoc.Parsing (ParserState(..), defaultParserState) +import Text.Pandoc.Definition +import Text.Pandoc.Builder +import Text.XML.Light +import Data.Monoid +import Data.Char (isSpace) + +readDocBook :: ParserState -> String -> Pandoc +readDocBook st inp = Pandoc (Meta [] [] []) $ toList blocks + where blocks = mconcat $ map (parseBlock st) $ parseXML inp + +parseBlock :: ParserState -> Content -> Blocks +parseBlock st (Text (CData _ s _)) = if all isSpace s + then mempty + else plain $ text s +parseBlock st (Elem e) = + case qName (elName e) of + "para" -> para $ trimInlines $ mconcat + $ map (parseInline st) $ elContent e + _ -> mconcat $ map (parseBlock st) $ elContent e +parseBlock st (CRef _) = mempty + +parseInline :: ParserState -> Content -> Inlines +parseInline st (Text (CData _ s _)) = text s +parseInline st (Elem e) = + case qName (elName e) of + "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role") + (elAttribs e) of + Just "strong" -> strong innerInlines + _ -> emph innerInlines + _ -> innerInlines + where innerInlines = trimInlines . mconcat . map (parseInline st) + $ elContent e +parseInline st (CRef _) = mempty + -- cgit v1.2.3 From 9ecb9b5def145db41f954be4b6ee1ce2c23bbf6c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 17:33:56 -0700 Subject: DocBook reader improvements. --- src/Text/Pandoc/Pretty.hs | 2 +- src/Text/Pandoc/Readers/DocBook.hs | 67 ++++++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 19 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Pretty.hs b/src/Text/Pandoc/Pretty.hs index 3cabcb75b..bf78b2594 100644 --- a/src/Text/Pandoc/Pretty.hs +++ b/src/Text/Pandoc/Pretty.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE GeneralizedNewtypeDeriving, DatatypeContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2010 John MacFarlane diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 73a2e6abc..b8ff3f1ff 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -5,32 +5,63 @@ import Text.Pandoc.Builder import Text.XML.Light import Data.Monoid import Data.Char (isSpace) +import Control.Monad.State +import Control.Applicative ((<$>)) + +type DB = State DBState + +data DBState = DBState{ dbSectionLevel :: Int } + deriving (Read, Show) + +defaultDBState :: DBState +defaultDBState = DBState { dbSectionLevel = 0 } readDocBook :: ParserState -> String -> Pandoc readDocBook st inp = Pandoc (Meta [] [] []) $ toList blocks - where blocks = mconcat $ map (parseBlock st) $ parseXML inp + where blocks = mconcat $ evalState (mapM parseBlock $ parseXML inp) + defaultDBState -parseBlock :: ParserState -> Content -> Blocks -parseBlock st (Text (CData _ s _)) = if all isSpace s - then mempty - else plain $ text s -parseBlock st (Elem e) = +parseBlock :: Content -> DB Blocks +parseBlock (Text (CData _ s _)) = if all isSpace s + then return mempty + else return $ plain $ text s +parseBlock (Elem e) = case qName (elName e) of - "para" -> para $ trimInlines $ mconcat - $ map (parseInline st) $ elContent e - _ -> mconcat $ map (parseBlock st) $ elContent e -parseBlock st (CRef _) = mempty + "para" -> para <$> getInlines e + "sect1" -> sect 1 + "sect2" -> sect 2 + "sect3" -> sect 3 + "sect4" -> sect 4 + "sect5" -> sect 5 + "sect6" -> sect 6 + "title" -> return $ mempty + _ -> innerBlocks + where innerBlocks = mconcat <$> (mapM parseBlock $ elContent e) + getInlines e' = (trimInlines . mconcat) <$> + (mapM parseInline $ elContent e') + isTitle e' = qName (elName e') == "title" + skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs + | otherwise = xs + skipWhite xs = xs + sect n = case skipWhite (elContent e) of + ((Elem t):body) + | isTitle t -> do + h <- header n <$> (getInlines t) + b <- mconcat <$> (mapM parseBlock body) + return $ h <> b + _ -> (header n mempty <>) <$> innerBlocks +parseBlock (CRef _) = return mempty -parseInline :: ParserState -> Content -> Inlines -parseInline st (Text (CData _ s _)) = text s -parseInline st (Elem e) = +parseInline :: Content -> DB Inlines +parseInline (Text (CData _ s _)) = return $ text s +parseInline (Elem e) = case qName (elName e) of "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role") (elAttribs e) of - Just "strong" -> strong innerInlines - _ -> emph innerInlines + Just "strong" -> strong <$> innerInlines + _ -> emph <$> innerInlines _ -> innerInlines - where innerInlines = trimInlines . mconcat . map (parseInline st) - $ elContent e -parseInline st (CRef _) = mempty + where innerInlines = (trimInlines . mconcat) <$> + (mapM parseInline $ elContent e) +parseInline (CRef _) = return mempty -- cgit v1.2.3 From d7e8252ba6d2124981b4c7996134f3be4fe8c8c1 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 17:41:04 -0700 Subject: Got section and sectN tags working in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index b8ff3f1ff..a570964b6 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,5 +1,5 @@ module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Text.Pandoc.Parsing (ParserState(..), defaultParserState) +import Text.Pandoc.Parsing (ParserState(..)) import Text.Pandoc.Definition import Text.Pandoc.Builder import Text.XML.Light @@ -10,16 +10,13 @@ import Control.Applicative ((<$>)) type DB = State DBState -data DBState = DBState{ dbSectionLevel :: Int } - deriving (Read, Show) - -defaultDBState :: DBState -defaultDBState = DBState { dbSectionLevel = 0 } +data DBState = DBState{ dbSectionLevel :: Int + } deriving Show readDocBook :: ParserState -> String -> Pandoc readDocBook st inp = Pandoc (Meta [] [] []) $ toList blocks where blocks = mconcat $ evalState (mapM parseBlock $ parseXML inp) - defaultDBState + DBState{ dbSectionLevel = 0 } parseBlock :: Content -> DB Blocks parseBlock (Text (CData _ s _)) = if all isSpace s @@ -34,6 +31,7 @@ parseBlock (Elem e) = "sect4" -> sect 4 "sect5" -> sect 5 "sect6" -> sect 6 + "section" -> gets dbSectionLevel >>= sect . (+1) "title" -> return $ mempty _ -> innerBlocks where innerBlocks = mconcat <$> (mapM parseBlock $ elContent e) @@ -47,9 +45,16 @@ parseBlock (Elem e) = ((Elem t):body) | isTitle t -> do h <- header n <$> (getInlines t) + modify $ \st -> st{ dbSectionLevel = n } + b <- mconcat <$> (mapM parseBlock body) + modify $ \st -> st{ dbSectionLevel = n - 1 } + return $ h <> b + body -> do + let h = header n mempty + modify $ \st -> st{ dbSectionLevel = n } b <- mconcat <$> (mapM parseBlock body) + modify $ \st -> st{ dbSectionLevel = n - 1 } return $ h <> b - _ -> (header n mempty <>) <$> innerBlocks parseBlock (CRef _) = return mempty parseInline :: Content -> DB Inlines -- cgit v1.2.3 From 2198ac080bb97828b9770d0522b1d4d854deb8c2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 17:46:17 -0700 Subject: Got footnote working in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index a570964b6..488e179da 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -32,7 +32,7 @@ parseBlock (Elem e) = "sect5" -> sect 5 "sect6" -> sect 6 "section" -> gets dbSectionLevel >>= sect . (+1) - "title" -> return $ mempty + "title" -> return $ mempty -- processed by sect _ -> innerBlocks where innerBlocks = mconcat <$> (mapM parseBlock $ elContent e) getInlines e' = (trimInlines . mconcat) <$> @@ -65,6 +65,7 @@ parseInline (Elem e) = (elAttribs e) of Just "strong" -> strong <$> innerInlines _ -> emph <$> innerInlines + "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) _ -> innerInlines where innerInlines = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e) -- cgit v1.2.3 From fe5704bebc91428013cef306f33e7c98e4ac43be Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 17:51:29 -0700 Subject: Handle blockquote tag in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 488e179da..432daeeea 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -25,6 +25,7 @@ parseBlock (Text (CData _ s _)) = if all isSpace s parseBlock (Elem e) = case qName (elName e) of "para" -> para <$> getInlines e + "blockquote" -> blockQuote <$> innerBlocks "sect1" -> sect 1 "sect2" -> sect 2 "sect3" -> sect 3 -- cgit v1.2.3 From 3e062b5a2191967132a1ec5718207631c5fc4adf Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 17:59:40 -0700 Subject: Support superscript, subscript tags in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 432daeeea..28867adbe 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -62,6 +62,8 @@ parseInline :: Content -> DB Inlines parseInline (Text (CData _ s _)) = return $ text s parseInline (Elem e) = case qName (elName e) of + "subscript" -> subscript <$> innerInlines + "superscript" -> superscript <$> innerInlines "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role") (elAttribs e) of Just "strong" -> strong <$> innerInlines -- cgit v1.2.3 From 0fdb310425075cbc2650bf079cd2a1780692ee7b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 18:27:46 -0700 Subject: Got metadata working in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 39 ++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 28867adbe..b40bf32c5 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -11,17 +11,28 @@ import Control.Applicative ((<$>)) type DB = State DBState data DBState = DBState{ dbSectionLevel :: Int + , dbDocTitle :: Inlines + , dbDocAuthors :: [Inlines] + , dbDocDate :: Inlines } deriving Show readDocBook :: ParserState -> String -> Pandoc -readDocBook st inp = Pandoc (Meta [] [] []) $ toList blocks - where blocks = mconcat $ evalState (mapM parseBlock $ parseXML inp) - DBState{ dbSectionLevel = 0 } +readDocBook st inp = setTitle (dbDocTitle st') + $ setAuthors (dbDocAuthors st') + $ setDate (dbDocDate st') + $ doc $ mconcat bs + where (bs, st') = runState (mapM parseBlock $ parseXML inp) + DBState{ dbSectionLevel = 0 + , dbDocTitle = mempty + , dbDocAuthors = [] + , dbDocDate = mempty + } parseBlock :: Content -> DB Blocks +parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE parseBlock (Text (CData _ s _)) = if all isSpace s - then return mempty - else return $ plain $ text s + then return mempty + else return $ plain $ text s parseBlock (Elem e) = case qName (elName e) of "para" -> para <$> getInlines e @@ -33,7 +44,9 @@ parseBlock (Elem e) = "sect5" -> sect 5 "sect6" -> sect 6 "section" -> gets dbSectionLevel >>= sect . (+1) - "title" -> return $ mempty -- processed by sect + "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty + "title" -> return mempty -- processed by sect + "?xml" -> return mempty _ -> innerBlocks where innerBlocks = mconcat <$> (mapM parseBlock $ elContent e) getInlines e' = (trimInlines . mconcat) <$> @@ -42,6 +55,20 @@ parseBlock (Elem e) = skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs | otherwise = xs skipWhite xs = xs + getTitle = case findChild (unqual "title") e of + Just t -> do + tit <- getInlines t + modify $ \st -> st{dbDocTitle = tit} + Nothing -> return () + getAuthors = do + auths <- mapM getInlines + $ findChildren (unqual "author") e + modify $ \st -> st{dbDocAuthors = auths} + getDate = case findChild (unqual "date") e of + Just t -> do + dat <- getInlines t + modify $ \st -> st{dbDocDate = dat} + Nothing -> return () sect n = case skipWhite (elContent e) of ((Elem t):body) | isTitle t -> do -- cgit v1.2.3 From 7c3e2670a8707e0fd3a5ccba781c8c676ea68e54 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 18:31:28 -0700 Subject: Implemented itemizedlist in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index b40bf32c5..59830c3dd 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -36,7 +36,7 @@ parseBlock (Text (CData _ s _)) = if all isSpace s parseBlock (Elem e) = case qName (elName e) of "para" -> para <$> getInlines e - "blockquote" -> blockQuote <$> innerBlocks + "blockquote" -> blockQuote <$> getBlocks e "sect1" -> sect 1 "sect2" -> sect 2 "sect3" -> sect 3 @@ -44,17 +44,19 @@ parseBlock (Elem e) = "sect5" -> sect 5 "sect6" -> sect 6 "section" -> gets dbSectionLevel >>= sect . (+1) + "itemizedlist" -> bulletList <$> listitems "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty "title" -> return mempty -- processed by sect "?xml" -> return mempty - _ -> innerBlocks - where innerBlocks = mconcat <$> (mapM parseBlock $ elContent e) + _ -> getBlocks e + where getBlocks e' = mconcat <$> (mapM parseBlock $ elContent e') getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') isTitle e' = qName (elName e') == "title" skipWhite (Text (CData _ s _):xs) | all isSpace s = skipWhite xs | otherwise = xs skipWhite xs = xs + listitems = mapM getBlocks $ findChildren (unqual "listitem") e getTitle = case findChild (unqual "title") e of Just t -> do tit <- getInlines t -- cgit v1.2.3 From c747665ad75e073bbfa55e4d29d0eaea6eb9db17 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 18:35:43 -0700 Subject: Rudimentary support for programlisting in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 59830c3dd..5cb41638a 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -46,6 +46,7 @@ parseBlock (Elem e) = "section" -> gets dbSectionLevel >>= sect . (+1) "itemizedlist" -> bulletList <$> listitems "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty + "programlisting" -> return $ codeBlock $ strContent e "title" -> return mempty -- processed by sect "?xml" -> return mempty _ -> getBlocks e -- cgit v1.2.3 From d556840fc87d9c88da5c011492b0005a536c453c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 21:33:23 -0700 Subject: Removed unneeded clause for "title" in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 5cb41638a..882a568f5 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -47,7 +47,6 @@ parseBlock (Elem e) = "itemizedlist" -> bulletList <$> listitems "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty "programlisting" -> return $ codeBlock $ strContent e - "title" -> return mempty -- processed by sect "?xml" -> return mempty _ -> getBlocks e where getBlocks e' = mconcat <$> (mapM parseBlock $ elContent e') -- cgit v1.2.3 From f0a1760d41f206eec99b44199e99db046def4d08 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 21:43:07 -0700 Subject: Support orderedlist and ulink in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 882a568f5..53260e279 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -3,6 +3,7 @@ import Text.Pandoc.Parsing (ParserState(..)) import Text.Pandoc.Definition import Text.Pandoc.Builder import Text.XML.Light +import Data.Maybe (fromMaybe) import Data.Monoid import Data.Char (isSpace) import Control.Monad.State @@ -45,8 +46,9 @@ parseBlock (Elem e) = "sect6" -> sect 6 "section" -> gets dbSectionLevel >>= sect . (+1) "itemizedlist" -> bulletList <$> listitems + "orderedlist" -> orderedList <$> listitems -- TODO list attributes "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty - "programlisting" -> return $ codeBlock $ strContent e + "programlisting" -> return $ codeBlock $ strContent e -- TODO attrs "?xml" -> return mempty _ -> getBlocks e where getBlocks e' = mconcat <$> (mapM parseBlock $ elContent e') @@ -93,6 +95,9 @@ parseInline (Elem e) = case qName (elName e) of "subscript" -> subscript <$> innerInlines "superscript" -> superscript <$> innerInlines + "ulink" -> link + (fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url") + (elAttribs e))) "" <$> innerInlines "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role") (elAttribs e) of Just "strong" -> strong <$> innerInlines -- cgit v1.2.3 From e5c4d7002eb5b9c73211186f5f15a82c2731fe31 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 21:50:06 -0700 Subject: Implemented quoted in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 53260e279..a4f0ee9c4 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,6 +12,7 @@ import Control.Applicative ((<$>)) type DB = State DBState data DBState = DBState{ dbSectionLevel :: Int + , dbQuoteType :: QuoteType , dbDocTitle :: Inlines , dbDocAuthors :: [Inlines] , dbDocDate :: Inlines @@ -24,6 +25,7 @@ readDocBook st inp = setTitle (dbDocTitle st') $ doc $ mconcat bs where (bs, st') = runState (mapM parseBlock $ parseXML inp) DBState{ dbSectionLevel = 0 + , dbQuoteType = DoubleQuote , dbDocTitle = mempty , dbDocAuthors = [] , dbDocDate = mempty @@ -95,6 +97,15 @@ parseInline (Elem e) = case qName (elName e) of "subscript" -> subscript <$> innerInlines "superscript" -> superscript <$> innerInlines + "quote" -> do + qt <- gets dbQuoteType + let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote + modify $ \st -> st{ dbQuoteType = qt' } + contents <- innerInlines + modify $ \st -> st{ dbQuoteType = qt } + return $ if qt == SingleQuote + then singleQuoted contents + else doubleQuoted contents "ulink" -> link (fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url") (elAttribs e))) "" <$> innerInlines -- cgit v1.2.3 From f78801aa20b06b63d3c6d3d336caf85ce3614d9e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 21:55:14 -0700 Subject: Implemented literal tag in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index a4f0ee9c4..cfd7a6e58 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -106,6 +106,7 @@ parseInline (Elem e) = return $ if qt == SingleQuote then singleQuoted contents else doubleQuoted contents + "literal" -> return $ code $ strContent e -- TODO attrs "ulink" -> link (fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url") (elAttribs e))) "" <$> innerInlines -- cgit v1.2.3 From e2c3a7b896b076a218d94b9b0840599d399b0618 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 14 Apr 2012 22:16:20 -0700 Subject: Implemented abstract, constant, type, etc. in docbook reader. --- src/Text/Pandoc/Readers/DocBook.hs | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index cfd7a6e58..8da933a2f 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -47,6 +47,7 @@ parseBlock (Elem e) = "sect5" -> sect 5 "sect6" -> sect 6 "section" -> gets dbSectionLevel >>= sect . (+1) + "abstract" -> blockQuote <$> getBlocks e "itemizedlist" -> bulletList <$> listitems "orderedlist" -> orderedList <$> listitems -- TODO list attributes "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty @@ -107,6 +108,13 @@ parseInline (Elem e) = then singleQuoted contents else doubleQuoted contents "literal" -> return $ code $ strContent e -- TODO attrs + "varname" -> return $ codeWith ("",["varname"],[]) $ strContent e + "function" -> return $ codeWith ("",["function"],[]) $ strContent e + "type" -> return $ codeWith ("",["type"],[]) $ strContent e + "symbol" -> return $ codeWith ("",["symbol"],[]) $ strContent e + "constant" -> return $ codeWith ("",["constant"],[]) $ strContent e + "userinput" -> return $ codeWith ("",["userinput"],[]) $ strContent e + "varargs" -> return $ str "(…)" "ulink" -> link (fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url") (elAttribs e))) "" <$> innerInlines -- cgit v1.2.3 From 4b2287920a095f695e03bae1eee0074a1bf05df3 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 15 Apr 2012 10:07:24 -0700 Subject: DocBook reader: Added comment with list of docbook tags. --- src/Text/Pandoc/Readers/DocBook.hs | 479 ++++++++++++++++++++++++++++++++++++- 1 file changed, 478 insertions(+), 1 deletion(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 8da933a2f..047df62fb 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -9,6 +9,484 @@ import Data.Char (isSpace) import Control.Monad.State import Control.Applicative ((<$>)) +{- + +List of all DocBook tags, with [x] indicating implemented: + +[ ] abbrev - An abbreviation, especially one followed by a period +[x] abstract - A summary +[ ] accel - A graphical user interface (GUI) keyboard shortcut +[ ] ackno - Acknowledgements in an Article +[ ] acronym - An often pronounceable word made from the initial +[ ] action - A response to a user event +[ ] address - A real-world address, generally a postal address +[ ] affiliation - The institutional affiliation of an individual +[ ] alt - Text representation for a graphical element +[ ] anchor - A spot in the document +[ ] answer - An answer to a question posed in a QandASet +[ ] appendix - An appendix in a Book or Article +[ ] appendixinfo - Meta-information for an Appendix +[ ] application - The name of a software program +[ ] area - A region defined for a Callout in a graphic or code example +[ ] areaset - A set of related areas in a graphic or code example +[ ] areaspec - A collection of regions in a graphic or code example +[ ] arg - An argument in a CmdSynopsis +[ ] article - An article +[x] articleinfo - Meta-information for an Article +[ ] artpagenums - The page numbers of an article as published +[ ] attribution - The source of a block quote or epigraph +[ ] audiodata - Pointer to external audio data +[ ] audioobject - A wrapper for audio data and its associated meta-information +[x] author - The name of an individual author +[ ] authorblurb - A short description or note about an author +[ ] authorgroup - Wrapper for author information when a document has + multiple authors or collabarators +[ ] authorinitials - The initials or other short identifier for an author +[ ] beginpage - The location of a page break in a print version of the document +[ ] bibliocoverage - The spatial or temporal coverage of a document +[ ] bibliodiv - A section of a Bibliography +[ ] biblioentry - An entry in a Bibliography +[ ] bibliography - A bibliography +[ ] bibliographyinfo - Meta-information for a Bibliography +[ ] biblioid - An identifier for a document +[ ] bibliolist - A wrapper for a set of bibliography entries +[ ] bibliomisc - Untyped bibliographic information +[ ] bibliomixed - An entry in a Bibliography +[ ] bibliomset - A cooked container for related bibliographic information +[ ] biblioref - A cross reference to a bibliographic entry +[ ] bibliorelation - The relationship of a document to another +[ ] biblioset - A raw container for related bibliographic information +[ ] bibliosource - The source of a document +[ ] blockinfo - Meta-information for a block element +[x] blockquote - A quotation set off from the main text +[ ] book - A book +[ ] bookinfo - Meta-information for a Book +[ ] bridgehead - A free-floating heading +[ ] callout - A “called out” description of a marked Area +[ ] calloutlist - A list of Callouts +[ ] caption - A caption +[ ] caution - A note of caution +[ ] chapter - A chapter, as of a book +[ ] chapterinfo - Meta-information for a Chapter +[ ] citation - An inline bibliographic reference to another published work +[ ] citebiblioid - A citation of a bibliographic identifier +[ ] citerefentry - A citation to a reference page +[ ] citetitle - The title of a cited work +[ ] city - The name of a city in an address +[ ] classname - The name of a class, in the object-oriented programming sense +[ ] classsynopsis - The syntax summary for a class definition +[ ] classsynopsisinfo - Information supplementing the contents of + a ClassSynopsis +[ ] cmdsynopsis - A syntax summary for a software command +[ ] co - The location of a callout embedded in text +[ ] code - An inline code fragment +[ ] col - Specifications for a column in an HTML table +[ ] colgroup - A group of columns in an HTML table +[ ] collab - Identifies a collaborator +[ ] collabname - The name of a collaborator +[ ] colophon - Text at the back of a book describing facts about its production +[ ] colspec - Specifications for a column in a table +[ ] command - The name of an executable program or other software command +[ ] computeroutput - Data, generally text, displayed or presented by a computer +[ ] confdates - The dates of a conference for which a document was written +[ ] confgroup - A wrapper for document meta-information about a conference +[ ] confnum - An identifier, frequently numerical, associated with a conference for which a document was written +[ ] confsponsor - The sponsor of a conference for which a document was written +[ ] conftitle - The title of a conference for which a document was written +[x] constant - A programming or system constant +[ ] constraint - A constraint in an EBNF production +[ ] constraintdef - The definition of a constraint in an EBNF production +[ ] constructorsynopsis - A syntax summary for a constructor +[ ] contractnum - The contract number of a document +[ ] contractsponsor - The sponsor of a contract +[ ] contrib - A summary of the contributions made to a document by a + credited source +[ ] copyright - Copyright information about a document +[ ] coref - A cross reference to a co +[ ] corpauthor - A corporate author, as opposed to an individual +[ ] corpcredit - A corporation or organization credited in a document +[ ] corpname - The name of a corporation +[ ] country - The name of a country +[ ] database - The name of a database, or part of a database +[x] date - The date of publication or revision of a document +[ ] dedication - A wrapper for the dedication section of a book +[ ] destructorsynopsis - A syntax summary for a destructor +[ ] edition - The name or number of an edition of a document +[ ] editor - The name of the editor of a document +[ ] email - An email address +[x] emphasis - Emphasized text +[ ] entry - A cell in a table +[ ] entrytbl - A subtable appearing in place of an Entry in a table +[ ] envar - A software environment variable +[ ] epigraph - A short inscription at the beginning of a document or component +[ ] equation - A displayed mathematical equation +[ ] errorcode - An error code +[ ] errorname - An error name +[ ] errortext - An error message. +[ ] errortype - The classification of an error message +[ ] example - A formal example, with a title +[ ] exceptionname - The name of an exception +[ ] fax - A fax number +[ ] fieldsynopsis - The name of a field in a class definition +[ ] figure - A formal figure, generally an illustration, with a title +[ ] filename - The name of a file +[ ] firstname - The first name of a person +[ ] firstterm - The first occurrence of a term +[ ] footnote - A footnote +[ ] footnoteref - A cross reference to a footnote (a footnote mark) +[ ] foreignphrase - A word or phrase in a language other than the primary + language of the document +[ ] formalpara - A paragraph with a title +[ ] funcdef - A function (subroutine) name and its return type +[ ] funcparams - Parameters for a function referenced through a function + pointer in a synopsis +[ ] funcprototype - The prototype of a function +[ ] funcsynopsis - The syntax summary for a function definition +[ ] funcsynopsisinfo - Information supplementing the FuncDefs of a FuncSynopsis +[x] function - The name of a function or subroutine, as in a + programming language +[ ] glossary - A glossary +[ ] glossaryinfo - Meta-information for a Glossary +[ ] glossdef - A definition in a GlossEntry +[ ] glossdiv - A division in a Glossary +[ ] glossentry - An entry in a Glossary or GlossList +[ ] glosslist - A wrapper for a set of GlossEntrys +[ ] glosssee - A cross-reference from one GlossEntry to another +[ ] glossseealso - A cross-reference from one GlossEntry to another +[ ] glossterm - A glossary term +[ ] graphic - A displayed graphical object (not an inline) +[ ] graphicco - A graphic that contains callout areas +[ ] group - A group of elements in a CmdSynopsis +[ ] guibutton - The text on a button in a GUI +[ ] guiicon - Graphic and/or text appearing as a icon in a GUI +[ ] guilabel - The text of a label in a GUI +[ ] guimenu - The name of a menu in a GUI +[ ] guimenuitem - The name of a terminal menu item in a GUI +[ ] guisubmenu - The name of a submenu in a GUI +[ ] hardware - A physical part of a computer system +[ ] highlights - A summary of the main points of the discussed component +[ ] holder - The name of the individual or organization that holds a copyright +[ ] honorific - The title of a person +[ ] html:form - An HTML form +[ ] imagedata - Pointer to external image data +[ ] imageobject - A wrapper for image data and its associated meta-information +[ ] imageobjectco - A wrapper for an image object with callouts +[ ] important - An admonition set off from the text +[ ] index - An index +[ ] indexdiv - A division in an index +[ ] indexentry - An entry in an index +[ ] indexinfo - Meta-information for an Index +[ ] indexterm - A wrapper for terms to be indexed +[ ] informalequation - A displayed mathematical equation without a title +[ ] informalexample - A displayed example without a title +[ ] informalfigure - A untitled figure +[ ] informaltable - A table without a title +[ ] initializer - The initializer for a FieldSynopsis +[ ] inlineequation - A mathematical equation or expression occurring inline +[ ] inlinegraphic - An object containing or pointing to graphical data + that will be rendered inline +[ ] inlinemediaobject - An inline media object (video, audio, image, and so on) +[ ] interface - An element of a GUI +[ ] interfacename - The name of an interface +[ ] invpartnumber - An inventory part number +[ ] isbn - The International Standard Book Number of a document +[ ] issn - The International Standard Serial Number of a periodical +[ ] issuenum - The number of an issue of a journal +[x] itemizedlist - A list in which each entry is marked with a bullet or + other dingbat +[ ] itermset - A set of index terms in the meta-information of a document +[ ] jobtitle - The title of an individual in an organization +[ ] keycap - The text printed on a key on a keyboard +[ ] keycode - The internal, frequently numeric, identifier for a key + on a keyboard +[ ] keycombo - A combination of input actions +[ ] keysym - The symbolic name of a key on a keyboard +[ ] keyword - One of a set of keywords describing the content of a document +[ ] keywordset - A set of keywords describing the content of a document +[ ] label - A label on a Question or Answer +[ ] legalnotice - A statement of legal obligations or requirements +[ ] lhs - The left-hand side of an EBNF production +[ ] lineage - The portion of a person's name indicating a relationship to + ancestors +[ ] lineannotation - A comment on a line in a verbatim listing +[ ] link - A hypertext link +[ ] listitem - A wrapper for the elements of a list item +[x] literal - Inline text that is some literal value +[ ] literallayout - A block of text in which line breaks and white space are + to be reproduced faithfully +[ ] lot - A list of the titles of formal objects (as tables or figures) in + a document +[ ] lotentry - An entry in a list of titles +[ ] manvolnum - A reference volume number +[ ] markup - A string of formatting markup in text that is to be + represented literally +[ ] mathphrase - A mathematical phrase, an expression that can be represented + with ordinary text and a small amount of markup +[ ] medialabel - A name that identifies the physical medium on which some + information resides +[ ] mediaobject - A displayed media object (video, audio, image, etc.) +[ ] mediaobjectco - A media object that contains callouts +[ ] member - An element of a simple list +[ ] menuchoice - A selection or series of selections from a menu +[ ] methodname - The name of a method +[ ] methodparam - Parameters to a method +[ ] methodsynopsis - A syntax summary for a method +[ ] mml:math - A MathML equation +[ ] modespec - Application-specific information necessary for the + completion of an OLink +[ ] modifier - Modifiers in a synopsis +[ ] mousebutton - The conventional name of a mouse button +[ ] msg - A message in a message set +[ ] msgaud - The audience to which a message in a message set is relevant +[ ] msgentry - A wrapper for an entry in a message set +[ ] msgexplan - Explanatory material relating to a message in a message set +[ ] msginfo - Information about a message in a message set +[ ] msglevel - The level of importance or severity of a message in a message set +[ ] msgmain - The primary component of a message in a message set +[ ] msgorig - The origin of a message in a message set +[ ] msgrel - A related component of a message in a message set +[ ] msgset - A detailed set of messages, usually error messages +[ ] msgsub - A subcomponent of a message in a message set +[ ] msgtext - The actual text of a message component in a message set +[ ] nonterminal - A non-terminal in an EBNF production +[ ] note - A message set off from the text +[ ] objectinfo - Meta-information for an object +[ ] olink - A link that addresses its target indirectly, through an entity +[ ] ooclass - A class in an object-oriented programming language +[ ] ooexception - An exception in an object-oriented programming language +[ ] oointerface - An interface in an object-oriented programming language +[ ] option - An option for a software command +[ ] optional - Optional information +[x] orderedlist - A list in which each entry is marked with a sequentially + incremented label +[ ] orgdiv - A division of an organization +[ ] orgname - The name of an organization other than a corporation +[ ] otheraddr - Uncategorized information in address +[ ] othercredit - A person or entity, other than an author or editor, + credited in a document +[ ] othername - A component of a persons name that is not a first name, + surname, or lineage +[ ] package - A package +[ ] pagenums - The numbers of the pages in a book, for use in a bibliographic + entry +[x] para - A paragraph +[ ] paramdef - Information about a function parameter in a programming language +[ ] parameter - A value or a symbolic reference to a value +[ ] part - A division in a book +[ ] partinfo - Meta-information for a Part +[ ] partintro - An introduction to the contents of a part +[ ] personblurb - A short description or note about a person +[ ] personname - The personal name of an individual +[ ] phone - A telephone number +[ ] phrase - A span of text +[ ] pob - A post office box in an address +[ ] postcode - A postal code in an address +[ ] preface - Introductory matter preceding the first chapter of a book +[ ] prefaceinfo - Meta-information for a Preface +[ ] primary - The primary word or phrase under which an index term should be + sorted +[ ] primaryie - A primary term in an index entry, not in the text +[ ] printhistory - The printing history of a document +[ ] procedure - A list of operations to be performed in a well-defined sequence +[ ] production - A production in a set of EBNF productions +[ ] productionrecap - A cross-reference to an EBNF production +[ ] productionset - A set of EBNF productions +[ ] productname - The formal name of a product +[ ] productnumber - A number assigned to a product +[ ] programlisting - A literal listing of all or part of a program +[ ] programlistingco - A program listing with associated areas used in callouts +[ ] prompt - A character or string indicating the start of an input field in + a computer display +[ ] property - A unit of data associated with some part of a computer system +[ ] pubdate - The date of publication of a document +[ ] publisher - The publisher of a document +[ ] publishername - The name of the publisher of a document +[ ] pubsnumber - A number assigned to a publication other than an ISBN or ISSN + or inventory part number +[ ] qandadiv - A titled division in a QandASet +[ ] qandaentry - A question/answer set within a QandASet +[ ] qandaset - A question-and-answer set +[ ] question - A question in a QandASet +[ ] quote - An inline quotation +[ ] refclass - The scope or other indication of applicability of a + reference entry +[ ] refdescriptor - A description of the topic of a reference page +[ ] refentry - A reference page (originally a UNIX man-style reference page) +[ ] refentryinfo - Meta-information for a Refentry +[ ] refentrytitle - The title of a reference page +[ ] reference - A collection of reference entries +[ ] referenceinfo - Meta-information for a Reference +[ ] refmeta - Meta-information for a reference entry +[ ] refmiscinfo - Meta-information for a reference entry other than the title + and volume number +[ ] refname - The name of (one of) the subject(s) of a reference page +[ ] refnamediv - The name, purpose, and classification of a reference page +[ ] refpurpose - A short (one sentence) synopsis of the topic of a reference + page +[ ] refsect1 - A major subsection of a reference entry +[ ] refsect1info - Meta-information for a RefSect1 +[ ] refsect2 - A subsection of a RefSect1 +[ ] refsect2info - Meta-information for a RefSect2 +[ ] refsect3 - A subsection of a RefSect2 +[ ] refsect3info - Meta-information for a RefSect3 +[ ] refsection - A recursive section in a refentry +[ ] refsectioninfo - Meta-information for a refsection +[ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page +[ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv +[ ] releaseinfo - Information about a particular release of a document +[ ] remark - A remark (or comment) intended for presentation in a draft + manuscript +[ ] replaceable - Content that may or must be replaced by the user +[ ] returnvalue - The value returned by a function +[ ] revdescription - A extended description of a revision to a document +[ ] revhistory - A history of the revisions to a document +[ ] revision - An entry describing a single revision in the history of the + revisions to a document +[ ] revnumber - A document revision number +[ ] revremark - A description of a revision to a document +[ ] rhs - The right-hand side of an EBNF production +[ ] row - A row in a table +[ ] sbr - An explicit line break in a command synopsis +[ ] screen - Text that a user sees or might see on a computer screen +[ ] screenco - A screen with associated areas used in callouts +[ ] screeninfo - Information about how a screen shot was produced +[ ] screenshot - A representation of what the user sees or might see on a + computer screen +[ ] secondary - A secondary word or phrase in an index term +[ ] secondaryie - A secondary term in an index entry, rather than in the text +[x] sect1 - A top-level section of document +[ ] sect1info - Meta-information for a Sect1 +[x] sect2 - A subsection within a Sect1 +[ ] sect2info - Meta-information for a Sect2 +[x] sect3 - A subsection within a Sect2 +[ ] sect3info - Meta-information for a Sect3 +[x] sect4 - A subsection within a Sect3 +[ ] sect4info - Meta-information for a Sect4 +[x] sect5 - A subsection within a Sect4 +[ ] sect5info - Meta-information for a Sect5 +[x] section - A recursive section +[ ] sectioninfo - Meta-information for a recursive section +[ ] see - Part of an index term directing the reader instead to another entry + in the index +[ ] seealso - Part of an index term directing the reader also to another entry + in the index +[ ] seealsoie - A See also entry in an index, rather than in the text +[ ] seeie - A See entry in an index, rather than in the text +[ ] seg - An element of a list item in a segmented list +[ ] seglistitem - A list item in a segmented list +[ ] segmentedlist - A segmented list, a list of sets of elements +[ ] segtitle - The title of an element of a list item in a segmented list +[ ] seriesvolnums - Numbers of the volumes in a series of books +[ ] set - A collection of books +[ ] setindex - An index to a set of books +[ ] setindexinfo - Meta-information for a SetIndex +[ ] setinfo - Meta-information for a Set +[ ] sgmltag - A component of SGML markup +[ ] shortaffil - A brief description of an affiliation +[ ] shortcut - A key combination for an action that is also accessible through + a menu +[ ] sidebar - A portion of a document that is isolated from the main + narrative flow +[ ] sidebarinfo - Meta-information for a Sidebar +[ ] simpara - A paragraph that contains only text and inline markup, no block + elements +[ ] simplelist - An undecorated list of single words or short phrases +[ ] simplemsgentry - A wrapper for a simpler entry in a message set +[ ] simplesect - A section of a document with no subdivisions +[ ] spanspec - Formatting information for a spanned column in a table +[ ] state - A state or province in an address +[ ] step - A unit of action in a procedure +[ ] stepalternatives - Alternative steps in a procedure +[ ] street - A street address in an address +[ ] structfield - A field in a structure (in the programming language sense) +[ ] structname - The name of a structure (in the programming language sense) +[ ] subject - One of a group of terms describing the subject matter of a + document +[ ] subjectset - A set of terms describing the subject matter of a document +[ ] subjectterm - A term in a group of terms describing the subject matter of + a document +[ ] subscript - A subscript (as in H2O, the molecular formula for water) +[ ] substeps - A wrapper for steps that occur within steps in a procedure +[ ] subtitle - The subtitle of a document +[ ] superscript - A superscript (as in x2, the mathematical notation for x + multiplied by itself) +[ ] surname - A family name; in western cultures the last name +[ ] svg:svg - An SVG graphic +[x] symbol - A name that is replaced by a value before processing +[ ] synopfragment - A portion of a CmdSynopsis broken out from the main body + of the synopsis +[ ] synopfragmentref - A reference to a fragment of a command synopsis +[ ] synopsis - A general-purpose element for representing the syntax of + commands or functions +[ ] systemitem - A system-related item or term +[ ] table - A formal table in a document +[ ] task - A task to be completed +[ ] taskprerequisites - The prerequisites for a task +[ ] taskrelated - Information related to a task +[ ] tasksummary - A summary of a task +[ ] tbody - A wrapper for the rows of a table or informal table +[ ] td - A table entry in an HTML table +[ ] term - The word or phrase being defined or described in a variable list +[ ] termdef - An inline term definition +[ ] tertiary - A tertiary word or phrase in an index term +[ ] tertiaryie - A tertiary term in an index entry, rather than in the text +[ ] textdata - Pointer to external text data +[ ] textobject - A wrapper for a text description of an object and its + associated meta-information +[ ] tfoot - A table footer consisting of one or more rows +[ ] tgroup - A wrapper for the main content of a table, or part of a table +[ ] th - A table header entry in an HTML table +[ ] thead - A table header consisting of one or more rows +[ ] tip - A suggestion to the user, set off from the text +[x] title - The text of the title of a section of a document or of a formal + block-level element +[ ] titleabbrev - The abbreviation of a Title +[ ] toc - A table of contents +[ ] tocback - An entry in a table of contents for a back matter component +[ ] tocchap - An entry in a table of contents for a component in the body of + a document +[ ] tocentry - A component title in a table of contents +[ ] tocfront - An entry in a table of contents for a front matter component +[ ] toclevel1 - A top-level entry within a table of contents entry for a + chapter-like component +[ ] toclevel2 - A second-level entry within a table of contents entry for a + chapter-like component +[ ] toclevel3 - A third-level entry within a table of contents entry for a + chapter-like component +[ ] toclevel4 - A fourth-level entry within a table of contents entry for a + chapter-like component +[ ] toclevel5 - A fifth-level entry within a table of contents entry for a + chapter-like component +[ ] tocpart - An entry in a table of contents for a part of a book +[ ] token - A unit of information +[ ] tr - A row in an HTML table +[ ] trademark - A trademark +[ ] type - The classification of a value +[x] ulink - A link that addresses its target by means of a URL + (Uniform Resource Locator) +[ ] uri - A Uniform Resource Identifier +[ ] userinput - Data entered by the user +[x] varargs - An empty element in a function synopsis indicating a variable + number of arguments +[ ] variablelist - A list in which each entry is composed of a set of one or + more terms and an associated description +[ ] varlistentry - A wrapper for a set of terms and the associated description + in a variable list +[x] varname - The name of a variable +[ ] videodata - Pointer to external video data +[ ] videoobject - A wrapper for video data and its associated meta-information +[ ] void - An empty element in a function synopsis indicating that the + function in question takes no arguments +[ ] volumenum - The volume number of a document in a set (as of books in a set + or articles in a journal) +[ ] warning - An admonition set off from the text +[ ] wordasword - A word meant specifically as a word and not representing + anything else +[ ] xref - A cross reference to another part of the document +[ ] year - The year of publication of a document + +-} + type DB = State DBState data DBState = DBState{ dbSectionLevel :: Int @@ -45,7 +523,6 @@ parseBlock (Elem e) = "sect3" -> sect 3 "sect4" -> sect 4 "sect5" -> sect 5 - "sect6" -> sect 6 "section" -> gets dbSectionLevel >>= sect . (+1) "abstract" -> blockQuote <$> getBlocks e "itemizedlist" -> bulletList <$> listitems -- cgit v1.2.3 From 2f559597d54d9e37885333201434d586ff11bc2e Mon Sep 17 00:00:00 2001 From: mb21 Date: Thu, 19 Apr 2012 15:45:59 +0300 Subject: Added link, code and info elements to DocBook Reader. --- src/Text/Pandoc/Readers/DocBook.hs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 047df62fb..8fe9797a8 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -79,7 +79,7 @@ List of all DocBook tags, with [x] indicating implemented: a ClassSynopsis [ ] cmdsynopsis - A syntax summary for a software command [ ] co - The location of a callout embedded in text -[ ] code - An inline code fragment +[x] code - An inline code fragment [ ] col - Specifications for a column in an HTML table [ ] colgroup - A group of columns in an HTML table [ ] collab - Identifies a collaborator @@ -177,6 +177,7 @@ List of all DocBook tags, with [x] indicating implemented: [ ] indexentry - An entry in an index [ ] indexinfo - Meta-information for an Index [ ] indexterm - A wrapper for terms to be indexed +[ ] info - A wrapper for information about a component or other block. (DocBook v5) [ ] informalequation - A displayed mathematical equation without a title [ ] informalexample - A displayed example without a title [ ] informalfigure - A untitled figure @@ -209,7 +210,7 @@ List of all DocBook tags, with [x] indicating implemented: [ ] lineage - The portion of a person's name indicating a relationship to ancestors [ ] lineannotation - A comment on a line in a verbatim listing -[ ] link - A hypertext link +[x] link - A hypertext link [ ] listitem - A wrapper for the elements of a list item [x] literal - Inline text that is some literal value [ ] literallayout - A block of text in which line breaks and white space are @@ -405,10 +406,10 @@ List of all DocBook tags, with [x] indicating implemented: [ ] subjectset - A set of terms describing the subject matter of a document [ ] subjectterm - A term in a group of terms describing the subject matter of a document -[ ] subscript - A subscript (as in H2O, the molecular formula for water) +[x] subscript - A subscript (as in H2O, the molecular formula for water) [ ] substeps - A wrapper for steps that occur within steps in a procedure [ ] subtitle - The subtitle of a document -[ ] superscript - A superscript (as in x2, the mathematical notation for x +[x] superscript - A superscript (as in x2, the mathematical notation for x multiplied by itself) [ ] surname - A family name; in western cultures the last name [ ] svg:svg - An SVG graphic @@ -527,6 +528,7 @@ parseBlock (Elem e) = "abstract" -> blockQuote <$> getBlocks e "itemizedlist" -> bulletList <$> listitems "orderedlist" -> orderedList <$> listitems -- TODO list attributes + "info" -> getTitle >> getAuthors >> getDate >> return mempty "articleinfo" -> getTitle >> getAuthors >> getDate >> return mempty "programlisting" -> return $ codeBlock $ strContent e -- TODO attrs "?xml" -> return mempty @@ -584,6 +586,7 @@ parseInline (Elem e) = return $ if qt == SingleQuote then singleQuoted contents else doubleQuoted contents + "code" -> return $ code $ strContent e -- TODO attrs "literal" -> return $ code $ strContent e -- TODO attrs "varname" -> return $ codeWith ("",["varname"],[]) $ strContent e "function" -> return $ codeWith ("",["function"],[]) $ strContent e @@ -595,6 +598,11 @@ parseInline (Elem e) = "ulink" -> link (fromMaybe "" (lookupAttrBy (\attr -> qName attr == "url") (elAttribs e))) "" <$> innerInlines + "link" -> case findAttr (QName "href" Nothing $ Just "xlink") e of + Just href -> link href "" <$> innerInlines + _ -> link ("#"++(fromMaybe "" (lookupAttrBy + (\attr -> qName attr == "linkend") + (elAttribs e)))) "" <$> innerInlines "emphasis" -> case lookupAttrBy (\attr -> qName attr == "role") (elAttribs e) of Just "strong" -> strong <$> innerInlines @@ -604,4 +612,3 @@ parseInline (Elem e) = where innerInlines = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e) parseInline (CRef _) = return mempty - -- cgit v1.2.3