diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/DocBook.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/DocBook.hs | 1055 |
1 files changed, 0 insertions, 1055 deletions
diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs deleted file mode 100644 index bef256a93..000000000 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ /dev/null @@ -1,1055 +0,0 @@ -module Text.Pandoc.Readers.DocBook ( readDocBook ) where -import Data.Char (toUpper) -import Text.Pandoc.Shared (safeRead) -import Text.Pandoc.Options -import Text.Pandoc.Definition -import Text.Pandoc.Builder -import Text.XML.Light -import Text.HTML.TagSoup.Entity (lookupEntity) -import Data.Either (rights) -import Data.Generics -import Data.Char (isSpace) -import Control.Monad.State -import Data.List (intersperse) -import Data.Maybe (fromMaybe) -import Text.TeXMath (readMathML, writeTeX) -import Data.Default -import Data.Foldable (asum) -import Text.Pandoc.Class (PandocMonad) - -{- - -List of all DocBook tags, with [x] indicating implemented, -[o] meaning intentionally left unimplemented (pass through): - -[o] abbrev - An abbreviation, especially one followed by a period -[x] abstract - A summary -[o] accel - A graphical user interface (GUI) keyboard shortcut -[x] ackno - Acknowledgements in an Article -[o] acronym - An often pronounceable word made from the initial -[o] action - A response to a user event -[o] address - A real-world address, generally a postal address -[ ] affiliation - The institutional affiliation of an individual -[ ] alt - Text representation for a graphical element -[o] anchor - A spot in the document -[x] answer - An answer to a question posed in a QandASet -[x] appendix - An appendix in a Book or Article -[x] appendixinfo - Meta-information for an Appendix -[o] application - The name of a software program -[x] area - A region defined for a Callout in a graphic or code example -[x] areaset - A set of related areas in a graphic or code example -[x] areaspec - A collection of regions in a graphic or code example -[ ] arg - An argument in a CmdSynopsis -[x] article - An article -[x] articleinfo - Meta-information for an Article -[ ] artpagenums - The page numbers of an article as published -[x] attribution - The source of a block quote or epigraph -[ ] audiodata - Pointer to external audio data -[ ] audioobject - A wrapper for audio data and its associated meta-information -[x] author - The name of an individual author -[ ] authorblurb - A short description or note about an author -[x] authorgroup - Wrapper for author information when a document has - multiple authors or collabarators -[x] authorinitials - The initials or other short identifier for an author -[o] beginpage - The location of a page break in a print version of the document -[ ] bibliocoverage - The spatial or temporal coverage of a document -[x] bibliodiv - A section of a Bibliography -[x] biblioentry - An entry in a Bibliography -[x] bibliography - A bibliography -[ ] bibliographyinfo - Meta-information for a Bibliography -[ ] biblioid - An identifier for a document -[o] bibliolist - A wrapper for a set of bibliography entries -[ ] bibliomisc - Untyped bibliographic information -[x] bibliomixed - An entry in a Bibliography -[ ] bibliomset - A cooked container for related bibliographic information -[ ] biblioref - A cross reference to a bibliographic entry -[ ] bibliorelation - The relationship of a document to another -[ ] biblioset - A raw container for related bibliographic information -[ ] bibliosource - The source of a document -[ ] blockinfo - Meta-information for a block element -[x] blockquote - A quotation set off from the main text -[x] book - A book -[x] bookinfo - Meta-information for a Book -[x] bridgehead - A free-floating heading -[x] callout - A “called out” description of a marked Area -[x] calloutlist - A list of Callouts -[x] caption - A caption -[x] caution - A note of caution -[x] chapter - A chapter, as of a book -[x] chapterinfo - Meta-information for a Chapter -[ ] citation - An inline bibliographic reference to another published work -[ ] citebiblioid - A citation of a bibliographic identifier -[ ] citerefentry - A citation to a reference page -[ ] citetitle - The title of a cited work -[ ] city - The name of a city in an address -[x] classname - The name of a class, in the object-oriented programming sense -[ ] classsynopsis - The syntax summary for a class definition -[ ] classsynopsisinfo - Information supplementing the contents of - a ClassSynopsis -[ ] cmdsynopsis - A syntax summary for a software command -[ ] co - The location of a callout embedded in text -[x] code - An inline code fragment -[x] col - Specifications for a column in an HTML table -[x] colgroup - A group of columns in an HTML table -[ ] collab - Identifies a collaborator -[ ] collabname - The name of a collaborator -[ ] colophon - Text at the back of a book describing facts about its production -[x] colspec - Specifications for a column in a table -[x] command - The name of an executable program or other software command -[x] computeroutput - Data, generally text, displayed or presented by a computer -[ ] confdates - The dates of a conference for which a document was written -[ ] confgroup - A wrapper for document meta-information about a conference -[ ] confnum - An identifier, frequently numerical, associated with a conference for which a document was written -[ ] confsponsor - The sponsor of a conference for which a document was written -[ ] conftitle - The title of a conference for which a document was written -[x] constant - A programming or system constant -[ ] constraint - A constraint in an EBNF production -[ ] constraintdef - The definition of a constraint in an EBNF production -[ ] constructorsynopsis - A syntax summary for a constructor -[ ] contractnum - The contract number of a document -[ ] contractsponsor - The sponsor of a contract -[ ] contrib - A summary of the contributions made to a document by a - credited source -[ ] copyright - Copyright information about a document -[ ] coref - A cross reference to a co -[ ] corpauthor - A corporate author, as opposed to an individual -[ ] corpcredit - A corporation or organization credited in a document -[ ] corpname - The name of a corporation -[ ] country - The name of a country -[ ] database - The name of a database, or part of a database -[x] date - The date of publication or revision of a document -[ ] dedication - A wrapper for the dedication section of a book -[ ] destructorsynopsis - A syntax summary for a destructor -[ ] edition - The name or number of an edition of a document -[ ] editor - The name of the editor of a document -[x] email - An email address -[x] emphasis - Emphasized text -[x] entry - A cell in a table -[ ] entrytbl - A subtable appearing in place of an Entry in a table -[ ] envar - A software environment variable -[x] epigraph - A short inscription at the beginning of a document or component - note: also handle embedded attribution tag -[x] equation - A displayed mathematical equation -[ ] errorcode - An error code -[ ] errorname - An error name -[ ] errortext - An error message. -[ ] errortype - The classification of an error message -[ ] example - A formal example, with a title -[ ] exceptionname - The name of an exception -[ ] fax - A fax number -[ ] fieldsynopsis - The name of a field in a class definition -[x] figure - A formal figure, generally an illustration, with a title -[x] filename - The name of a file -[ ] firstname - The first name of a person -[ ] firstterm - The first occurrence of a term -[x] footnote - A footnote -[ ] footnoteref - A cross reference to a footnote (a footnote mark) -[x] foreignphrase - A word or phrase in a language other than the primary - language of the document -[x] formalpara - A paragraph with a title -[ ] funcdef - A function (subroutine) name and its return type -[ ] funcparams - Parameters for a function referenced through a function - pointer in a synopsis -[ ] funcprototype - The prototype of a function -[ ] funcsynopsis - The syntax summary for a function definition -[ ] funcsynopsisinfo - Information supplementing the FuncDefs of a FuncSynopsis -[x] function - The name of a function or subroutine, as in a - programming language -[x] glossary - A glossary -[x] glossaryinfo - Meta-information for a Glossary -[x] glossdef - A definition in a GlossEntry -[x] glossdiv - A division in a Glossary -[x] glossentry - An entry in a Glossary or GlossList -[x] glosslist - A wrapper for a set of GlossEntrys -[x] glosssee - A cross-reference from one GlossEntry to another -[x] glossseealso - A cross-reference from one GlossEntry to another -[x] glossterm - A glossary term -[ ] graphic - A displayed graphical object (not an inline) - Note: in DocBook v5 `graphic` is discarded -[ ] graphicco - A graphic that contains callout areas - Note: in DocBook v5 `graphicco` is discarded -[ ] group - A group of elements in a CmdSynopsis -[ ] guibutton - The text on a button in a GUI -[ ] guiicon - Graphic and/or text appearing as a icon in a GUI -[ ] guilabel - The text of a label in a GUI -[x] guimenu - The name of a menu in a GUI -[x] guimenuitem - The name of a terminal menu item in a GUI -[x] guisubmenu - The name of a submenu in a GUI -[ ] hardware - A physical part of a computer system -[ ] highlights - A summary of the main points of the discussed component -[ ] holder - The name of the individual or organization that holds a copyright -[o] honorific - The title of a person -[ ] html:form - An HTML form -[x] imagedata - Pointer to external image data (only `fileref` attribute - implemented but not `entityref` which would require parsing of the DTD) -[x] imageobject - A wrapper for image data and its associated meta-information -[ ] imageobjectco - A wrapper for an image object with callouts -[x] important - An admonition set off from the text -[x] index - An index -[x] indexdiv - A division in an index -[x] indexentry - An entry in an index -[x] indexinfo - Meta-information for an Index -[x] indexterm - A wrapper for terms to be indexed -[x] info - A wrapper for information about a component or other block. (DocBook v5) -[x] informalequation - A displayed mathematical equation without a title -[x] informalexample - A displayed example without a title -[ ] informalfigure - A untitled figure -[ ] informaltable - A table without a title -[ ] initializer - The initializer for a FieldSynopsis -[x] inlineequation - A mathematical equation or expression occurring inline -[ ] inlinegraphic - An object containing or pointing to graphical data - that will be rendered inline -[x] inlinemediaobject - An inline media object (video, audio, image, and so on) -[ ] interface - An element of a GUI -[ ] interfacename - The name of an interface -[ ] invpartnumber - An inventory part number -[ ] isbn - The International Standard Book Number of a document -[ ] issn - The International Standard Serial Number of a periodical -[ ] issuenum - The number of an issue of a journal -[x] itemizedlist - A list in which each entry is marked with a bullet or - other dingbat -[ ] itermset - A set of index terms in the meta-information of a document -[ ] jobtitle - The title of an individual in an organization -[x] keycap - The text printed on a key on a keyboard -[ ] keycode - The internal, frequently numeric, identifier for a key - on a keyboard -[x] keycombo - A combination of input actions -[ ] keysym - The symbolic name of a key on a keyboard -[ ] keyword - One of a set of keywords describing the content of a document -[ ] keywordset - A set of keywords describing the content of a document -[ ] label - A label on a Question or Answer -[ ] legalnotice - A statement of legal obligations or requirements -[ ] lhs - The left-hand side of an EBNF production -[ ] lineage - The portion of a person's name indicating a relationship to - ancestors -[ ] lineannotation - A comment on a line in a verbatim listing -[x] link - A hypertext link -[x] listitem - A wrapper for the elements of a list item -[x] literal - Inline text that is some literal value -[x] literallayout - A block of text in which line breaks and white space are - to be reproduced faithfully -[ ] lot - A list of the titles of formal objects (as tables or figures) in - a document -[ ] lotentry - An entry in a list of titles -[ ] manvolnum - A reference volume number -[x] markup - A string of formatting markup in text that is to be - represented literally -[ ] mathphrase - A mathematical phrase, an expression that can be represented - with ordinary text and a small amount of markup -[ ] medialabel - A name that identifies the physical medium on which some - information resides -[x] mediaobject - A displayed media object (video, audio, image, etc.) -[ ] mediaobjectco - A media object that contains callouts -[x] member - An element of a simple list -[x] menuchoice - A selection or series of selections from a menu -[ ] methodname - The name of a method -[ ] methodparam - Parameters to a method -[ ] methodsynopsis - A syntax summary for a method -[x] mml:math - A MathML equation -[ ] modespec - Application-specific information necessary for the - completion of an OLink -[ ] modifier - Modifiers in a synopsis -[ ] mousebutton - The conventional name of a mouse button -[ ] msg - A message in a message set -[ ] msgaud - The audience to which a message in a message set is relevant -[ ] msgentry - A wrapper for an entry in a message set -[ ] msgexplan - Explanatory material relating to a message in a message set -[ ] msginfo - Information about a message in a message set -[ ] msglevel - The level of importance or severity of a message in a message set -[ ] msgmain - The primary component of a message in a message set -[ ] msgorig - The origin of a message in a message set -[ ] msgrel - A related component of a message in a message set -[ ] msgset - A detailed set of messages, usually error messages -[ ] msgsub - A subcomponent of a message in a message set -[ ] msgtext - The actual text of a message component in a message set -[ ] nonterminal - A non-terminal in an EBNF production -[x] note - A message set off from the text -[ ] objectinfo - Meta-information for an object -[ ] olink - A link that addresses its target indirectly, through an entity -[ ] ooclass - A class in an object-oriented programming language -[ ] ooexception - An exception in an object-oriented programming language -[ ] oointerface - An interface in an object-oriented programming language -[x] option - An option for a software command -[x] optional - Optional information -[x] orderedlist - A list in which each entry is marked with a sequentially - incremented label -[ ] orgdiv - A division of an organization -[ ] orgname - The name of an organization other than a corporation -[ ] otheraddr - Uncategorized information in address -[ ] othercredit - A person or entity, other than an author or editor, - credited in a document -[ ] othername - A component of a persons name that is not a first name, - surname, or lineage -[ ] package - A package -[ ] pagenums - The numbers of the pages in a book, for use in a bibliographic - entry -[x] para - A paragraph -[ ] paramdef - Information about a function parameter in a programming language -[x] parameter - A value or a symbolic reference to a value -[ ] part - A division in a book -[ ] partinfo - Meta-information for a Part -[ ] partintro - An introduction to the contents of a part -[ ] personblurb - A short description or note about a person -[ ] personname - The personal name of an individual -[ ] phone - A telephone number -[ ] phrase - A span of text -[ ] pob - A post office box in an address -[ ] postcode - A postal code in an address -[x] preface - Introductory matter preceding the first chapter of a book -[ ] prefaceinfo - Meta-information for a Preface -[ ] primary - The primary word or phrase under which an index term should be - sorted -[ ] primaryie - A primary term in an index entry, not in the text -[ ] printhistory - The printing history of a document -[ ] procedure - A list of operations to be performed in a well-defined sequence -[ ] production - A production in a set of EBNF productions -[ ] productionrecap - A cross-reference to an EBNF production -[ ] productionset - A set of EBNF productions -[ ] productname - The formal name of a product -[ ] productnumber - A number assigned to a product -[x] programlisting - A literal listing of all or part of a program -[ ] programlistingco - A program listing with associated areas used in callouts -[x] prompt - A character or string indicating the start of an input field in - a computer display -[ ] property - A unit of data associated with some part of a computer system -[ ] pubdate - The date of publication of a document -[ ] publisher - The publisher of a document -[ ] publishername - The name of the publisher of a document -[ ] pubsnumber - A number assigned to a publication other than an ISBN or ISSN - or inventory part number -[x] qandadiv - A titled division in a QandASet -[o] qandaentry - A question/answer set within a QandASet -[o] qandaset - A question-and-answer set -[x] question - A question in a QandASet -[x] quote - An inline quotation -[ ] refclass - The scope or other indication of applicability of a - reference entry -[ ] refdescriptor - A description of the topic of a reference page -[ ] refentry - A reference page (originally a UNIX man-style reference page) -[ ] refentryinfo - Meta-information for a Refentry -[ ] refentrytitle - The title of a reference page -[ ] reference - A collection of reference entries -[ ] referenceinfo - Meta-information for a Reference -[ ] refmeta - Meta-information for a reference entry -[ ] refmiscinfo - Meta-information for a reference entry other than the title - and volume number -[ ] refname - The name of (one of) the subject(s) of a reference page -[ ] refnamediv - The name, purpose, and classification of a reference page -[ ] refpurpose - A short (one sentence) synopsis of the topic of a reference - page -[x] refsect1 - A major subsection of a reference entry -[x] refsect1info - Meta-information for a RefSect1 -[x] refsect2 - A subsection of a RefSect1 -[x] refsect2info - Meta-information for a RefSect2 -[x] refsect3 - A subsection of a RefSect2 -[x] refsect3info - Meta-information for a RefSect3 -[x] refsection - A recursive section in a refentry -[x] refsectioninfo - Meta-information for a refsection -[ ] refsynopsisdiv - A syntactic synopsis of the subject of the reference page -[ ] refsynopsisdivinfo - Meta-information for a RefSynopsisDiv -[x] releaseinfo - Information about a particular release of a document -[ ] remark - A remark (or comment) intended for presentation in a draft - manuscript -[ ] replaceable - Content that may or must be replaced by the user -[ ] returnvalue - The value returned by a function -[ ] revdescription - A extended description of a revision to a document -[ ] revhistory - A history of the revisions to a document -[ ] revision - An entry describing a single revision in the history of the - revisions to a document -[ ] revnumber - A document revision number -[ ] revremark - A description of a revision to a document -[ ] rhs - The right-hand side of an EBNF production -[x] row - A row in a table -[ ] sbr - An explicit line break in a command synopsis -[x] screen - Text that a user sees or might see on a computer screen -[o] screenco - A screen with associated areas used in callouts -[o] screeninfo - Information about how a screen shot was produced -[ ] screenshot - A representation of what the user sees or might see on a - computer screen -[ ] secondary - A secondary word or phrase in an index term -[ ] secondaryie - A secondary term in an index entry, rather than in the text -[x] sect1 - A top-level section of document -[x] sect1info - Meta-information for a Sect1 -[x] sect2 - A subsection within a Sect1 -[x] sect2info - Meta-information for a Sect2 -[x] sect3 - A subsection within a Sect2 -[x] sect3info - Meta-information for a Sect3 -[x] sect4 - A subsection within a Sect3 -[x] sect4info - Meta-information for a Sect4 -[x] sect5 - A subsection within a Sect4 -[x] sect5info - Meta-information for a Sect5 -[x] section - A recursive section -[x] sectioninfo - Meta-information for a recursive section -[x] see - Part of an index term directing the reader instead to another entry - in the index -[x] seealso - Part of an index term directing the reader also to another entry - in the index -[ ] seealsoie - A See also entry in an index, rather than in the text -[ ] seeie - A See entry in an index, rather than in the text -[x] seg - An element of a list item in a segmented list -[x] seglistitem - A list item in a segmented list -[x] segmentedlist - A segmented list, a list of sets of elements -[x] segtitle - The title of an element of a list item in a segmented list -[ ] seriesvolnums - Numbers of the volumes in a series of books -[ ] set - A collection of books -[ ] setindex - An index to a set of books -[ ] setindexinfo - Meta-information for a SetIndex -[ ] setinfo - Meta-information for a Set -[ ] sgmltag - A component of SGML markup -[ ] shortaffil - A brief description of an affiliation -[ ] shortcut - A key combination for an action that is also accessible through - a menu -[ ] sidebar - A portion of a document that is isolated from the main - narrative flow -[ ] sidebarinfo - Meta-information for a Sidebar -[x] simpara - A paragraph that contains only text and inline markup, no block - elements -[x] simplelist - An undecorated list of single words or short phrases -[ ] simplemsgentry - A wrapper for a simpler entry in a message set -[ ] simplesect - A section of a document with no subdivisions -[ ] spanspec - Formatting information for a spanned column in a table -[ ] state - A state or province in an address -[ ] step - A unit of action in a procedure -[ ] stepalternatives - Alternative steps in a procedure -[ ] street - A street address in an address -[ ] structfield - A field in a structure (in the programming language sense) -[ ] structname - The name of a structure (in the programming language sense) -[ ] subject - One of a group of terms describing the subject matter of a - document -[ ] subjectset - A set of terms describing the subject matter of a document -[ ] subjectterm - A term in a group of terms describing the subject matter of - a document -[x] subscript - A subscript (as in H2O, the molecular formula for water) -[ ] substeps - A wrapper for steps that occur within steps in a procedure -[x] subtitle - The subtitle of a document -[x] superscript - A superscript (as in x2, the mathematical notation for x - multiplied by itself) -[ ] surname - A family name; in western cultures the last name -[ ] svg:svg - An SVG graphic -[x] symbol - A name that is replaced by a value before processing -[ ] synopfragment - A portion of a CmdSynopsis broken out from the main body - of the synopsis -[ ] synopfragmentref - A reference to a fragment of a command synopsis -[ ] synopsis - A general-purpose element for representing the syntax of - commands or functions -[ ] systemitem - A system-related item or term -[ ] table - A formal table in a document -[ ] task - A task to be completed -[ ] taskprerequisites - The prerequisites for a task -[ ] taskrelated - Information related to a task -[ ] tasksummary - A summary of a task -[x] tbody - A wrapper for the rows of a table or informal table -[x] td - A table entry in an HTML table -[x] term - The word or phrase being defined or described in a variable list -[ ] termdef - An inline term definition -[ ] tertiary - A tertiary word or phrase in an index term -[ ] tertiaryie - A tertiary term in an index entry, rather than in the text -[ ] textdata - Pointer to external text data -[ ] textobject - A wrapper for a text description of an object and its - associated meta-information -[ ] tfoot - A table footer consisting of one or more rows -[x] tgroup - A wrapper for the main content of a table, or part of a table -[x] th - A table header entry in an HTML table -[x] thead - A table header consisting of one or more rows -[x] tip - A suggestion to the user, set off from the text -[x] title - The text of the title of a section of a document or of a formal - block-level element -[x] titleabbrev - The abbreviation of a Title -[x] toc - A table of contents -[x] tocback - An entry in a table of contents for a back matter component -[x] tocchap - An entry in a table of contents for a component in the body of - a document -[x] tocentry - A component title in a table of contents -[x] tocfront - An entry in a table of contents for a front matter component -[x] toclevel1 - A top-level entry within a table of contents entry for a - chapter-like component -[x] toclevel2 - A second-level entry within a table of contents entry for a - chapter-like component -[x] toclevel3 - A third-level entry within a table of contents entry for a - chapter-like component -[x] toclevel4 - A fourth-level entry within a table of contents entry for a - chapter-like component -[x] toclevel5 - A fifth-level entry within a table of contents entry for a - chapter-like component -[x] tocpart - An entry in a table of contents for a part of a book -[ ] token - A unit of information -[x] tr - A row in an HTML table -[ ] trademark - A trademark -[x] type - The classification of a value -[x] ulink - A link that addresses its target by means of a URL - (Uniform Resource Locator) -[x] uri - A Uniform Resource Identifier -[x] userinput - Data entered by the user -[x] varargs - An empty element in a function synopsis indicating a variable - number of arguments -[x] variablelist - A list in which each entry is composed of a set of one or - more terms and an associated description -[x] varlistentry - A wrapper for a set of terms and the associated description - in a variable list -[x] varname - The name of a variable -[ ] videodata - Pointer to external video data -[ ] videoobject - A wrapper for video data and its associated meta-information -[ ] void - An empty element in a function synopsis indicating that the - function in question takes no arguments -[ ] volumenum - The volume number of a document in a set (as of books in a set - or articles in a journal) -[x] warning - An admonition set off from the text -[x] wordasword - A word meant specifically as a word and not representing - anything else -[x] xref - A cross reference to another part of the document -[ ] year - The year of publication of a document -[x] ?asciidoc-br? - line break from asciidoc docbook output --} - -type DB m = StateT DBState m - -data DBState = DBState{ dbSectionLevel :: Int - , dbQuoteType :: QuoteType - , dbMeta :: Meta - , dbAcceptsMeta :: Bool - , dbBook :: Bool - , dbFigureTitle :: Inlines - , dbContent :: [Content] - } deriving Show - -instance Default DBState where - def = DBState{ dbSectionLevel = 0 - , dbQuoteType = DoubleQuote - , dbMeta = mempty - , dbAcceptsMeta = False - , dbBook = False - , dbFigureTitle = mempty - , dbContent = [] } - - -readDocBook :: PandocMonad m => ReaderOptions -> String -> m Pandoc -readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ inp - (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock $ tree - return $ Pandoc (dbMeta st') (toList . mconcat $ bs) - --- We treat <?asciidoc-br?> specially (issue #1236), converting it --- to <br/>, since xml-light doesn't parse the instruction correctly. --- Other xml instructions are simply removed from the input stream. -handleInstructions :: String -> String -handleInstructions ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions xs -handleInstructions xs = case break (=='<') xs of - (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions zs - (ys, zs) -> ys ++ handleInstructions zs - -getFigure :: PandocMonad m => Element -> DB m Blocks -getFigure e = do - tit <- case filterChild (named "title") e of - Just t -> getInlines t - Nothing -> return mempty - modify $ \st -> st{ dbFigureTitle = tit } - res <- getBlocks e - modify $ \st -> st{ dbFigureTitle = mempty } - return res - --- normalize input, consolidating adjacent Text and CRef elements -normalizeTree :: [Content] -> [Content] -normalizeTree = everywhere (mkT go) - where go :: [Content] -> [Content] - go (Text (CData CDataRaw _ _):xs) = xs - go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs - go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs - go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs - go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs - go xs = xs - -convertEntity :: String -> String -convertEntity e = maybe (map toUpper e) id (lookupEntity e) - --- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> String -attrValue attr elt = - case lookupAttrBy (\x -> qName x == attr) (elAttribs elt) of - Just z -> z - Nothing -> "" - --- convenience function -named :: String -> Element -> Bool -named s e = qName (elName e) == s - --- - -acceptingMetadata :: PandocMonad m => DB m a -> DB m a -acceptingMetadata p = do - modify (\s -> s { dbAcceptsMeta = True } ) - res <- p - modify (\s -> s { dbAcceptsMeta = False }) - return res - -checkInMeta :: (PandocMonad m, Monoid a) => DB m () -> DB m a -checkInMeta p = do - accepts <- dbAcceptsMeta <$> get - when accepts p - return mempty - -addMeta :: PandocMonad m => ToMetaValue a => String -> a -> DB m () -addMeta field val = modify (setMeta field val) - -instance HasMeta DBState where - setMeta field v s = s {dbMeta = setMeta field v (dbMeta s)} - deleteMeta field s = s {dbMeta = deleteMeta field (dbMeta s)} - -isBlockElement :: Content -> Bool -isBlockElement (Elem e) = qName (elName e) `elem` blocktags - where blocktags = ["toc","index","para","formalpara","simpara", - "ackno","epigraph","blockquote","bibliography","bibliodiv", - "biblioentry","glossee","glosseealso","glossary", - "glossdiv","glosslist","chapter","appendix","preface", - "bridgehead","sect1","sect2","sect3","sect4","sect5","section", - "refsect1","refsect2","refsect3","refsection", - "important","caution","note","tip","warning","qandadiv", - "question","answer","abstract","itemizedlist","orderedlist", - "variablelist","article","book","table","informaltable", - "informalexample", "linegroup", - "screen","programlisting","example","calloutlist"] -isBlockElement _ = False - --- Trim leading and trailing newline characters -trimNl :: String -> String -trimNl = reverse . go . reverse . go - where go ('\n':xs) = xs - go xs = xs - --- meld text into beginning of first paragraph of Blocks. --- assumes Blocks start with a Para; if not, does nothing. -addToStart :: Inlines -> Blocks -> Blocks -addToStart toadd bs = - case toList bs of - (Para xs : rest) -> para (toadd <> fromList xs) <> fromList rest - _ -> bs - --- function that is used by both mediaobject (in parseBlock) --- and inlinemediaobject (in parseInline) --- A DocBook mediaobject is a wrapper around a set of alternative presentations -getMediaobject :: PandocMonad m => Element -> DB m Inlines -getMediaobject e = do - (imageUrl, attr) <- - case filterChild (named "imageobject") e of - Nothing -> return (mempty, nullAttr) - Just z -> case filterChild (named "imagedata") z of - Nothing -> return (mempty, nullAttr) - Just i -> let atVal a = attrValue a i - w = case atVal "width" of - "" -> [] - d -> [("width", d)] - h = case atVal "depth" of - "" -> [] - d -> [("height", d)] - atr = (atVal "id", words $ atVal "role", w ++ h) - in return (atVal "fileref", atr) - let getCaption el = case filterChild (\x -> named "caption" x - || named "textobject" x - || named "alt" x) el of - Nothing -> return mempty - Just z -> mconcat <$> (mapM parseInline $ elContent z) - figTitle <- gets dbFigureTitle - let (caption, title) = if isNull figTitle - then (getCaption e, "") - else (return figTitle, "fig:") - liftM (imageWith attr imageUrl title) caption - -getBlocks :: PandocMonad m => Element -> DB m Blocks -getBlocks e = mconcat <$> (mapM parseBlock $ elContent e) - - -parseBlock :: PandocMonad m => Content -> DB m Blocks -parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s - then return mempty - else return $ plain $ trimInlines $ text s -parseBlock (CRef x) = return $ plain $ str $ map toUpper x -parseBlock (Elem e) = - case qName (elName e) of - "toc" -> return mempty -- skip TOC, since in pandoc it's autogenerated - "index" -> return mempty -- skip index, since page numbers meaningless - "para" -> parseMixed para (elContent e) - "formalpara" -> do - tit <- case filterChild (named "title") e of - Just t -> (para . strong . (<> str ".")) <$> - getInlines t - Nothing -> return mempty - (tit <>) <$> parseMixed para (elContent e) - "simpara" -> parseMixed para (elContent e) - "ackno" -> parseMixed para (elContent e) - "epigraph" -> parseBlockquote - "blockquote" -> parseBlockquote - "attribution" -> return mempty - "titleabbrev" -> return mempty - "authorinitials" -> return mempty - "title" -> checkInMeta getTitle - "author" -> checkInMeta getAuthor - "authorgroup" -> checkInMeta getAuthorGroup - "releaseinfo" -> checkInMeta (getInlines e >>= addMeta "release") - "date" -> checkInMeta getDate - "bibliography" -> sect 0 - "bibliodiv" -> sect 1 - "biblioentry" -> parseMixed para (elContent e) - "bibliomixed" -> parseMixed para (elContent e) - "glosssee" -> para . (\ils -> text "See " <> ils <> str ".") - <$> getInlines e - "glossseealso" -> para . (\ils -> text "See also " <> ils <> str ".") - <$> getInlines e - "glossary" -> sect 0 - "glossdiv" -> definitionList <$> - mapM parseGlossEntry (filterChildren (named "glossentry") e) - "glosslist" -> definitionList <$> - mapM parseGlossEntry (filterChildren (named "glossentry") e) - "chapter" -> sect 0 - "appendix" -> sect 0 - "preface" -> sect 0 - "bridgehead" -> para . strong <$> getInlines e - "sect1" -> sect 1 - "sect2" -> sect 2 - "sect3" -> sect 3 - "sect4" -> sect 4 - "sect5" -> sect 5 - "section" -> gets dbSectionLevel >>= sect . (+1) - "refsect1" -> sect 1 - "refsect2" -> sect 2 - "refsect3" -> sect 3 - "refsection" -> gets dbSectionLevel >>= sect . (+1) - "important" -> blockQuote . (para (strong $ str "Important") <>) - <$> getBlocks e - "caution" -> blockQuote . (para (strong $ str "Caution") <>) - <$> getBlocks e - "note" -> blockQuote . (para (strong $ str "Note") <>) - <$> getBlocks e - "tip" -> blockQuote . (para (strong $ str "Tip") <>) - <$> getBlocks e - "warning" -> blockQuote . (para (strong $ str "Warning") <>) - <$> getBlocks e - "area" -> return mempty - "areaset" -> return mempty - "areaspec" -> return mempty - "qandadiv" -> gets dbSectionLevel >>= sect . (+1) - "question" -> addToStart (strong (str "Q:") <> str " ") <$> getBlocks e - "answer" -> addToStart (strong (str "A:") <> str " ") <$> getBlocks e - "abstract" -> blockQuote <$> getBlocks e - "calloutlist" -> bulletList <$> callouts - "itemizedlist" -> bulletList <$> listitems - "orderedlist" -> do - let listStyle = case attrValue "numeration" e of - "arabic" -> Decimal - "loweralpha" -> LowerAlpha - "upperalpha" -> UpperAlpha - "lowerroman" -> LowerRoman - "upperroman" -> UpperRoman - _ -> Decimal - let start = fromMaybe 1 $ - (attrValue "override" <$> filterElement (named "listitem") e) - >>= safeRead - orderedListWith (start,listStyle,DefaultDelim) - <$> listitems - "variablelist" -> definitionList <$> deflistitems - "figure" -> getFigure e - "mediaobject" -> para <$> getMediaobject e - "caption" -> return mempty - "info" -> metaBlock - "articleinfo" -> metaBlock - "sectioninfo" -> return mempty -- keywords & other metadata - "refsectioninfo" -> return mempty -- keywords & other metadata - "refsect1info" -> return mempty -- keywords & other metadata - "refsect2info" -> return mempty -- keywords & other metadata - "refsect3info" -> return mempty -- keywords & other metadata - "sect1info" -> return mempty -- keywords & other metadata - "sect2info" -> return mempty -- keywords & other metadata - "sect3info" -> return mempty -- keywords & other metadata - "sect4info" -> return mempty -- keywords & other metadata - "sect5info" -> return mempty -- keywords & other metadata - "chapterinfo" -> return mempty -- keywords & other metadata - "glossaryinfo" -> return mempty -- keywords & other metadata - "appendixinfo" -> return mempty -- keywords & other metadata - "bookinfo" -> metaBlock - "article" -> modify (\st -> st{ dbBook = False }) >> - getBlocks e - "book" -> modify (\st -> st{ dbBook = True }) >> getBlocks e - "table" -> parseTable - "informaltable" -> parseTable - "informalexample" -> divWith ("", ["informalexample"], []) <$> - getBlocks e - "linegroup" -> lineBlock <$> lineItems - "literallayout" -> codeBlockWithLang - "screen" -> codeBlockWithLang - "programlisting" -> codeBlockWithLang - "?xml" -> return mempty - _ -> getBlocks e - where parseMixed container conts = do - let (ils,rest) = break isBlockElement conts - ils' <- (trimInlines . mconcat) <$> mapM parseInline ils - let p = if ils' == mempty then mempty else container ils' - case rest of - [] -> return p - (r:rs) -> do - b <- parseBlock r - x <- parseMixed container rs - return $ p <> b <> x - codeBlockWithLang = do - let classes' = case attrValue "language" e of - "" -> [] - x -> [x] - return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ strContentRecursive e - parseBlockquote = do - attrib <- case filterChild (named "attribution") e of - Nothing -> return mempty - Just z -> (para . (str "— " <>) . mconcat) - <$> (mapM parseInline $ elContent z) - contents <- getBlocks e - return $ blockQuote (contents <> attrib) - listitems = mapM getBlocks $ filterChildren (named "listitem") e - callouts = mapM getBlocks $ filterChildren (named "callout") e - deflistitems = mapM parseVarListEntry $ filterChildren - (named "varlistentry") e - parseVarListEntry e' = do - let terms = filterChildren (named "term") e' - let items = filterChildren (named "listitem") e' - terms' <- mapM getInlines terms - items' <- mapM getBlocks items - return (mconcat $ intersperse (str "; ") terms', items') - parseGlossEntry e' = do - let terms = filterChildren (named "glossterm") e' - let items = filterChildren (named "glossdef") e' - terms' <- mapM getInlines terms - items' <- mapM getBlocks items - return (mconcat $ intersperse (str "; ") terms', items') - getTitle = do - tit <- getInlines e - subtit <- case filterChild (named "subtitle") e of - Just s -> (text ": " <>) <$> - getInlines s - Nothing -> return mempty - addMeta "title" (tit <> subtit) - - getAuthor = (:[]) <$> getInlines e >>= addMeta "author" - getAuthorGroup = do - let terms = filterChildren (named "author") e - mapM getInlines terms >>= addMeta "author" - getDate = getInlines e >>= addMeta "date" - parseTable = do - let isCaption x = named "title" x || named "caption" x - caption <- case filterChild isCaption e of - Just t -> getInlines t - Nothing -> return mempty - let e' = fromMaybe e $ filterChild (named "tgroup") e - let isColspec x = named "colspec" x || named "col" x - let colspecs = case filterChild (named "colgroup") e' of - Just c -> filterChildren isColspec c - _ -> filterChildren isColspec e' - let isRow x = named "row" x || named "tr" x - headrows <- case filterChild (named "thead") e' of - Just h -> case filterChild isRow h of - Just x -> parseRow x - Nothing -> return [] - Nothing -> return [] - bodyrows <- case filterChild (named "tbody") e' of - Just b -> mapM parseRow - $ filterChildren isRow b - Nothing -> mapM parseRow - $ filterChildren isRow e' - let toAlignment c = case findAttr (unqual "align") c of - Just "left" -> AlignLeft - Just "right" -> AlignRight - Just "center" -> AlignCenter - _ -> AlignDefault - let toWidth c = case findAttr (unqual "colwidth") c of - Just w -> fromMaybe 0 - $ safeRead $ '0': filter (\x -> - (x >= '0' && x <= '9') - || x == '.') w - Nothing -> 0 :: Double - let numrows = case bodyrows of - [] -> 0 - xs -> maximum $ map length xs - let aligns = case colspecs of - [] -> replicate numrows AlignDefault - cs -> map toAlignment cs - let widths = case colspecs of - [] -> replicate numrows 0 - cs -> let ws = map toWidth cs - tot = sum ws - in if all (> 0) ws - then map (/ tot) ws - else replicate numrows 0 - let headrows' = if null headrows - then replicate numrows mempty - else headrows - return $ table caption (zip aligns widths) - headrows' bodyrows - isEntry x = named "entry" x || named "td" x || named "th" x - parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry - sect n = do isbook <- gets dbBook - let n' = if isbook || n == 0 then n + 1 else n - headerText <- case filterChild (named "title") e `mplus` - (filterChild (named "info") e >>= - filterChild (named "title")) of - Just t -> getInlines t - Nothing -> return mempty - modify $ \st -> st{ dbSectionLevel = n } - b <- getBlocks e - let ident = attrValue "id" e - modify $ \st -> st{ dbSectionLevel = n - 1 } - return $ headerWith (ident,[],[]) n' headerText <> b - lineItems = mapM getInlines $ filterChildren (named "line") e - metaBlock = acceptingMetadata (getBlocks e) >> return mempty - -getInlines :: PandocMonad m => Element -> DB m Inlines -getInlines e' = (trimInlines . mconcat) <$> (mapM parseInline $ elContent e') - -strContentRecursive :: Element -> String -strContentRecursive = strContent . - (\e' -> e'{ elContent = map elementToStr $ elContent e' }) - -elementToStr :: Content -> Content -elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing -elementToStr x = x - -parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text s -parseInline (CRef ref) = - return $ maybe (text $ map toUpper ref) (text) $ lookupEntity ref -parseInline (Elem e) = - case qName (elName e) of - "equation" -> equation displayMath - "informalequation" -> equation displayMath - "inlineequation" -> equation math - "subscript" -> subscript <$> innerInlines - "superscript" -> superscript <$> innerInlines - "inlinemediaobject" -> getMediaobject e - "quote" -> do - qt <- gets dbQuoteType - let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote - modify $ \st -> st{ dbQuoteType = qt' } - contents <- innerInlines - modify $ \st -> st{ dbQuoteType = qt } - return $ if qt == SingleQuote - then singleQuoted contents - else doubleQuoted contents - "simplelist" -> simpleList - "segmentedlist" -> segmentedList - "classname" -> codeWithLang - "code" -> codeWithLang - "filename" -> codeWithLang - "literal" -> codeWithLang - "computeroutput" -> codeWithLang - "prompt" -> codeWithLang - "parameter" -> codeWithLang - "option" -> codeWithLang - "optional" -> do x <- getInlines e - return $ str "[" <> x <> str "]" - "markup" -> codeWithLang - "wordasword" -> emph <$> innerInlines - "command" -> codeWithLang - "varname" -> codeWithLang - "function" -> codeWithLang - "type" -> codeWithLang - "symbol" -> codeWithLang - "constant" -> codeWithLang - "userinput" -> codeWithLang - "varargs" -> return $ code "(...)" - "keycap" -> return (str $ strContent e) - "keycombo" -> keycombo <$> (mapM parseInline $ elContent e) - "menuchoice" -> menuchoice <$> (mapM parseInline $ - filter isGuiMenu $ elContent e) - "xref" -> do - content <- dbContent <$> get - let linkend = attrValue "linkend" e - let title = case attrValue "endterm" e of - "" -> maybe "???" xrefTitleByElem - (findElementById linkend content) - endterm -> maybe "???" strContent - (findElementById endterm content) - return $ link ('#' : linkend) "" (text title) - "email" -> return $ link ("mailto:" ++ strContent e) "" - $ str $ strContent e - "uri" -> return $ link (strContent e) "" $ str $ strContent e - "ulink" -> link (attrValue "url" e) "" <$> innerInlines - "link" -> do - ils <- innerInlines - let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> h - _ -> ('#' : attrValue "linkend" e) - let ils' = if ils == mempty then str href else ils - let attr = (attrValue "id" e, words $ attrValue "role" e, []) - return $ linkWith attr href "" ils' - "foreignphrase" -> emph <$> innerInlines - "emphasis" -> case attrValue "role" e of - "bold" -> strong <$> innerInlines - "strong" -> strong <$> innerInlines - "strikethrough" -> strikeout <$> innerInlines - _ -> emph <$> innerInlines - "footnote" -> (note . mconcat) <$> (mapM parseBlock $ elContent e) - "title" -> return mempty - "affiliation" -> return mempty - -- Note: this isn't a real docbook tag; it's what we convert - -- <?asciidor-br?> to in handleInstructions, above. A kludge to - -- work around xml-light's inability to parse an instruction. - "br" -> return linebreak - _ -> innerInlines - where innerInlines = (trimInlines . mconcat) <$> - (mapM parseInline $ elContent e) - equation constructor = return $ mconcat $ - map (constructor . writeTeX) - $ rights - $ map (readMathML . showElement . everywhere (mkT removePrefix)) - $ filterChildren (\x -> qName (elName x) == "math" && - qPrefix (elName x) == Just "mml") e - removePrefix elname = elname { qPrefix = Nothing } - codeWithLang = do - let classes' = case attrValue "language" e of - "" -> [] - l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e - simpleList = (mconcat . intersperse (str "," <> space)) <$> mapM getInlines - (filterChildren (named "member") e) - segmentedList = do - tit <- maybe (return mempty) getInlines $ filterChild (named "title") e - segtits <- mapM getInlines $ filterChildren (named "segtitle") e - segitems <- mapM (mapM getInlines . filterChildren (named "seg")) - $ filterChildren (named "seglistitem") e - let toSeg = mconcat . zipWith (\x y -> strong (x <> str ":") <> space <> - y <> linebreak) segtits - let segs = mconcat $ map toSeg segitems - let tit' = if tit == mempty - then mempty - else strong tit <> linebreak - return $ linebreak <> tit' <> segs - keycombo = spanWith ("",["keycombo"],[]) . - mconcat . intersperse (str "+") - menuchoice = spanWith ("",["menuchoice"],[]) . - mconcat . intersperse (text " > ") - isGuiMenu (Elem x) = named "guimenu" x || named "guisubmenu" x || - named "guimenuitem" x - isGuiMenu _ = False - - findElementById idString content - = asum [filterElement (\x -> attrValue "id" x == idString) el | Elem el <- content] - - -- Use the 'xreflabel' attribute for getting the title of a xref link; - -- if there's no such attribute, employ some heuristics based on what - -- docbook-xsl does. - xrefTitleByElem el - | not (null xrefLabel) = xrefLabel - | otherwise = case qName (elName el) of - "chapter" -> descendantContent "title" el - "sect1" -> descendantContent "title" el - "sect2" -> descendantContent "title" el - "sect3" -> descendantContent "title" el - "sect4" -> descendantContent "title" el - "sect5" -> descendantContent "title" el - "cmdsynopsis" -> descendantContent "command" el - "funcsynopsis" -> descendantContent "function" el - _ -> qName (elName el) ++ "_title" - where - xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" strContent - . filterElementName (\n -> qName n == name) |