From 64fe39c255357c25fc636c46bc3bdfd31257b445 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 28 Mar 2017 09:36:07 +0200 Subject: Added default.jats template. This is copied from Martin Fenner's pandoc-jats project: https://github.com/mfenner/pandoc-jats --- COPYRIGHT | 4 + data/templates/default.jats | 193 ++++++++++++++++++++++++++++++++++++++++++++ pandoc.cabal | 1 + 3 files changed, 198 insertions(+) create mode 100644 data/templates/default.jats diff --git a/COPYRIGHT b/COPYRIGHT index b52c4267e..c7b0f58f7 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -106,3 +106,7 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------ +The template pandoc.jats is Copyright 2013--15 Martin Fenner, +released under GPL version 2 or later. diff --git a/data/templates/default.jats b/data/templates/default.jats new file mode 100644 index 000000000..37f8f16a8 --- /dev/null +++ b/data/templates/default.jats @@ -0,0 +1,193 @@ + + +$if(article.type)$ +
+$else$ +
+$endif$ + + +$if(journal.publisher-id)$ +$journal.publisher-id$ +$endif$ +$if(journal.nlm-ta)$ +$journal.nlm-ta$ +$endif$ +$if(journal.pmc)$ +$journal.pmc$ +$endif$ + +$if(journal.title)$ +$journal.title$ +$endif$ +$if(journal.abbrev-title)$ +$journal.abbrev-title$ +$endif$ + +$if(journal.pissn)$ +$journal.pissn$ +$endif$ +$if(journal.eissn)$ +$journal.eissn$ +$endif$ + +$journal.publisher-name$ +$if(journal.publisher-loc)$ +$journal.publisher-loc$ +$endif$ + + + +$if(article.publisher-id)$ +$article.publisher-id$ +$endif$ +$if(article.doi)$ +$article.doi$ +$endif$ +$if(article.pmid)$ +$article.pmid$ +$endif$ +$if(article.pmcid)$ +$article.pmcid$ +$endif$ +$if(article.art-access-id)$ +$article.art-access-id$ +$endif$ +$if(article.heading)$ + + +$article.heading$ + +$if(article.categories)$ + +$for(article.categories)$ +$article.categories$ +$endfor$ + +$endif$ + +$endif$ +$if(title)$ + +$title$ + +$endif$ +$if(author)$ + +$for(author)$ + +$if(author.orcid)$ +$author.orcid$ +$endif$ + +$if(author.surname)$ +$author.surname$ +$author.given-names$ +$else$ +$author$ +$endif$ + +$if(author.email)$ +$author.email$ +$endif$ +$if(author.aff-id)$ + +$endif$ +$if(author.cor-id)$ +* +$endif$ + +$endfor$ + +$endif$ +$if(article.author-notes)$ + +$if(article.author-notes.corresp)$ +$for(article.author-notes.corresp)$ +* E-mail: $article.author-notes.corresp.email$ +$endfor$ +$endif$ +$if(article.author-notes.conflict)$ +

$article.author-notes.conflict$

+$endif$ +$if(article.author-notes.con)$ +

$article.author-notes.con$

+$endif$ +
+$endif$ +$if(date)$ +$if(date.iso-8601)$ + +$else$ + +$endif$ +$if(date.day)$ +$pub-date.day$ +$endif$ +$if(date.month)$ +$pub-date.month$ +$endif$ +$if(date.year)$ +$pub-date.year$ +$else$ +$date$ +$endif$ + +$endif$ +$if(article.volume)$ +$article.volume$ +$endif$ +$if(article.issue)$ +$article.issue$ +$endif$ +$if(article.fpage)$ +$article.fpage$ +$endif$ +$if(article.lpage)$ +$article.lpage$ +$endif$ +$if(article.elocation-id)$ +$article.elocation-id$ +$endif$ +$if(history)$ + + +$endif$ +$if(copyright)$ + +$if(copyright.statement)$ +$copyright.statement$ +$endif$ +$if(copyright.year)$ +$copyright.year$ +$endif$ +$if(copyright.holder)$ +$copyright.holder$ +$endif$ +$if(copyright.text)$ + +$copyright.text$ + + +$endif$ +$endif$ +$if(tags)$ + +$for(tags)$ +$tags$ +$endfor$ + +$endif$ +$if(article.funding-statement)$ + +$article.funding-statement$ + +$endif$ +
+$if(notes)$ +$notes$ +$endif$ +
+$body$ +
diff --git a/pandoc.cabal b/pandoc.cabal index 592c308e3..596b52c44 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -41,6 +41,7 @@ Data-Files: data/templates/default.html5 data/templates/default.docbook4 data/templates/default.docbook5 + data/templates/default.jats data/templates/default.tei data/templates/default.beamer data/templates/default.opendocument -- cgit v1.2.3 From 831e1c5edd4703b6ab0953a79980e37ea1bee5dc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 28 Mar 2017 09:51:30 +0200 Subject: Added JATS writer. * New module Text.Pandoc.Writer.JATS exporting writeJATS. * New output format `jats`. * Added tests. * Revised manual. --- MANUAL.txt | 12 +- pandoc.cabal | 3 + src/Text/Pandoc.hs | 3 + src/Text/Pandoc/Writers/JATS.hs | 429 ++++++++++++ test/Tests/Old.hs | 3 + test/tables.jats | 616 +++++++++++++++++ test/writer.jats | 1425 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 2485 insertions(+), 6 deletions(-) create mode 100644 src/Text/Pandoc/Writers/JATS.hs create mode 100644 test/tables.jats create mode 100644 test/writer.jats diff --git a/MANUAL.txt b/MANUAL.txt index e11e57459..f0e951f75 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -291,9 +291,9 @@ General options (LaTeX), `beamer` (LaTeX beamer slide show), `context` (ConTeXt), `man` (groff man), `mediawiki` (MediaWiki markup), `dokuwiki` (DokuWiki markup), `zimwiki` (ZimWiki markup), - `textile` (Textile), `org` (Emacs Org mode), - `texinfo` (GNU Texinfo), `opml` (OPML), `docbook` or `docbook4` - (DocBook 4), `docbook5` (DocBook 5), `opendocument` (OpenDocument), + `textile` (Textile), `org` (Emacs Org mode), `texinfo` (GNU Texinfo), + `opml` (OPML), `docbook` or `docbook4` (DocBook 4), `docbook5` + (DocBook 5), `jats` (JATS XML), `opendocument` (OpenDocument), `odt` (OpenOffice text document), `docx` (Word docx), `haddock` (Haddock markup), `rtf` (rich text format), `epub2` (EPUB v2 book), `epub` or `epub3` (EPUB v3), `fb2` (FictionBook2 e-book), @@ -616,8 +616,8 @@ General writer 
options : Include an automatically generated table of contents (or, in the case of `latex`, `context`, `docx`, `rst`, or `ms`, an instruction to create one) in the output document. This - option has no effect on `man`, `docbook4`, `docbook5`, `slidy`, - `slideous`, `s5`, or `odt` output. + option has no effect on `man`, `docbook4`, `docbook5`, `jats`, + `slidy`, `slideous`, `s5`, or `odt` output. `--toc-depth=`*NUMBER* @@ -1000,7 +1000,7 @@ Math rendering in HTML `--mathml` -: Convert TeX math to [MathML] (in `docbook4`, `docbook5`, +: Convert TeX math to [MathML] (in `docbook4`, `docbook5`, `jats`, `html4` and `html5`). `--jsmath`[`=`*URL*] diff --git a/pandoc.cabal b/pandoc.cabal index 596b52c44..33694dec0 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -159,6 +159,7 @@ Extra-Source-Files: test/tables.context test/tables.docbook4 test/tables.docbook5 + test/tables.jats test/tables.dokuwiki test/tables.zimwiki test/tables.icml @@ -187,6 +188,7 @@ Extra-Source-Files: test/writer.context test/writer.docbook4 test/writer.docbook5 + test/writer.jats test/writer.html4 test/writer.html5 test/writer.man @@ -381,6 +383,7 @@ Library Text.Pandoc.Readers.EPUB, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, + Text.Pandoc.Writers.JATS, Text.Pandoc.Writers.OPML, Text.Pandoc.Writers.HTML, Text.Pandoc.Writers.ICML, diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index e77bc6d45..977ad1ab4 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -117,6 +117,7 @@ module Text.Pandoc , writeICML , writeDocbook4 , writeDocbook5 + , writeJATS , writeOPML , writeOpenDocument , writeMan @@ -182,6 +183,7 @@ import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.Custom import Text.Pandoc.Writers.Docbook +import Text.Pandoc.Writers.JATS import Text.Pandoc.Writers.Docx import Text.Pandoc.Writers.DokuWiki import Text.Pandoc.Writers.EPUB @@ -287,6 +289,7 @@ writers = [ ,("docbook" , StringWriter writeDocbook5) ,("docbook4" , 
StringWriter writeDocbook4) ,("docbook5" , StringWriter writeDocbook5) + ,("jats" , StringWriter writeJATS) ,("opml" , StringWriter writeOPML) ,("opendocument" , StringWriter writeOpenDocument) ,("latex" , StringWriter writeLaTeX) diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs new file mode 100644 index 000000000..9aaba78e0 --- /dev/null +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -0,0 +1,429 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternGuards #-} +{- +Copyright (C) 2006-2015 John MacFarlane + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} +{- | + Module : Text.Pandoc.Writers.JATS + Copyright : Copyright (C) 2017 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Conversion of 'Pandoc' documents to JATS XML. 
+Reference:
+https://jats.nlm.nih.gov/publishing/tag-library/1.1d3/element/mml-math.html
+-}
+module Text.Pandoc.Writers.JATS ( writeJATS ) where
+import Control.Monad.Reader
+import Data.Char (toLower)
+import Data.Generics (everywhere, mkT)
+import Data.List (intercalate, isSuffixOf)
+import Data.Maybe (fromMaybe)
+import qualified Text.Pandoc.Builder as B
+import Text.Pandoc.Class (PandocMonad, report)
+import Text.Pandoc.Definition
+import Text.Pandoc.Highlighting (languages, languagesByExtension)
+import Text.Pandoc.Logging
+import Text.Pandoc.Options
+import Text.Pandoc.Pretty
+import Text.Pandoc.Shared
+import Text.Pandoc.Templates (renderTemplate')
+import Text.Pandoc.Writers.Math
+import Text.Pandoc.Writers.Shared
+import Text.Pandoc.XML
+import Text.Pandoc.MIME (getMimeType)
+import Text.TeXMath
+import qualified Text.XML.Light as Xml
+
+-- | JATS version tag carried in the reader environment.  'JATS1_1' is the
+-- only constructor.
+data JATSVersion = JATS1_1
+     deriving (Eq, Show)
+
+-- | Monad transformer used by the conversion functions in this module:
+-- a reader over 'JATSVersion'.
+type DB = ReaderT JATSVersion
+
+-- | Convert one author name into a raw JATS inline holding a
+-- @given-names@ element and a @surname@ element.
+--
+-- The name is first flattened to plain text with 'stringify'.  If it
+-- contains a comma it is read as \"Surname, Given names\"; otherwise the
+-- last word is the surname and everything before it the given names.
+-- An empty name yields two empty elements; a one-word name yields an
+-- empty @given-names@ element.
+authorToJATS :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines
+authorToJATS opts name' = do
+  -- Work on plain text: the pieces are XML-escaped exactly once below.
+  -- (Rendering through inlinesToJATS first would escape them twice.)
+  let name = stringify name'
+  let colwidth = if writerWrapText opts == WrapAuto
+                    then Just $ writerColumns opts
+                    else Nothing
+  -- The raw format has to be "jats": inlineToJATS discards raw inlines in
+  -- any other format, which would silently drop the author.
+  return $ B.rawInline "jats" $ render colwidth $
+      if ',' `elem` name
+         then -- last name first
+              let (lastname, rest) = break (==',') name
+                  -- 'rest' still begins with the comma; drop it, then trim
+                  firstname = triml (drop 1 rest) in
+              inTagsSimple "given-names" (text $ escapeStringForXML firstname) <>
+              inTagsSimple "surname" (text $ escapeStringForXML lastname)
+         else -- last name last
+              let namewords = words name
+                  lengthname = length namewords
+                  (firstname, lastname) = case lengthname of
+                    0 -> ("","")
+                    1 -> ("", name)
+                    n -> (intercalate " " (take (n-1) namewords), last namewords)
+               in inTagsSimple "given-names" (text $ escapeStringForXML firstname) $$
+                  inTagsSimple "surname" (text $ escapeStringForXML lastname)
+
+-- | Convert a Pandoc document to a JATS string by running 'docToJATS'
+-- with 'JATS1_1' as the reader environment.
+writeJATS :: PandocMonad m => WriterOptions -> Pandoc -> m String
+writeJATS opts d =
+  runReaderT (docToJATS opts d) JATS1_1
+
+-- | Convert Pandoc document to string in JATS format.
+--
+-- The blocks are grouped with 'hierarchicalize', converted with
+-- 'elementToJATS' and wrapped in a @body@ element.  When
+-- 'writerTemplate' is 'Nothing' that rendered body is returned as is;
+-- otherwise the template is rendered with the document metadata plus the
+-- fields @body@ and @mathml@.
+docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> DB m String
+docToJATS opts (Pandoc meta blocks) = do
+  let elements = hierarchicalize blocks
+  -- Only wrap output when the user asked for automatic wrapping.
+  let colwidth = if writerWrapText opts == WrapAuto
+                    then Just $ writerColumns opts
+                    else Nothing
+  let render' = render colwidth
+  -- NOTE(review): the "/book>" template-suffix test looks inherited from
+  -- the DocBook writer; confirm whether it is meaningful for JATS.
+  let opts' = if (maybe False (("/book>" `isSuffixOf`) . trimr)
+                            (writerTemplate opts) &&
+                     TopLevelDefault == writerTopLevelDivision opts)
+                 then opts{ writerTopLevelDivision = TopLevelChapter }
+                 else opts
+  -- The numbering here follows LaTeX's internal numbering
+  let startLvl = case writerTopLevelDivision opts' of
+                   TopLevelPart    -> -1
+                   TopLevelChapter -> 0
+                   TopLevelSection -> 1
+                   TopLevelDefault -> 1
+  -- Replace the author metadata with the pre-rendered raw JATS names.
+  auths' <- mapM (authorToJATS opts) $ docAuthors meta
+  let meta' = B.setMeta "author" auths' meta
+  metadata <- metaToJSON opts
+                 (fmap (render colwidth . vcat) .
+                          (mapM (elementToJATS opts' startLvl) .
+                            hierarchicalize))
+                 (fmap (render colwidth) . inlinesToJATS opts')
+                 meta'
+  main <- (render' . inTagsIndented "body" . vcat) <$>
+            (mapM (elementToJATS opts' startLvl) elements)
+  let context = defField "body" main
+              $ defField "mathml" (case writerHTMLMathMethod opts of
+                                        MathML -> True
+                                        _      -> False)
+              $ metadata
+  return $ case writerTemplate opts of
+           Nothing  -> main
+           Just tpl -> renderTemplate' tpl context
+
+-- | Convert an Element to JATS.
+elementToJATS :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc
+elementToJATS opts lvl element =
+  case element of
+    -- A bare block is converted directly; the level is not consulted.
+    Blk blk -> blockToJATS opts blk
+    -- A section becomes a "sec" element: a "title" followed by its
+    -- children, each converted one level deeper.
+    Sec _ _ (ident, _, keyvals) heading children -> do
+      -- Children are converted before the heading (same order of effects).
+      body <- mapM (elementToJATS opts (lvl + 1)) children
+      headingDoc <- inlinesToJATS opts heading
+      let passedThrough =
+            filter (\(key, _) -> key `elem` ["sec-type", "specific-use"]) keyvals
+          secAttrs =
+            [("id", writerIdentifierPrefix opts ++ ident) | not (null ident)]
+              ++ passedThrough
+      return $ inTags True "sec" secAttrs
+                 (inTagsSimple "title" headingDoc $$ vcat body)
+
+-- | Convert each block with 'blockToJATS' and stack the results
+-- vertically.
+blocksToJATS :: PandocMonad m => WriterOptions -> [Block] -> DB m Doc
+blocksToJATS opts blocks = vcat <$> mapM (blockToJATS opts) blocks
+
+-- | Turn a 'Plain' block into a 'Para' with the same inlines; every other
+-- block is returned unchanged.
+plainToPara :: Block -> Block
+plainToPara block =
+  case block of
+    Plain inlines -> Para inlines
+    _             -> block
+
+-- | Convert a list of (term, definitions) pairs into a vertical stack of
+-- JATS def-item elements, one per pair.
+deflistItemsToJATS :: PandocMonad m
+                   => WriterOptions -> [([Inline],[[Block]])] -> DB m Doc
+deflistItemsToJATS opts =
+  fmap vcat . mapM (uncurry (deflistItemToJATS opts))
+
+-- | Convert one term and its definitions into a JATS def-item holding a
+-- "term" and a "def".  All definitions are flattened into a single block
+-- list, with 'Plain' blocks promoted to 'Para'.
+deflistItemToJATS :: PandocMonad m
+                  => WriterOptions -> [Inline] -> [[Block]] -> DB m Doc
+deflistItemToJATS opts term defs = do
+  termDoc <- inlinesToJATS opts term
+  defDoc <- blocksToJATS opts [plainToPara blk | def <- defs, blk <- def]
+  return $ inTagsIndented "def-item"
+             (inTagsIndented "term" termDoc $$ inTagsIndented "def" defDoc)
+
+-- | Convert a list of lists of blocks to a list of JATS list items.
+-- With @Nothing@ no item gets a label; with @Just ms@ the markers are
+-- paired with the items by 'zipWithM', so the shorter list bounds the
+-- result.
+listItemsToJATS :: PandocMonad m
+                => WriterOptions -> (Maybe [String]) -> [[Block]] -> DB m Doc
+listItemsToJATS opts markers items =
+  case markers of
+       Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items
+       Just ms -> vcat <$> zipWithM (listItemToJATS opts) (map Just ms) items
+
+-- | Convert a list of blocks into a JATS list item.  When a marker is
+-- supplied it is emitted first, inside a "label" element.
+listItemToJATS :: PandocMonad m
+               => WriterOptions -> (Maybe String) -> [Block] -> DB m Doc
+listItemToJATS opts mbmarker item = do
+  contents <- blocksToJATS opts item
+  return $ inTagsIndented "list-item" $
+           maybe empty (\lbl -> inTagsIndented "label" (text lbl)) mbmarker
+           $$ contents
+
+-- | Convert a Pandoc block element to JATS.
+--
+-- Blocks that produce no output: 'Null', 'Header', 'HorizontalRule',
+-- empty bullet/ordered lists, and raw blocks in a format other than
+-- @jats@ (those are also reported as 'BlockNotRendered').
+blockToJATS :: PandocMonad m => WriterOptions -> Block -> DB m Doc
+blockToJATS _ Null = return empty
+-- Add ids to paragraphs in divs with ids - this is needed for
+-- pandoc-citeproc to get link anchors in bibliographies:
+blockToJATS opts (Div (ident,_,_) [Para lst]) =
+  let attribs = [("id", ident) | not (null ident)] in
+      inTags True "p" attribs <$> inlinesToJATS opts lst
+-- Any other div becomes a boxed-text element; "lang" is renamed to
+-- xml:lang and a fixed set of attributes is passed through.
+blockToJATS opts (Div (ident,_,kvs) bs) = do
+  contents <- blocksToJATS opts bs
+  let attr = [("id", ident) | not (null ident)] ++
+             [("xml:lang",l) | ("lang",l) <- kvs] ++
+             [(k,v) | (k,v) <- kvs, k `elem` ["specific-use",
+                 "content-type", "orientation", "position"]]
+  return $ inTags True "boxed-text" attr contents
+blockToJATS _ (Header _ _ _) =
+  return empty -- should not occur after hierarchicalize
+-- No Plain, everything needs to be in a block-level tag
+blockToJATS opts (Plain lst) = blockToJATS opts (Para lst)
+-- title beginning with fig: indicates that the image is a figure
+blockToJATS opts (Para [Image (ident,_,kvs) txt
+  (src,'f':'i':'g':':':tit)]) = do
+  alt <- inlinesToJATS opts txt
+  let capt = if null txt
+                then empty
+                else inTagsSimple "caption" alt
+  let attr = [("id", ident) | not (null ident)] ++
+             [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation",
+                                              "position", "specific-use"]]
+  -- Explicit mimetype / mime-subtype attributes win; otherwise both parts
+  -- are taken from the MIME type guessed from the source path.
+  let mbMT = getMimeType src
+  let maintype = fromMaybe "image" $
+                  lookup "mimetype" kvs `mplus`
+                  (takeWhile (/='/') <$> mbMT)
+  let subtype = fromMaybe "" $
+                  lookup "mime-subtype" kvs `mplus`
+                  ((drop 1 . dropWhile (/='/')) <$> mbMT)
+  -- 'subtype' already has the '/' separator removed, so it is used as is
+  -- (dropping another character would turn "png" into "ng").
+  let graphicattr = [("mimetype",maintype),
+                     ("mime-subtype",subtype),
+                     ("xlink:href",src),  -- do we need to URL escape this?
+                     ("xlink:title",tit)]
+  return $ inTags True "fig" attr $
+              capt $$ selfClosingTag "graphic" graphicattr
+blockToJATS opts (Para lst) =
+  inTagsIndented "p" <$> inlinesToJATS opts lst
+blockToJATS opts (LineBlock lns) =
+  blockToJATS opts $ linesToPara lns
+blockToJATS opts (BlockQuote blocks) =
+  inTagsIndented "disp-quote" <$> blocksToJATS opts blocks
+-- Code blocks become "code" when one of the classes resolves to a known
+-- highlighting language (by name or by extension), else "preformat".
+blockToJATS _ (CodeBlock (ident,classes,kvs) str) = return $
+  inTags False tag attr (flush (text (escapeStringForXML str)))
+    where attr  = [("id",ident) | not (null ident)] ++
+                  [("language",lang) | not (null lang)] ++
+                  [(k,v) | (k,v) <- kvs, k `elem` ["code-type",
+                    "code-version", "executable",
+                    "language-version", "orientation",
+                    "platforms", "position", "specific-use"]]
+          tag   = if null lang then "preformat" else "code"
+          -- first language found among the classes, or "" if none
+          lang  = case langs of
+                     (l:_) -> escapeStringForXML l
+                     []    -> ""
+          isLang l    = map toLower l `elem` map (map toLower) languages
+          langsFrom s = if isLang s
+                           then [s]
+                           else languagesByExtension . map toLower $ s
+          langs       = concatMap langsFrom classes
+blockToJATS _ (BulletList []) = return empty
+blockToJATS opts (BulletList lst) = do
+  inTags True "list" [("list-type", "bullet")] <$>
+    listItemsToJATS opts Nothing lst
+blockToJATS _ (OrderedList _ []) = return empty
+blockToJATS opts (OrderedList (start, numstyle, delimstyle) items) = do
+  let listType = case numstyle of
+                      DefaultStyle -> "order"
+                      Decimal      -> "order"
+                      Example      -> "order"
+                      UpperAlpha   -> "alpha-upper"
+                      LowerAlpha   -> "alpha-lower"
+                      UpperRoman   -> "roman-upper"
+                      LowerRoman   -> "roman-lower"
+  -- Explicit labels are only written when the list does not start at 1
+  -- or uses a delimiter other than the default / period.
+  let simpleList = start == 1 && (delimstyle == DefaultDelim ||
+                                  delimstyle == Period)
+  let markers = if simpleList
+                   then Nothing
+                   else Just $
+                        orderedListMarkers (start, numstyle, delimstyle)
+  inTags True "list" [("list-type", listType)] <$>
+    listItemsToJATS opts markers items
+blockToJATS opts (DefinitionList lst) = do
+  inTags True "def-list" [] <$> deflistItemsToJATS opts lst
+blockToJATS _ b@(RawBlock f str)
+  | f == "jats" = return $ text str -- raw XML block
+  | otherwise      = do
+      report $ BlockNotRendered b
+      return empty
+blockToJATS _ HorizontalRule = return empty -- not semantic
+-- Table without a caption: one "col" per column (a width is written
+-- only when it is positive), an optional "thead" and a "tbody".
+blockToJATS opts (Table [] aligns widths headers rows) = do
+  let percent w    = show (truncate (100*w) :: Integer) ++ "*"
+  let coltags = vcat $ zipWith (\w al -> selfClosingTag "col"
+                       ([("width", percent w) | w > 0] ++
+                        [("align", alignmentToString al)])) widths aligns
+  thead <- if all null headers
+              then return empty
+              else inTagsIndented "thead" <$> tableRowToJATS opts True headers
+  tbody <- (inTagsIndented "tbody" . vcat) <$>
+                mapM (tableRowToJATS opts False) rows
+  return $ inTags True "table" [] $ coltags $$ thead $$ tbody
+-- Table with a caption: the caption-less table wrapped, together with
+-- the caption (rendered as a paragraph), in a "table-wrap" element.
+blockToJATS opts (Table caption aligns widths headers rows) = do
+  captionDoc <- inTagsIndented "caption" <$> blockToJATS opts (Para caption)
+  tbl <- blockToJATS opts (Table [] aligns widths headers rows)
+  return $ inTags True "table-wrap" [] $ captionDoc $$ tbl
+
+-- | Name used for the @align@ attribute of a column; 'AlignDefault' is
+-- written as @left@.
+alignmentToString :: Alignment -> [Char]
+alignmentToString alignment = case alignment of
+                                 AlignLeft    -> "left"
+                                 AlignRight   -> "right"
+                                 AlignCenter  -> "center"
+                                 AlignDefault -> "left"
+
+-- | Convert one table row (a list of cells) to a "tr" element.  The flag
+-- says whether the cells are header cells.
+tableRowToJATS :: PandocMonad m
+               => WriterOptions
+               -> Bool
+               -> [[Block]]
+               -> DB m Doc
+tableRowToJATS opts isHeader cols =
+  (inTagsIndented "tr" . vcat) <$> mapM (tableItemToJATS opts isHeader) cols
+
+-- | Convert one table cell to a "th" (header) or "td" element holding
+-- the converted blocks.
+tableItemToJATS :: PandocMonad m
+                => WriterOptions
+                -> Bool
+                -> [Block]
+                -> DB m Doc
+tableItemToJATS opts isHeader item =
+  (inTags True (if isHeader then "th" else "td") [] . vcat) <$>
+    mapM (blockToJATS opts) item
+
+-- | Convert a list of inline elements to JATS.
+inlinesToJATS :: PandocMonad m => WriterOptions -> [Inline] -> DB m Doc
+inlinesToJATS opts lst = hcat <$> mapM (inlineToJATS opts) lst
+
+-- | Convert an inline element to JATS.
+-- Raw inlines are passed through only when their format is @jats@; any
+-- other raw inline is reported as 'InlineNotRendered' and produces no
+-- output.
+inlineToJATS :: PandocMonad m => WriterOptions -> Inline -> DB m Doc
+inlineToJATS _ (Str str) = return $ text $ escapeStringForXML str
+inlineToJATS opts (Emph lst) =
+  inTagsSimple "italic" <$> inlinesToJATS opts lst
+inlineToJATS opts (Strong lst) =
+  inTags False "bold" [("role", "strong")] <$> inlinesToJATS opts lst
+inlineToJATS opts (Strikeout lst) =
+  inTagsSimple "strike" <$> inlinesToJATS opts lst
+inlineToJATS opts (Superscript lst) =
+  inTagsSimple "sup" <$> inlinesToJATS opts lst
+inlineToJATS opts (Subscript lst) =
+  inTagsSimple "sub" <$> inlinesToJATS opts lst
+inlineToJATS opts (SmallCaps lst) =
+  inTags False "sc" [("role", "smallcaps")] <$>
+  inlinesToJATS opts lst
+-- Quotes are written as literal curly quotation marks around the content.
+inlineToJATS opts (Quoted SingleQuote lst) = do
+  contents <- inlinesToJATS opts lst
+  return $ char '‘' <> contents <> char '’'
+inlineToJATS opts (Quoted DoubleQuote lst) = do
+  contents <- inlinesToJATS opts lst
+  return $ char '“' <> contents <> char '”'
+inlineToJATS _ (Code _ str) =
+  return $ inTagsSimple "monospace" $ text (escapeStringForXML str)
+inlineToJATS _ il@(RawInline f x)
+  | f == "jats" = return $ text x
+  | otherwise   = do
+      report $ InlineNotRendered il
+      return empty
+inlineToJATS _ LineBreak = return $ selfClosingTag "break" []
+inlineToJATS _ Space = return space
+-- A soft break is kept as a line break only under WrapPreserve.
+inlineToJATS opts SoftBreak
+  | writerWrapText opts == WrapPreserve = return cr
+  | otherwise = return space
+inlineToJATS opts (Note contents) =
+  -- TODO technically only <p> tags are allowed inside
+  inTagsIndented "fn" <$> blocksToJATS opts contents
+inlineToJATS opts (Cite _ lst) =
+  -- TODO revisit this after examining the jats.csl pipeline
+  inlinesToJATS opts lst
+-- A span with no id and no key-value attributes is transparent.
+inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils
+-- Any other span is bracketed by milestone-start / milestone-end tags;
+-- the attributes go on the start tag only.
+inlineToJATS opts (Span (ident,_,kvs) ils) = do
+  contents <- inlinesToJATS opts ils
+  let attr = [("id",ident) | not (null ident)] ++
+             [("xml:lang",l) | ("lang",l) <- kvs] ++
+             [(k,v) | (k,v) <- kvs
+                    ,  k `elem` ["content-type", "rationale",
+                                 "rid", "specific-use"]]
+  return $ selfClosingTag "milestone-start" attr <> contents <>
+           selfClosingTag "milestone-end" []
+inlineToJATS _ (Math t str) = do
+  -- Rewrite the converted MathML so that every element carries the
+  -- "mml" prefix and the top-level xmlns attribute becomes xmlns:mml.
+  let addPref (Xml.Attr q v)
+         | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v
+         | otherwise = Xml.Attr q v
+  let fixNS' e = e{ Xml.elName =
+                         (Xml.elName e){ Xml.qPrefix = Just "mml" } }
+  let fixNS = everywhere (mkT fixNS') .
+              (\e -> e{ Xml.elAttribs = map addPref (Xml.elAttribs e) })
+  let conf = Xml.useShortEmptyTags (const False) Xml.defaultConfigPP
+  res <- convertMath writeMathML t str
+  let tagtype = case t of
+                     DisplayMath -> "disp-formula"
+                     InlineMath  -> "inline-formula"
+  return $ inTagsSimple tagtype $
+     case res of
+          Right r  -> text $ Xml.ppcElement conf
+                           $ fixNS r
+          -- Conversion failed: fall back to the TeX source in a CDATA
+          -- section.  NOTE(review): a formula containing "]]>" would end
+          -- the section early; not handled here.
+          Left _   -> inTagsSimple "tex-math"
+                      $ text "<![CDATA[" <>
+                        text str <>
+                        text "]]>"
+-- A mailto: link whose text is the address itself becomes an "email".
+inlineToJATS _ (Link _attr [Str t] ('m':'a':'i':'l':'t':'o':':':email, _))
+  | escapeURI t == email =
+  return $ inTagsSimple "email" $ text (escapeStringForXML email)
+-- A link to a "#fragment" becomes an internal "xref".
+inlineToJATS opts (Link (ident,_,kvs) txt ('#':src, _)) = do
+  let attr = [("id", ident) | not (null ident)] ++
+             [("alt", stringify txt),
+              ("rid", src)] ++
+             [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]]
+  contents <- inlinesToJATS opts txt
+  return $ inTags False "xref" attr contents
+-- Every other link becomes an "ext-link" of type "uri".
+inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do
+  let attr = [("id", ident) | not (null ident)] ++
+             [("ext-link-type", "uri"),
+              ("xlink:href", src)] ++
+             [("xlink:title", tit) | not (null tit)] ++
+             [(k,v) | (k,v) <- kvs, k `elem` ["assigning-authority",
+                                              "specific-use", "xlink:actuate",
+                                              "xlink:role", "xlink:show",
+                                              "xlink:type"]]
+  contents <- inlinesToJATS opts txt
+  return $ inTags False "ext-link" attr contents
+inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do
+  -- Explicit mimetype / mime-subtype attributes win; otherwise both parts
+  -- are taken from the MIME type guessed from the source path.
+  let mbMT = getMimeType src
+  let maintype = fromMaybe "image" $
+                  lookup "mimetype" kvs `mplus`
+                  (takeWhile (/='/') <$> mbMT)
+  let subtype = fromMaybe "" $
+                  lookup "mime-subtype" kvs `mplus`
+                  ((drop 1 . dropWhile (/='/')) <$> mbMT)
+  -- "xlink:href" is deliberately absent from the pass-through list: it is
+  -- always written from 'src', and a second copy taken from the
+  -- attributes would give the tag a duplicate attribute.
+  let attr = [("id", ident) | not (null ident)] ++
+             [("mimetype", maintype),
+              ("mime-subtype", subtype),
+              ("xlink:href", src)] ++
+             [("xlink:title", tit) | not (null tit)] ++
+             [(k,v) | (k,v) <- kvs, k `elem` ["baseline-shift",
+                        "content-type", "specific-use", "xlink:actuate",
+                        "xlink:role", "xlink:show",
+                        "xlink:type"]]
+  return $ selfClosingTag "inline-graphic" attr
diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index 3c473792f..fceb776f7 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -77,6 +77,9 @@ tests = [ testGroup "markdown" , testGroup "docbook5" [ testGroup "writer" $ writerTests "docbook5" ] + , testGroup "jats" + [ testGroup "writer" $ writerTests "jats" + ] , testGroup "native" [ testGroup "writer" $ writerTests "native" , test "reader" ["-r", "native", "-w", "native", "-s"] diff --git a/test/tables.jats b/test/tables.jats new file mode 100644 index 000000000..11f543f17 --- /dev/null +++ b/test/tables.jats @@ -0,0 +1,616 @@ + +

+ Simple table with caption: +

+ + +

+ Demonstration of simple table syntax. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Right +

+
+

+ Left +

+
+

+ Center +

+
+

+ Default +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+
+

+ Simple table without caption: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Right +

+
+

+ Left +

+
+

+ Center +

+
+

+ Default +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ Simple table indented two spaces: +

+ + +

+ Demonstration of simple table syntax. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Right +

+
+

+ Left +

+
+

+ Center +

+
+

+ Default +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+
+

+ Multiline table with caption: +

+ + +

+ Here’s the caption. It may span multiple lines. +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Centered Header +

+
+

+ Left Aligned +

+
+

+ Right Aligned +

+
+

+ Default aligned +

+
+

+ First +

+
+

+ row +

+
+

+ 12.0 +

+
+

+ Example of a row that spans multiple lines. +

+
+

+ Second +

+
+

+ row +

+
+

+ 5.0 +

+
+

+ Here’s another one. Note the blank line between rows. +

+
+
+

+ Multiline table without caption: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ Centered Header +

+
+

+ Left Aligned +

+
+

+ Right Aligned +

+
+

+ Default aligned +

+
+

+ First +

+
+

+ row +

+
+

+ 12.0 +

+
+

+ Example of a row that spans multiple lines. +

+
+

+ Second +

+
+

+ row +

+
+

+ 5.0 +

+
+

+ Here’s another one. Note the blank line between rows. +

+
+

+ Table without column headers: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 12 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 123 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ 1 +

+
+

+ Multiline table without column headers: +

+ + + + + + + + + + + + + + + + + + + +
+

+ First +

+
+

+ row +

+
+

+ 12.0 +

+
+

+ Example of a row that spans multiple lines. +

+
+

+ Second +

+
+

+ row +

+
+

+ 5.0 +

+
+

+ Here’s another one. Note the blank line between rows. +

+
+ diff --git a/test/writer.jats b/test/writer.jats new file mode 100644 index 000000000..1703de4aa --- /dev/null +++ b/test/writer.jats @@ -0,0 +1,1425 @@ + + +
+ + + + + + + + + + +Pandoc Test Suite + + +July 17, 2006 + + + + +

+ This is a set of tests for pandoc. Most of them are adapted from John + Gruber’s markdown test suite. +

+ + Headers + + Level 2 with an + <ext-link ext-link-type="uri" xlink:href="/url">embedded + link</ext-link> + + Level 3 with <italic>emphasis</italic> + + Level 4 + + Level 5 + + + + + + + Level 1 + + Level 2 with <italic>emphasis</italic> + + Level 3 +

+ with no blank line +

+
+
+ + Level 2 +

+ with no blank line +

+
+
+ + Paragraphs +

+ Here’s a regular paragraph. +

+

+ In Markdown 1.0.0 and earlier. Version 8. This line turns into a list + item. Because a hard-wrapped line in the middle of a paragraph looked + like a list item. +

+

+ Here’s one with a bullet. * criminey. +

+

+ There should be a hard line breakhere. +

+
+ + Block Quotes +

+ E-mail style: +

+ +

+ This is a block quote. It is pretty short. +

+
+ +

+ Code in a block quote: +

+ sub status { + print "working"; +} +

+ A list: +

+ + +

+ item one +

+
+ +

+ item two +

+
+
+

+ Nested block quotes: +

+ +

+ nested +

+
+ +

+ nested +

+
+
+

+ This should not be a block quote: 2 > 1. +

+

+ And a following paragraph. +

+
+ + Code Blocks +

+ Code: +

+ ---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab +

+ And: +

+ this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ +
+ + Lists + + Unordered +

+ Asterisks tight: +

+ + +

+ asterisk 1 +

+
+ +

+ asterisk 2 +

+
+ +

+ asterisk 3 +

+
+
+

+ Asterisks loose: +

+ + +

+ asterisk 1 +

+
+ +

+ asterisk 2 +

+
+ +

+ asterisk 3 +

+
+
+

+ Pluses tight: +

+ + +

+ Plus 1 +

+
+ +

+ Plus 2 +

+
+ +

+ Plus 3 +

+
+
+

+ Pluses loose: +

+ + +

+ Plus 1 +

+
+ +

+ Plus 2 +

+
+ +

+ Plus 3 +

+
+
+

+ Minuses tight: +

+ + +

+ Minus 1 +

+
+ +

+ Minus 2 +

+
+ +

+ Minus 3 +

+
+
+

+ Minuses loose: +

+ + +

+ Minus 1 +

+
+ +

+ Minus 2 +

+
+ +

+ Minus 3 +

+
+
+
+ + Ordered +

+ Tight: +

+ + +

+ First +

+
+ +

+ Second +

+
+ +

+ Third +

+
+
+

+ and: +

+ + +

+ One +

+
+ +

+ Two +

+
+ +

+ Three +

+
+
+

+ Loose using tabs: +

+ + +

+ First +

+
+ +

+ Second +

+
+ +

+ Third +

+
+
+

+ and using spaces: +

+ + +

+ One +

+
+ +

+ Two +

+
+ +

+ Three +

+
+
+

+ Multiple paragraphs: +

+ + +

+ Item 1, graf one. +

+

+ Item 1. graf two. The quick brown fox jumped over the lazy dog’s + back. +

+
+ +

+ Item 2. +

+
+ +

+ Item 3. +

+
+
+
+ + Nested + + +

+ Tab +

+ + +

+ Tab +

+ + +

+ Tab +

+
+
+
+
+
+
+

+ Here’s another: +

+ + +

+ First +

+
+ +

+ Second: +

+ + +

+ Fee +

+
+ +

+ Fie +

+
+ +

+ Foe +

+
+
+
+ +

+ Third +

+
+
+

+ Same thing but with paragraphs: +

+ + +

+ First +

+
+ +

+ Second: +

+ + +

+ Fee +

+
+ +

+ Fie +

+
+ +

+ Foe +

+
+
+
+ +

+ Third +

+
+
+
+ + Tabs and spaces + + +

+ this is a list item indented with tabs +

+
+ +

+ this is a list item indented with spaces +

+ + +

+ this is an example list item indented with tabs +

+
+ +

+ this is an example list item indented with spaces +

+
+
+
+
+
+ + Fancy list markers + + + +

+ begins with 2 +

+
+ + +

+ and now 3 +

+

+ with a continuation +

+ + + +

+ sublist with roman numerals, starting with 4 +

+
+ + +

+ more items +

+ + + +

+ a subsublist +

+
+ + +

+ a subsublist +

+
+
+
+
+
+
+

+ Nesting: +

+ + +

+ Upper Alpha +

+ + +

+ Upper Roman. +

+ + + +

+ Decimal start with 6 +

+ + + +

+ Lower alpha with paren +

+
+
+
+
+
+
+
+
+

+ Autonumbering: +

+ + +

+ Autonumber. +

+
+ +

+ More. +

+ + +

+ Nested. +

+
+
+
+
+

+ Should not be a list item: +

+

+ M.A. 2007 +

+

+ B. Williams +

+
+
+ + Definition Lists +

+ Tight using spaces: +

+ + + + apple + + +

+ red fruit +

+
+
+ + + orange + + +

+ orange fruit +

+
+
+ + + banana + + +

+ yellow fruit +

+
+
+
+

+ Tight using tabs: +

+ + + + apple + + +

+ red fruit +

+
+
+ + + orange + + +

+ orange fruit +

+
+
+ + + banana + + +

+ yellow fruit +

+
+
+
+

+ Loose: +

+ + + + apple + + +

+ red fruit +

+
+
+ + + orange + + +

+ orange fruit +

+
+
+ + + banana + + +

+ yellow fruit +

+
+
+
+

+ Multiple blocks with italics: +

+ + + + apple + + +

+ red fruit +

+

+ contains seeds, crisp, pleasant to taste +

+
+
+ + + orange + + +

+ orange fruit +

+ { orange code block } + +

+ orange block quote +

+
+
+
+
+

+ Multiple definitions, tight: +

+ + + + apple + + +

+ red fruit +

+

+ computer +

+
+
+ + + orange + + +

+ orange fruit +

+

+ bank +

+
+
+
+

+ Multiple definitions, loose: +

+ + + + apple + + +

+ red fruit +

+

+ computer +

+
+
+ + + orange + + +

+ orange fruit +

+

+ bank +

+
+
+
+

+ Blank line after term, indented marker, alternate markers: +

+ + + + apple + + +

+ red fruit +

+

+ computer +

+
+
+ + + orange + + +

+ orange fruit +

+ + +

+ sublist +

+
+ +

+ sublist +

+
+
+
+
+
+
+ + HTML Blocks +

+ Simple block on one line: +

+ +

+ foo +

+
+

+ And nested without indentation: +

+ + +

+ foo +

+
+ +

+ bar +

+
+
+

+ Interpreted markdown in a table: +

+

+ This is emphasized +

+

+ And this is strong +

+

+ Here’s a simple block: +

+

+ foo +

+

+ This should be a code block, though: +

+ <div> + foo +</div> +

+ As should this: +

+ <div>foo</div> +

+ Now, nested: +

+ + + +

+ foo +

+
+
+
+

+ This should just be an HTML comment: +

+

+ Multiline: +

+

+ Code block: +

+ <!-- Comment --> +

+ Just plain comment, with trailing spaces on the line: +

+

+ Code: +

+ <hr /> +

+ Hr’s: +

+
+ + Inline Markup +

+ This is emphasized, and so is this. +

+

+ This is strong, and so is + this. +

+

+ An emphasized + link. +

+

+ This is strong and em. +

+

+ So is this word. +

+

+ This is strong and em. +

+

+ So is this word. +

+

+ This is code: >, $, + \, \$, + <html>. +

+

+ This is strikeout. +

+

+ Superscripts: abcd ahello + ahello there. +

+

+ Subscripts: H2O, H23O, Hmany of themO. +

+

+ These should not be superscripts or subscripts, because of the unescaped + spaces: a^b c^d, a~b c~d. +

+
+ + Smart quotes, ellipses, dashes +

+ “Hello,” said the spider. “‘Shelob’ is my name.” +

+

+ ‘A’, ‘B’, and ‘C’ are letters. +

+

+ ‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. So is ‘pine.’ +

+

+ ‘He said, “I want to go.”’ Were you alive in the 70’s? +

+

+ Here is some quoted ‘code’ and a + “quoted + link”. +

+

+ Some dashes: one—two — three—four — five. +

+

+ Dashes between numbers: 5–7, 255–66, 1987–1999. +

+

+ Ellipses…and…and…. +

+
+ + LaTeX + + +

+

+
+ +

+ 2+2=4 +

+
+ +

+ xy +

+
+ +

+ αω +

+
+ +

+ 223 +

+
+ +

+ p-Tree +

+
+ +

+ Here’s some display math: + ddxf(x)=limh0f(x+h)f(x)h +

+
+ +

+ Here’s one that has a line break in it: + α+ω×x2. +

+
+
+

+ These shouldn’t be math: +

+ + +

+ To get the famous equation, write $e = mc^2$. +

+
+ +

+ $22,000 is a lot of money. So is $34,000. (It + worked if “lot” is emphasized.) +

+
+ +

+ Shoes ($20) and socks ($5). +

+
+ +

+ Escaped $: $73 this should be + emphasized 23$. +

+
+
+

+ Here’s a LaTeX table: +

+
+ + Special Characters +

+ Here is some unicode: +

+ + +

+ I hat: Î +

+
+ +

+ o umlaut: ö +

+
+ +

+ section: § +

+
+ +

+ set membership: ∈ +

+
+ +

+ copyright: © +

+
+
+

+ AT&T has an ampersand in their name. +

+

+ AT&T is another way to write it. +

+

+ This & that. +

+

+ 4 < 5. +

+

+ 6 > 5. +

+

+ Backslash: \ +

+

+ Backtick: ` +

+

+ Asterisk: * +

+

+ Underscore: _ +

+

+ Left brace: { +

+

+ Right brace: } +

+

+ Left bracket: [ +

+

+ Right bracket: ] +

+

+ Left paren: ( +

+

+ Right paren: ) +

+

+ Greater-than: > +

+

+ Hash: # +

+

+ Period: . +

+

+ Bang: ! +

+

+ Plus: + +

+

+ Minus: - +

+
+ + Links + + Explicit +

+ Just a + URL. +

+

+ URL + and title. +

+

+ URL + and title. +

+

+ URL + and title. +

+

+ URL + and title +

+

+ URL + and title +

+

+ with_underscore +

+

+ Email + link +

+

+ Empty. +

+
+ + Reference +

+ Foo bar. +

+

+ Foo bar. +

+

+ Foo bar. +

+

+ With embedded + [brackets]. +

+

+ b by + itself should be a link. +

+

+ Indented + once. +

+

+ Indented + twice. +

+

+ Indented + thrice. +

+

+ This should [not][] be a link. +

+ [not]: /url +

+ Foo + bar. +

+

+ Foo + biz. +

+
+ + With ampersands +

+ Here’s a + link + with an ampersand in the URL. +

+

+ Here’s a link with an amersand in the link text: + AT&T. +

+

+ Here’s an + inline + link. +

+

+ Here’s an + inline + link in pointy braces. +

+
+ + Autolinks +

+ With an ampersand: + http://example.com/?foo=1&bar=2 +

+ + +

+ In a list? +

+
+ +

+ http://example.com/ +

+
+ +

+ It should. +

+
+
+

+ An e-mail address: nobody@nowhere.net +

+ +

+ Blockquoted: + http://example.com/ +

+
+

+ Auto-links should not occur here: + <http://example.com/> +

+ or here: <http://example.com/> +
+
+ + Images +

+ From “Voyage dans la Lune” by Georges Melies (1902): +

+ + lalune + + +

+ Here is a movie + + icon. +

+
+ + Footnotes +

+ Here is a footnote reference, +

+ Here is the footnote. It can go anywhere after the footnote + reference. It need not be placed at the end of the document. +

+ and another. +

+ Here’s the long note. This one contains multiple blocks. +

+

+ Subsequent blocks are indented to show that they belong to the + footnote (as with list items). +

+ { <code> } +

+ If you want, you can indent every line, but you can also be lazy and + just indent the first line of each block. +

+
This should not be a footnote reference, because + it contains a space.[^my note] Here is an inline note. +

+ This is easier to type. Inline notes may contain + links + and ] verbatim characters, as well as + [bracketed text]. +

+
+

+ +

+ Notes can go in quotes. +

+ In quote. +

+ +

+
+ + +

+ And in list items. +

+ In list. +

+ +

+
+
+

+ This paragraph should not be part of the note, as it is not indented. +

+
+ +
-- cgit v1.2.3