From 5d3c9e56460165be452b672f12fc476e7a5ed3a9 Mon Sep 17 00:00:00 2001 From: Hamish Mackenzie Date: Wed, 20 Dec 2017 13:54:02 +1300 Subject: Add Basic JATS reader based on DocBook reader --- test/Tests/Old.hs | 2 + test/Tests/Readers/JATS.hs | 111 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 test/Tests/Readers/JATS.hs (limited to 'test/Tests') diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index 9c6b9f660..bbd51ee98 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -79,6 +79,8 @@ tests = [ testGroup "markdown" ] , testGroup "jats" [ testGroup "writer" $ writerTests "jats" + , test "reader" ["-r", "jats", "-w", "native", "-s"] + "jats-reader.xml" "jats-reader.native" ] , testGroup "native" [ testGroup "writer" $ writerTests "native" diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs new file mode 100644 index 000000000..ed6317c71 --- /dev/null +++ b/test/Tests/Readers/JATS.hs @@ -0,0 +1,111 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.JATS (tests) where + +import Data.Text (Text) +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Arbitrary () +import Text.Pandoc.Builder + +jats :: Text -> Pandoc +jats = purely $ readJATS def + +tests :: [TestTree] +tests = [ testGroup "inline code" + [ test jats "basic" $ "

\n @&\n

" =?> para (code "@&") + ] + , testGroup "images" + [ test jats "basic" $ "" + =?> para (image "/url" "title" mempty) + ] + , test jats "bullet list" $ + "\n\ + \ \n\ + \

\n\ + \ first\n\ + \

\n\ + \
\n\ + \ \n\ + \

\n\ + \ second\n\ + \

\n\ + \
\n\ + \ \n\ + \

\n\ + \ third\n\ + \

\n\ + \
\n\ + \
" + =?> bulletList [ para $ text "first" + , para $ text "second" + , para $ text "third" + ] + , testGroup "definition lists" + [ test jats "with internal link" $ + "\n\ + \ \n\ + \ \n\ + \ testing\n\ + \ \n\ + \ \n\ + \

\n\ + \ hi there\n\ + \

\n\ + \
\n\ + \
\n\ + \
" + =?> definitionList [(link "#go" "" (str "testing"), + [para (text "hi there")])] + ] + , testGroup "math" + [ test jats "escape |" $ + "

\n\ + \ \n\ + \ \n\ + \ σ|{x}\n\ + \

" + =?> para (math "\\sigma|_{\\{x\\}}") + , test jats "tex-math only" $ + "

\n\ + \ \n\ + \ \n\ + \

" + =?> para (math "\\sigma|_{\\{x\\}}") + , test jats "math ml only" $ + "

\n\ + \ \n\ + \ σ|{x}\n\ + \

" + =?> para (math "\\sigma|_{\\{ x\\}}") + ] + , testGroup "headers" +-- TODO fix footnotes in headers +-- [ test jats "unnumbered header" $ +-- "\n\ +-- \ Header 1<fn>\n\ +-- \ <p>\n\ +-- \ note\n\ +-- \ </p>\n\ +-- \ </fn>\n\ +-- \" +-- =?> header 1 +-- (text "Header 1" <> note (plain $ text "note")) + [ test jats "unnumbered sub header" $ + "\n\ + \ Header\n\ + \ \n\ + \ Sub-Header\n\ + \ \n\ + \" + =?> headerWith ("foo", [], []) 1 + (text "Header") + <> headerWith ("foo2", [], []) 2 + (text "Sub-Header") + , test jats "containing image" $ + "\n\ + \ <inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" />\n\ + \" + =?> header 1 (image "imgs/foo.jpg" "" mempty) + ] + ] -- cgit v1.2.3 From d85357139748ea657f030ab314c39e70f56764f4 Mon Sep 17 00:00:00 2001 From: Hamish Mackenzie Date: Wed, 20 Dec 2017 23:55:48 +1300 Subject: Improve support for code language in JATS --- src/Text/Pandoc/Readers/JATS.hs | 21 ++++++++++++++++-- src/Text/Pandoc/Writers/JATS.hs | 47 +++++++++++++++++++++++++---------------- test/Tests/Readers/JATS.hs | 5 +++++ test/Tests/Writers/JATS.hs | 7 +++++- 4 files changed, 59 insertions(+), 21 deletions(-) (limited to 'test/Tests') diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index fc71e9a51..851fbec35 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -15,6 +15,8 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import qualified Data.Set as S (fromList, member) +import Data.Set ((\\)) type JATS m = StateT JATSState m @@ -98,8 +100,8 @@ instance HasMeta JATSState where deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)} isBlockElement :: Content -> Bool -isBlockElement (Elem e) = qName (elName e) `elem` blocktags - where blocktags = paragraphLevel ++ lists ++ mathML ++ other +isBlockElement (Elem e) = qName (elName e) `S.member` blocktags + where blocktags = S.fromList (paragraphLevel ++ lists ++ mathML ++ other) \\ S.fromList inlinetags paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic", "media", "preformat", "supplementary-material", "table-wrap", "table-wrap-group", @@ -108,6 +110,21 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags mathML = ["tex-math", "mml:math"] other = ["p", "related-article", "related-object", "ack", "disp-quote", "speech", "statement", "verse-group", "x"] + inlinetags = ["email", "ext-link", "uri", "inline-supplementary-material", + "related-article", "related-object", "hr", "bold", "fixed-case", + "italic", "monospace", "overline", "overline-start", "overline-end", + "roman", "sans-serif", "sc", "strike", "underline", "underline-start", + "underline-end", "ruby", "alternatives", "inline-graphic", "private-char", + "chem-struct", "inline-formula", "tex-math", "mml:math", "abbrev", + "milestone-end", "milestone-start", "named-content", "styled-content", + "fn", "target", "xref", "sub", "sup", "x", "address", "array", + "boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic", + "media", "preformat", "supplementary-material", "table-wrap", + "table-wrap-group", "disp-formula", "disp-formula-group", + "citation-alternatives", "element-citation", "mixed-citation", + "nlm-citation", "award-id", "funding-source", "open-access", + "def-list", "list", "ack", "disp-quote", "speech", "statement", + "verse-group"] isBlockElement _ = False -- Trim leading and trailing newline characters diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 8824eeb24..8dda969d9 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -170,6 +170,28 @@ imageMimeType src kvs = ((drop 1 . dropWhile (/='/')) <$> mbMT) in (maintype, subtype) +languageFor :: [String] -> String +languageFor classes = + case langs of + (l:_) -> escapeStringForXML l + [] -> "" + where isLang l = map toLower l `elem` map (map toLower) languages + langsFrom s = if isLang s + then [s] + else languagesByExtension . map toLower $ s + langs = concatMap langsFrom classes + +codeAttr :: Attr -> (String, [(String, String)]) +codeAttr (ident,classes,kvs) = (lang, attr) + where + attr = [("id",ident) | not (null ident)] ++ + [("language",lang) | not (null lang)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["code-type", + "code-version", "executable", + "language-version", "orientation", + "platforms", "position", "specific-use"]] + lang = languageFor classes + -- | Convert a Pandoc block element to JATS. blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc blockToJATS _ Null = return empty @@ -233,23 +255,10 @@ blockToJATS opts (LineBlock lns) = blockToJATS opts $ linesToPara lns blockToJATS opts (BlockQuote blocks) = inTagsIndented "disp-quote" <$> blocksToJATS opts blocks -blockToJATS _ (CodeBlock (ident,classes,kvs) str) = return $ +blockToJATS _ (CodeBlock a str) = return $ inTags False tag attr (flush (text (escapeStringForXML str))) - where attr = [("id",ident) | not (null ident)] ++ - [("language",lang) | not (null lang)] ++ - [(k,v) | (k,v) <- kvs, k `elem` ["code-type", - "code-version", "executable", - "language-version", "orientation", - "platforms", "position", "specific-use"]] - tag = if null lang then "preformat" else "code" - lang = case langs of - (l:_) -> escapeStringForXML l - [] -> "" - isLang l = map toLower l `elem` map (map toLower) languages - langsFrom s = if isLang s - then [s] - else languagesByExtension . map toLower $ s - langs = concatMap langsFrom classes + where (lang, attr) = codeAttr a + tag = if null lang then "preformat" else "code" blockToJATS _ (BulletList []) = return empty blockToJATS opts (BulletList lst) = inTags True "list" [("list-type", "bullet")] <$> @@ -346,8 +355,10 @@ inlineToJATS opts (Quoted SingleQuote lst) = do inlineToJATS opts (Quoted DoubleQuote lst) = do contents <- inlinesToJATS opts lst return $ char '“' <> contents <> char '”' -inlineToJATS _ (Code _ str) = - return $ inTagsSimple "monospace" $ text (escapeStringForXML str) +inlineToJATS _ (Code a str) = + return $ inTags False tag attr $ text (escapeStringForXML str) + where (lang, attr) = codeAttr a + tag = if null lang then "monospace" else "code" inlineToJATS _ il@(RawInline f x) | f == "jats" = return $ text x | otherwise = do diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index ed6317c71..5c7dfa77c 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -14,6 +14,11 @@ jats = purely $ readJATS def tests :: [TestTree] tests = [ testGroup "inline code" [ test jats "basic" $ "

\n @&\n

" =?> para (code "@&") + , test jats "lang" $ "

\n @&\n

" =?> para (codeWith ("", ["c"], []) "@&") + ] + , testGroup "block code" + [ test jats "basic" $ "@&" =?> codeBlock "@&" + , test jats "lang" $ "@&" =?> codeBlockWith ("", ["c"], []) "@&" ] , testGroup "images" [ test jats "basic" $ "" diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index cd4609849..f14f1c229 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -31,6 +31,11 @@ infix 4 =: tests :: [TestTree] tests = [ testGroup "inline code" [ "basic" =: code "@&" =?> "

\n @&\n

" + , "lang" =: codeWith ("", ["c"], []) "@&" =?> "

\n @&\n

" + ] + , testGroup "block code" + [ "basic" =: codeBlock "@&" =?> "@&" + , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "@&" ] , testGroup "images" [ "basic" =: @@ -38,7 +43,7 @@ tests = [ testGroup "inline code" =?> "" ] , testGroup "inlines" - [ "Emphasis" =: emph ("emphasized") + [ "Emphasis" =: emph "emphasized" =?> "

\n emphasized\n

" ] , "bullet list" =: bulletList [ plain $ text "first" -- cgit v1.2.3