From 5d3c9e56460165be452b672f12fc476e7a5ed3a9 Mon Sep 17 00:00:00 2001 From: Hamish Mackenzie Date: Wed, 20 Dec 2017 13:54:02 +1300 Subject: Add Basic JATS reader based on DocBook reader --- pandoc.cabal | 3 + src/Text/Pandoc/Readers.hs | 5 +- src/Text/Pandoc/Readers/JATS.hs | 387 +++++++++ test/Tests/Old.hs | 2 + test/Tests/Readers/JATS.hs | 111 +++ test/jats-reader.native | 422 ++++++++++ test/jats-reader.xml | 1773 +++++++++++++++++++++++++++++++++++++++ test/test-pandoc.hs | 2 + 8 files changed, 2704 insertions(+), 1 deletion(-) create mode 100644 src/Text/Pandoc/Readers/JATS.hs create mode 100644 test/Tests/Readers/JATS.hs create mode 100644 test/jats-reader.native create mode 100644 test/jats-reader.xml diff --git a/pandoc.cabal b/pandoc.cabal index 1bd24f4dd..4ad2005b8 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -214,6 +214,7 @@ extra-source-files: test/creole-reader.txt test/creole-reader.native test/rst-reader.rst + test/jats-reader.xml test/s5-basic.html test/s5-fancy.html test/s5-fragment.html @@ -436,6 +437,7 @@ library Text.Pandoc.Readers.RST, Text.Pandoc.Readers.Org, Text.Pandoc.Readers.DocBook, + Text.Pandoc.Readers.JATS, Text.Pandoc.Readers.OPML, Text.Pandoc.Readers.Textile, Text.Pandoc.Readers.Native, @@ -623,6 +625,7 @@ test-suite test-pandoc Tests.Shared Tests.Readers.LaTeX Tests.Readers.HTML + Tests.Readers.JATS Tests.Readers.Markdown Tests.Readers.Org Tests.Readers.RST diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index d954256c8..a8448952e 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -52,6 +52,7 @@ module Text.Pandoc.Readers , readOrg , readLaTeX , readHtml + , readJATS , readTextile , readDocBook , readOPML @@ -84,7 +85,8 @@ import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.Docx import Text.Pandoc.Readers.EPUB import Text.Pandoc.Readers.Haddock -import Text.Pandoc.Readers.HTML +import Text.Pandoc.Readers.HTML (readHtml) +import Text.Pandoc.Readers.JATS (readJATS) import Text.Pandoc.Readers.LaTeX import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki @@ -129,6 +131,7 @@ readers = [ ("native" , TextReader readNative) ,("org" , TextReader readOrg) ,("textile" , TextReader readTextile) -- TODO : textile+lhs ,("html" , TextReader readHtml) + ,("jats" , TextReader readJATS) ,("latex" , TextReader readLaTeX) ,("haddock" , TextReader readHaddock) ,("twiki" , TextReader readTWiki) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs new file mode 100644 index 000000000..fc71e9a51 --- /dev/null +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -0,0 +1,387 @@ +{-# LANGUAGE ExplicitForAll, TupleSections #-} +module Text.Pandoc.Readers.JATS ( readJATS ) where +import Control.Monad.State.Strict +import Data.Char (isDigit, isSpace, toUpper) +import Data.Default +import Data.Generics +import Data.List (intersperse) +import Data.Maybe (maybeToList, fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Builder +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Options +import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead) +import Text.TeXMath (readMathML, writeTeX) +import Text.XML.Light + +type JATS m = StateT JATSState m + +data JATSState = JATSState{ jatsSectionLevel :: Int + , jatsQuoteType :: QuoteType + , jatsMeta :: Meta + , jatsAcceptsMeta :: Bool + , jatsBook :: Bool + , jatsFigureTitle :: Inlines + , jatsContent :: [Content] + } deriving Show + +instance Default JATSState where + def = JATSState{ jatsSectionLevel = 0 + , jatsQuoteType = DoubleQuote + , jatsMeta = mempty + , jatsAcceptsMeta = False + , jatsBook = False + , jatsFigureTitle = mempty + , jatsContent = [] } + + +readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readJATS _ inp = do + let tree = normalizeTree . parseXML + $ T.unpack $ crFilter inp + (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree + return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) + +-- normalize input, consolidating adjacent Text and CRef elements +normalizeTree :: [Content] -> [Content] +normalizeTree = everywhere (mkT go) + where go :: [Content] -> [Content] + go (Text (CData CDataRaw _ _):xs) = xs + go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = + Text (CData CDataText (s1 ++ s2) z):xs + go (Text (CData CDataText s1 z):CRef r:xs) = + Text (CData CDataText (s1 ++ convertEntity r) z):xs + go (CRef r:Text (CData CDataText s1 z):xs) = + Text (CData CDataText (convertEntity r ++ s1) z):xs + go (CRef r1:CRef r2:xs) = + Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + go xs = xs + +convertEntity :: String -> String +convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) + +-- convenience function to get an attribute value, defaulting to "" +attrValue :: String -> Element -> String +attrValue attr = + fromMaybe "" . maybeAttrValue attr + +maybeAttrValue :: String -> Element -> Maybe String +maybeAttrValue attr elt = + lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + +-- convenience function +named :: String -> Element -> Bool +named s e = qName (elName e) == s + +-- + +acceptingMetadata :: PandocMonad m => JATS m a -> JATS m a +acceptingMetadata p = do + modify (\s -> s { jatsAcceptsMeta = True } ) + res <- p + modify (\s -> s { jatsAcceptsMeta = False }) + return res + +checkInMeta :: (PandocMonad m, Monoid a) => JATS m () -> JATS m a +checkInMeta p = do + accepts <- jatsAcceptsMeta <$> get + when accepts p + return mempty + +addMeta :: PandocMonad m => ToMetaValue a => String -> a -> JATS m () +addMeta field val = modify (setMeta field val) + +instance HasMeta JATSState where + setMeta field v s = s {jatsMeta = setMeta field v (jatsMeta s)} + deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)} + +isBlockElement :: Content -> Bool +isBlockElement (Elem e) = qName (elName e) `elem` blocktags + where blocktags = paragraphLevel ++ lists ++ mathML ++ other + paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap", + "code", "fig", "fig-group", "graphic", "media", "preformat", + "supplementary-material", "table-wrap", "table-wrap-group", + "alternatives", "disp-formula", "disp-formula-group"] + lists = ["def-list", "list"] + mathML = ["tex-math", "mml:math"] + other = ["p", "related-article", "related-object", "ack", "disp-quote", + "speech", "statement", "verse-group", "x"] +isBlockElement _ = False + +-- Trim leading and trailing newline characters +trimNl :: String -> String +trimNl = reverse . go . reverse . go + where go ('\n':xs) = xs + go xs = xs + +-- function that is used by both graphic (in parseBlock) +-- and inline-graphic (in parseInline) +getGraphic :: PandocMonad m => Element -> JATS m Inlines +getGraphic e = do + let atVal a = attrValue a e + attr = (atVal "id", words $ atVal "role", []) + imageUrl = atVal "href" + captionOrLabel = case filterChild (\x -> named "caption" x + || named "label" x) e of + Nothing -> return mempty + Just z -> mconcat <$> + mapM parseInline (elContent z) + figTitle <- gets jatsFigureTitle + let (caption, title) = if isNull figTitle + then (captionOrLabel, atVal "title") + else (return figTitle, "fig:") + fmap (imageWith attr imageUrl title) caption + +getBlocks :: PandocMonad m => Element -> JATS m Blocks +getBlocks e = mconcat <$> + mapM parseBlock (elContent e) + + +parseBlock :: PandocMonad m => Content -> JATS m Blocks +parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE +parseBlock (Text (CData _ s _)) = if all isSpace s + then return mempty + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ map toUpper x +parseBlock (Elem e) = + case qName (elName e) of + "p" -> parseMixed para (elContent e) + "code" -> codeBlockWithLang + "preformat" -> codeBlockWithLang + "disp-quote" -> parseBlockquote + "list" -> case attrValue "list-type" e of + "bullet" -> bulletList <$> listitems + listType -> do + let start = fromMaybe 1 $ + (strContent <$> (filterElement (named "list-item") e + >>= filterElement (named "lable"))) + >>= safeRead + orderedListWith (start, parseListStyleType listType, DefaultDelim) + <$> listitems + "def-list" -> definitionList <$> deflistitems + "sec" -> gets jatsSectionLevel >>= sect . (+1) + "title" -> return mempty + "title-group" -> checkInMeta getTitle + "graphic" -> para <$> getGraphic e + "journal-meta" -> metaBlock + "article-meta" -> metaBlock + "custom-meta" -> metaBlock + "table" -> parseTable + "fig" -> divWith (attrValue "id" e, ["fig"], []) <$> getBlocks e + "table-wrap" -> divWith (attrValue "id" e, ["table-wrap"], []) <$> getBlocks e + "caption" -> divWith (attrValue "id" e, ["caption"], []) <$> sect 6 + "ref-list" -> divWith ("refs", [], []) <$> getBlocks e + "ref" -> divWith ("ref-" <> attrValue "id" e, [], []) <$> getBlocks e + "?xml" -> return mempty + _ -> getBlocks e + where parseMixed container conts = do + let (ils,rest) = break isBlockElement conts + ils' <- (trimInlines . mconcat) <$> mapM parseInline ils + let p = if ils' == mempty then mempty else container ils' + case rest of + [] -> return p + (r:rs) -> do + b <- parseBlock r + x <- parseMixed container rs + return $ p <> b <> x + codeBlockWithLang = do + let classes' = case attrValue "language" e of + "" -> [] + x -> [x] + return $ codeBlockWith (attrValue "id" e, classes', []) + $ trimNl $ strContentRecursive e + parseBlockquote = do + attrib <- case filterChild (named "attribution") e of + Nothing -> return mempty + Just z -> (para . (str "— " <>) . mconcat) + <$> + mapM parseInline (elContent z) + contents <- getBlocks e + return $ blockQuote (contents <> attrib) + parseListStyleType "roman-lower" = LowerRoman + parseListStyleType "roman-upper" = UpperRoman + parseListStyleType "alpha-lower" = LowerAlpha + parseListStyleType "alpha-upper" = UpperAlpha + parseListStyleType _ = DefaultStyle + listitems = mapM getBlocks $ filterChildren (named "list-item") e + deflistitems = mapM parseVarListEntry $ filterChildren + (named "def-item") e + parseVarListEntry e' = do + let terms = filterChildren (named "term") e' + let items = filterChildren (named "def") e' + terms' <- mapM getInlines terms + items' <- mapM getBlocks items + return (mconcat $ intersperse (str "; ") terms', items') + getTitle = do + tit <- case filterChild (named "article-title") e of + Just s -> getInlines s + Nothing -> return mempty + subtit <- case filterChild (named "subtitle") e of + Just s -> (text ": " <>) <$> + getInlines s + Nothing -> return mempty + addMeta "title" (tit <> subtit) + + parseTable = do + let isCaption x = named "title" x || named "caption" x + caption <- case filterChild isCaption e of + Just t -> getInlines t + Nothing -> return mempty + let e' = fromMaybe e $ filterChild (named "tgroup") e + let isColspec x = named "colspec" x || named "col" x + let colspecs = case filterChild (named "colgroup") e' of + Just c -> filterChildren isColspec c + _ -> filterChildren isColspec e' + let isRow x = named "row" x || named "tr" x + headrows <- case filterChild (named "thead") e' of + Just h -> case filterChild isRow h of + Just x -> parseRow x + Nothing -> return [] + Nothing -> return [] + bodyrows <- case filterChild (named "tbody") e' of + Just b -> mapM parseRow + $ filterChildren isRow b + Nothing -> mapM parseRow + $ filterChildren isRow e' + let toAlignment c = case findAttr (unqual "align") c of + Just "left" -> AlignLeft + Just "right" -> AlignRight + Just "center" -> AlignCenter + _ -> AlignDefault + let toWidth c = case findAttr (unqual "colwidth") c of + Just w -> fromMaybe 0 + $ safeRead $ '0': filter (\x -> + isDigit x || x == '.') w + Nothing -> 0 :: Double + let numrows = case bodyrows of + [] -> 0 + xs -> maximum $ map length xs + let aligns = case colspecs of + [] -> replicate numrows AlignDefault + cs -> map toAlignment cs + let widths = case colspecs of + [] -> replicate numrows 0 + cs -> let ws = map toWidth cs + tot = sum ws + in if all (> 0) ws + then map (/ tot) ws + else replicate numrows 0 + let headrows' = if null headrows + then replicate numrows mempty + else headrows + return $ table caption (zip aligns widths) + headrows' bodyrows + isEntry x = named "entry" x || named "td" x || named "th" x + parseRow = mapM (parseMixed plain . elContent) . filterChildren isEntry + sect n = do isbook <- gets jatsBook + let n' = if isbook || n == 0 then n + 1 else n + headerText <- case filterChild (named "title") e `mplus` + (filterChild (named "info") e >>= + filterChild (named "title")) of + Just t -> getInlines t + Nothing -> return mempty + oldN <- gets jatsSectionLevel + modify $ \st -> st{ jatsSectionLevel = n } + b <- getBlocks e + let ident = attrValue "id" e + modify $ \st -> st{ jatsSectionLevel = oldN } + return $ headerWith (ident,[],[]) n' headerText <> b +-- lineItems = mapM getInlines $ filterChildren (named "line") e + metaBlock = acceptingMetadata (getBlocks e) >> return mempty + +getInlines :: PandocMonad m => Element -> JATS m Inlines +getInlines e' = (trimInlines . mconcat) <$> + mapM parseInline (elContent e') + +strContentRecursive :: Element -> String +strContentRecursive = strContent . + (\e' -> e'{ elContent = map elementToStr $ elContent e' }) + +elementToStr :: Content -> Content +elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing +elementToStr x = x + +parseInline :: PandocMonad m => Content -> JATS m Inlines +parseInline (Text (CData _ s _)) = return $ text s +parseInline (CRef ref) = + return $ maybe (text $ map toUpper ref) text $ lookupEntity ref +parseInline (Elem e) = + case qName (elName e) of + "italic" -> emph <$> innerInlines + "bold" -> strong <$> innerInlines + "strike" -> strikeout <$> innerInlines + "sub" -> subscript <$> innerInlines + "sup" -> superscript <$> innerInlines + "underline" -> underlineSpan <$> innerInlines + "break" -> return linebreak + "sc" -> smallcaps <$> innerInlines + + "code" -> codeWithLang + "monospace" -> codeWithLang + + "inline-graphic" -> getGraphic e + "disp-quote" -> do + qt <- gets jatsQuoteType + let qt' = if qt == SingleQuote then DoubleQuote else SingleQuote + modify $ \st -> st{ jatsQuoteType = qt' } + contents <- innerInlines + modify $ \st -> st{ jatsQuoteType = qt } + return $ if qt == SingleQuote + then singleQuoted contents + else doubleQuoted contents + + "xref" -> do + ils <- innerInlines + let rid = attrValue "rid" e + let refType = ("ref-type",) <$> maybeAttrValue "ref-type" e + let attr = (attrValue "id" e, [], maybeToList refType) + return $ linkWith attr ('#' : rid) "" ils + "ext-link" -> do + ils <- innerInlines + let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + Just h -> h + _ -> '#' : attrValue "rid" e + let ils' = if ils == mempty then str href else ils + let attr = (attrValue "id" e, [], []) + return $ linkWith attr href title ils' + + "disp-formula" -> formula displayMath + "inline-formula" -> formula math + "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e + "tex-math" -> return . math $ strContent e + + "email" -> return $ link ("mailto:" ++ strContent e) "" + $ str $ strContent e + "uri" -> return $ link (strContent e) "" $ str $ strContent e + "fn" -> (note . mconcat) <$> + mapM parseBlock (elContent e) + -- Note: this isn't a real docbook tag; it's what we convert + -- to in handleInstructions, above. A kludge to + -- work around xml-light's inability to parse an instruction. + _ -> innerInlines + where innerInlines = (trimInlines . mconcat) <$> + mapM parseInline (elContent e) + mathML x = + case readMathML . showElement $ everywhere (mkT removePrefix) x of + Left _ -> mempty + Right m -> writeTeX m + formula constructor = do + let whereToLook = fromMaybe e $ filterElement (named "alternatives") e + texMaths = map strContent $ + filterChildren (named "tex-math") whereToLook + mathMLs = map mathML $ + filterChildren isMathML whereToLook + return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs + + isMathML x = qName (elName x) == "math" && + qPrefix (elName x) == Just "mml" + removePrefix elname = elname { qPrefix = Nothing } + codeWithLang = do + let classes' = case attrValue "language" e of + "" -> [] + l -> [l] + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e + diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index 9c6b9f660..bbd51ee98 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -79,6 +79,8 @@ tests = [ testGroup "markdown" ] , testGroup "jats" [ testGroup "writer" $ writerTests "jats" + , test "reader" ["-r", "jats", "-w", "native", "-s"] + "jats-reader.xml" "jats-reader.native" ] , testGroup "native" [ testGroup "writer" $ writerTests "native" diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs new file mode 100644 index 000000000..ed6317c71 --- /dev/null +++ b/test/Tests/Readers/JATS.hs @@ -0,0 +1,111 @@ +{-# LANGUAGE OverloadedStrings #-} +module Tests.Readers.JATS (tests) where + +import Data.Text (Text) +import Test.Tasty +import Tests.Helpers +import Text.Pandoc +import Text.Pandoc.Arbitrary () +import Text.Pandoc.Builder + +jats :: Text -> Pandoc +jats = purely $ readJATS def + +tests :: [TestTree] +tests = [ testGroup "inline code" + [ test jats "basic" $ "

\n @&\n

" =?> para (code "@&") + ] + , testGroup "images" + [ test jats "basic" $ "" + =?> para (image "/url" "title" mempty) + ] + , test jats "bullet list" $ + "\n\ + \ \n\ + \

\n\ + \ first\n\ + \

\n\ + \
\n\ + \ \n\ + \

\n\ + \ second\n\ + \

\n\ + \
\n\ + \ \n\ + \

\n\ + \ third\n\ + \

\n\ + \
\n\ + \
" + =?> bulletList [ para $ text "first" + , para $ text "second" + , para $ text "third" + ] + , testGroup "definition lists" + [ test jats "with internal link" $ + "\n\ + \ \n\ + \ \n\ + \ testing\n\ + \ \n\ + \ \n\ + \

\n\ + \ hi there\n\ + \

\n\ + \
\n\ + \
\n\ + \
" + =?> definitionList [(link "#go" "" (str "testing"), + [para (text "hi there")])] + ] + , testGroup "math" + [ test jats "escape |" $ + "

\n\ + \ \n\ + \ \n\ + \ σ|{x}\n\ + \

" + =?> para (math "\\sigma|_{\\{x\\}}") + , test jats "tex-math only" $ + "

\n\ + \ \n\ + \ \n\ + \

" + =?> para (math "\\sigma|_{\\{x\\}}") + , test jats "math ml only" $ + "

\n\ + \ \n\ + \ σ|{x}\n\ + \

" + =?> para (math "\\sigma|_{\\{ x\\}}") + ] + , testGroup "headers" +-- TODO fix footnotes in headers +-- [ test jats "unnumbered header" $ +-- "\n\ +-- \ Header 1<fn>\n\ +-- \ <p>\n\ +-- \ note\n\ +-- \ </p>\n\ +-- \ </fn>\n\ +-- \" +-- =?> header 1 +-- (text "Header 1" <> note (plain $ text "note")) + [ test jats "unnumbered sub header" $ + "\n\ + \ Header\n\ + \ \n\ + \ Sub-Header\n\ + \ \n\ + \" + =?> headerWith ("foo", [], []) 1 + (text "Header") + <> headerWith ("foo2", [], []) 2 + (text "Sub-Header") + , test jats "containing image" $ + "\n\ + \ <inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" />\n\ + \" + =?> header 1 (image "imgs/foo.jpg" "" mempty) + ] + ] diff --git a/test/jats-reader.native b/test/jats-reader.native new file mode 100644 index 000000000..2bc8b94ce --- /dev/null +++ b/test/jats-reader.native @@ -0,0 +1,422 @@ +Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",SoftBreak,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] +,Header 1 ("headers",[],[]) [Str "Headers"] +,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")] +,Header 3 ("level-3-with-emphasis",[],[]) [Str "Level",Space,Str "3",Space,Str "with",Space,Emph [Str "emphasis"]] +,Header 4 ("level-4",[],[]) [Str "Level",Space,Str "4"] +,Header 5 ("level-5",[],[]) [Str "Level",Space,Str "5"] +,Header 1 ("level-1",[],[]) [Str "Level",Space,Str "1"] +,Header 2 ("level-2-with-emphasis",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Emph [Str "emphasis"]] +,Header 3 ("level-3",[],[]) [Str "Level",Space,Str "3"] +,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"] +,Header 2 ("level-2",[],[]) [Str "Level",Space,Str "2"] +,Para [Str "with",Space,Str "no",Space,Str "blank",Space,Str "line"] +,Header 1 ("paragraphs",[],[]) [Str "Paragraphs"] +,Para [Str "Here's",Space,Str "a",Space,Str "regular",Space,Str "paragraph."] +,Para [Str "In",Space,Str "Markdown",Space,Str "1.0.0",Space,Str "and",Space,Str "earlier.",Space,Str "Version",Space,Str "8.",Space,Str "This",Space,Str "line",Space,Str "turns",Space,Str "into",Space,Str "a",SoftBreak,Str "list",Space,Str "item.",Space,Str "Because",Space,Str "a",Space,Str "hard-wrapped",Space,Str "line",Space,Str "in",Space,Str "the",Space,Str "middle",Space,Str "of",Space,Str "a",Space,Str "paragraph",SoftBreak,Str "looked",Space,Str "like",Space,Str "a",Space,Str "list",Space,Str "item."] +,Para [Str "Here's",Space,Str "one",Space,Str "with",Space,Str "a",Space,Str "bullet.",Space,Str "*",Space,Str "criminey."] +,Para [Str "There",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "hard",Space,Str "line",Space,Str "break",LineBreak,Str "here."] +,Header 1 ("block-quotes",[],[]) [Str "Block",Space,Str "Quotes"] +,Para [Str "E-mail",Space,Str "style:"] +,BlockQuote + [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote.",Space,Str "It",Space,Str "is",Space,Str "pretty",Space,Str "short."]] +,BlockQuote + [Para [Str "Code",Space,Str "in",Space,Str "a",Space,Str "block",Space,Str "quote:"] + ,CodeBlock ("",[],[]) "sub status {\n print \"working\";\n}" + ,Para [Str "A",Space,Str "list:"] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "item",Space,Str "one"]] + ,[Para [Str "item",Space,Str "two"]]] + ,Para [Str "Nested",Space,Str "block",Space,Str "quotes:"] + ,BlockQuote + [Para [Str "nested"]] + ,BlockQuote + [Para [Str "nested"]]] +,Para [Str "This",Space,Str "should",Space,Str "not",Space,Str "be",Space,Str "a",Space,Str "block",Space,Str "quote:",Space,Str "2",Space,Str ">",Space,Str "1."] +,Para [Str "Box-style:"] +,BlockQuote + [Para [Str "Example:"] + ,CodeBlock ("",[],[]) "sub status {\n print \"working\";\n}"] +,BlockQuote + [OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "do",Space,Str "laundry"]] + ,[Para [Str "take",Space,Str "out",Space,Str "the",Space,Str "trash"]]]] +,Para [Str "Here's",Space,Str "a",Space,Str "nested",Space,Str "one:"] +,BlockQuote + [Para [Str "Joe",Space,Str "said:"] + ,BlockQuote + [Para [Str "Don't",Space,Str "quote",Space,Str "me."]]] +,Para [Str "And",Space,Str "a",Space,Str "following",Space,Str "paragraph."] +,Header 1 ("code-blocks",[],[]) [Str "Code",Space,Str "Blocks"] +,Para [Str "Code:"] +,CodeBlock ("",[],[]) "---- (should be four hyphens)\n\nsub status {\n print \"working\";\n}\n\nthis code block is indented by one tab" +,Para [Str "And:"] +,CodeBlock ("",[],[]) " this code block is indented by two tabs\n\nThese should not be escaped: \\$ \\\\ \\> \\[ \\{" +,Header 1 ("lists",[],[]) [Str "Lists"] +,Header 2 ("unordered",[],[]) [Str "Unordered"] +,Para [Str "Asterisks",Space,Str "tight:"] +,BulletList + [[Para [Str "asterisk",Space,Str "1"]] + ,[Para [Str "asterisk",Space,Str "2"]] + ,[Para [Str "asterisk",Space,Str "3"]]] +,Para [Str "Asterisks",Space,Str "loose:"] +,BulletList + [[Para [Str "asterisk",Space,Str "1"]] + ,[Para [Str "asterisk",Space,Str "2"]] + ,[Para [Str "asterisk",Space,Str "3"]]] +,Para [Str "Pluses",Space,Str "tight:"] +,BulletList + [[Para [Str "Plus",Space,Str "1"]] + ,[Para [Str "Plus",Space,Str "2"]] + ,[Para [Str "Plus",Space,Str "3"]]] +,Para [Str "Pluses",Space,Str "loose:"] +,BulletList + [[Para [Str "Plus",Space,Str "1"]] + ,[Para [Str "Plus",Space,Str "2"]] + ,[Para [Str "Plus",Space,Str "3"]]] +,Para [Str "Minuses",Space,Str "tight:"] +,BulletList + [[Para [Str "Minus",Space,Str "1"]] + ,[Para [Str "Minus",Space,Str "2"]] + ,[Para [Str "Minus",Space,Str "3"]]] +,Para [Str "Minuses",Space,Str "loose:"] +,BulletList + [[Para [Str "Minus",Space,Str "1"]] + ,[Para [Str "Minus",Space,Str "2"]] + ,[Para [Str "Minus",Space,Str "3"]]] +,Header 2 ("ordered",[],[]) [Str "Ordered"] +,Para [Str "Tight:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "First"]] + ,[Para [Str "Second"]] + ,[Para [Str "Third"]]] +,Para [Str "and:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "One"]] + ,[Para [Str "Two"]] + ,[Para [Str "Three"]]] +,Para [Str "Loose",Space,Str "using",Space,Str "tabs:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "First"]] + ,[Para [Str "Second"]] + ,[Para [Str "Third"]]] +,Para [Str "and",Space,Str "using",Space,Str "spaces:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "One"]] + ,[Para [Str "Two"]] + ,[Para [Str "Three"]]] +,Para [Str "Multiple",Space,Str "paragraphs:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "Item",Space,Str "1,",Space,Str "graf",Space,Str "one."] + ,Para [Str "Item",Space,Str "1.",Space,Str "graf",Space,Str "two.",Space,Str "The",Space,Str "quick",Space,Str "brown",Space,Str "fox",Space,Str "jumped",Space,Str "over",Space,Str "the",Space,Str "lazy",SoftBreak,Str "dog's",Space,Str "back."]] + ,[Para [Str "Item",Space,Str "2."]] + ,[Para [Str "Item",Space,Str "3."]]] +,Para [Str "List",Space,Str "styles:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [] +,OrderedList (1,LowerRoman,DefaultDelim) + [] +,Header 2 ("nested",[],[]) [Str "Nested"] +,BulletList + [[Para [Str "Tab"] + ,BulletList + [[Para [Str "Tab"] + ,BulletList + [[Para [Str "Tab"]]]]]]] +,Para [Str "Here's",Space,Str "another:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "First"]] + ,[Para [Str "Second:"] + ,BulletList + [[Para [Str "Fee"]] + ,[Para [Str "Fie"]] + ,[Para [Str "Foe"]]]] + ,[Para [Str "Third"]]] +,Para [Str "Same",Space,Str "thing",Space,Str "but",Space,Str "with",Space,Str "paragraphs:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "First"]] + ,[Para [Str "Second:"] + ,BulletList + [[Para [Str "Fee"]] + ,[Para [Str "Fie"]] + ,[Para [Str "Foe"]]]] + ,[Para [Str "Third"]]] +,Header 2 ("tabs-and-spaces",[],[]) [Str "Tabs",Space,Str "and",Space,Str "spaces"] +,BulletList + [[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]] + ,[Para [Str "this",Space,Str "is",Space,Str "a",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"] + ,BulletList + [[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "tabs"]] + ,[Para [Str "this",Space,Str "is",Space,Str "an",Space,Str "example",Space,Str "list",Space,Str "item",Space,Str "indented",Space,Str "with",Space,Str "spaces"]]]]] +,Header 2 ("fancy-list-markers",[],[]) [Str "Fancy",Space,Str "list",Space,Str "markers"] +,Para [Str "Autonumbering:"] +,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "Autonumber."]] + ,[Para [Str "More."] + ,OrderedList (1,DefaultStyle,DefaultDelim) + [[Para [Str "Nested."]]]]] +,Header 2 ("definition",[],[]) [Str "Definition"] +,DefinitionList + [([Str "Violin"], + [[Para [Str "Stringed",Space,Str "musical",Space,Str "instrument."] + ,Para [Str "Torture",Space,Str "device."]]]) + ,([Str "Cello",LineBreak,Str "Violoncello"], + [[Para [Str "Low-voiced",Space,Str "stringed",Space,Str "instrument."]]])] +,Header 1 ("inline-markup",[],[]) [Str "Inline",Space,Str "Markup"] +,Para [Str "This",Space,Str "is",Space,Emph [Str "emphasized"],Str ",",Space,Str "and",Space,Str "so",Space,Emph [Str "is",SoftBreak,Str "this"],Str "."] +,Para [Str "This",Space,Str "is",Space,Strong [Str "strong"],Str ",",Space,Str "and",Space,Str "so",SoftBreak,Strong [Str "is",Space,Str "this"],Str "."] +,Para [Str "Empty",Space,Strong [],Space,Str "and",Space,Emph [],Str "."] +,Para [Str "An",SoftBreak,Emph [Link ("",[],[]) [Str "emphasized",SoftBreak,Str "link"] ("/url","")],Str "."] +,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]] +,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."] +,Para [Strong [Emph [Str "This",Space,Str "is",Space,Str "strong",Space,Str "and",Space,Str "em."]]] +,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."] +,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",SoftBreak,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",SoftBreak,Code ("",[],[]) "",Str "."] +,Para [Str "This",Space,Str "is",Space,SmallCaps [Str "small",Space,Str "caps"],Str "."] +,Para [Str "These",Space,Str "are",Space,Str "all",Space,Str "underlined:",Space,Str "foo",Space,Str "and",Space,Str "bar."] +,Para [Str "These",Space,Str "are",Space,Str "all",Space,Str "strikethrough:",Space,Strikeout [Str "foo"],Str ",",SoftBreak,Strikeout [Str "bar"],Str ",",Space,Str "and",Space,Strikeout [Str "baz"],Str "."] +,Header 1 ("smart-quotes-ellipses-dashes",[],[]) [Str "Smart",Space,Str "quotes,",Space,Str "ellipses,",Space,Str "dashes"] +,Para [Str "\"Hello,\"",Space,Str "said",Space,Str "the",Space,Str "spider.",Space,Str "\"'Shelob'",Space,Str "is",Space,Str "my",Space,Str "name.\""] +,Para [Str "'A',",Space,Str "'B',",Space,Str "and",Space,Str "'C'",Space,Str "are",Space,Str "letters."] +,Para [Str "'Oak,'",Space,Str "'elm,'",Space,Str "and",Space,Str "'beech'",Space,Str "are",Space,Str "names",Space,Str "of",Space,Str "trees.",Space,Str "So",Space,Str "is",Space,Str "'pine.'"] +,Para [Str "'He",Space,Str "said,",Space,Str "\"I",Space,Str "want",Space,Str "to",Space,Str "go.\"'",Space,Str "Were",Space,Str "you",Space,Str "alive",Space,Str "in",Space,Str "the",Space,Str "70's?"] +,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "quoted",Space,Str "'",Code ("",[],[]) "code",Str "'",Space,Str "and",Space,Str "a",SoftBreak,Str "\"",Link ("",[],[]) [Str "quoted",SoftBreak,Str "link"] ("http://example.com/?foo=1&bar=2",""),Str "\"."] +,Para [Str "Some",Space,Str "dashes:",Space,Str "one---two",Space,Str "---",Space,Str "three--four",Space,Str "--",Space,Str "five."] +,Para [Str "Dashes",Space,Str "between",Space,Str "numbers:",Space,Str "5-7,",Space,Str "255-66,",Space,Str "1987-1999."] +,Para [Str "Ellipses...and.",Space,Str ".",Space,Str ".and",Space,Str ".",Space,Str ".",Space,Str ".",Space,Str "."] +,Header 1 ("latex",[],[]) [Str "LaTeX"] +,BulletList + [[Para [Str "\\cite[22-23]{smith.1899}"]] + ,[Para [Str "\\doublespacing"]] + ,[Para [Str "$2+2=4$"]] + ,[Para [Str "$x",Space,Str "\\in",Space,Str "y$"]] + ,[Para [Str "$\\alpha",Space,Str "\\wedge",Space,Str "\\omega$"]] + ,[Para [Str "$223$"]] + ,[Para [Str "$p$-Tree"]] + ,[Para [Str "$\\frac{d}{dx}f(x)=\\lim_{h\\to",Space,Str "0}\\frac{f(x+h)-f(x)}{h}$"]] + ,[Para [Str "Here's",Space,Str "one",Space,Str "that",Space,Str "has",Space,Str "a",Space,Str "line",Space,Str "break",Space,Str "in",Space,Str "it:",Space,Str "$\\alpha",Space,Str "+",Space,Str "\\omega",Space,Str "\\times",SoftBreak,Str "x^2$."]]] +,Para [Str "These",Space,Str "shouldn't",Space,Str "be",Space,Str "math:"] +,BulletList + [[Para [Str "To",Space,Str "get",Space,Str "the",Space,Str "famous",Space,Str "equation,",Space,Str "write",SoftBreak,Code ("",[],[]) "$e = mc^2$",Str "."]] + ,[Para [Str "$22,000",Space,Str "is",Space,Str "a",Space,Emph [Str "lot"],Space,Str "of",Space,Str "money.",Space,Str "So",Space,Str "is",Space,Str "$34,000.",Space,Str "(It",SoftBreak,Str "worked",Space,Str "if",Space,Str "\"lot\"",Space,Str "is",Space,Str "emphasized.)"]] + ,[Para [Str "Escaped",Space,Code ("",[],[]) "$",Str ":",Space,Str "$73",Space,Emph [Str "this",Space,Str "should",Space,Str "be",SoftBreak,Str "emphasized"],Space,Str "23$."]]] +,Para [Str "Here's",Space,Str "a",Space,Str "LaTeX",Space,Str "table:"] +,Para [Str "\\begin{tabular}{|l|l|}\\hline",Space,Str "Animal",Space,Str "&",Space,Str "Number",Space,Str "\\\\",Space,Str "\\hline",Space,Str "Dog",Space,Str "&",SoftBreak,Str "2",Space,Str "\\\\",Space,Str "Cat",Space,Str "&",Space,Str "1",Space,Str "\\\\",Space,Str "\\hline",Space,Str "\\end{tabular}"] +,Header 1 ("special-characters",[],[]) [Str "Special",Space,Str "Characters"] +,Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "unicode:"] +,BulletList + [[Para [Str "I",Space,Str "hat:",Space,Str "\206"]] + ,[Para [Str "o",Space,Str "umlaut:",Space,Str "\246"]] + ,[Para [Str "section:",Space,Str "\167"]] + ,[Para [Str "set",Space,Str "membership:",Space,Str "elem"]] + ,[Para [Str "copyright:",Space,Str "\169"]]] +,Para [Str "AT&T",Space,Str "has",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "their",Space,Str "name."] +,Para [Str "AT&T",Space,Str "is",Space,Str "another",Space,Str "way",Space,Str "to",Space,Str "write",Space,Str "it."] +,Para [Str "This",Space,Str "&",Space,Str "that."] +,Para [Str "4",Space,Str "<",Space,Str "5."] +,Para [Str "6",Space,Str ">",Space,Str "5."] +,Para [Str "Backslash:",Space,Str "\\"] +,Para [Str "Backtick:",Space,Str "`"] +,Para [Str "Asterisk:",Space,Str "*"] +,Para [Str "Underscore:",Space,Str "_"] +,Para [Str "Left",Space,Str "brace:",Space,Str "{"] +,Para [Str "Right",Space,Str "brace:",Space,Str "}"] +,Para [Str "Left",Space,Str "bracket:",Space,Str "["] +,Para [Str "Right",Space,Str "bracket:",Space,Str "]"] +,Para [Str "Left",Space,Str "paren:",Space,Str "("] +,Para [Str "Right",Space,Str "paren:",Space,Str ")"] +,Para [Str "Greater-than:",Space,Str ">"] +,Para [Str "Hash:",Space,Str "#"] +,Para [Str "Period:",Space,Str "."] +,Para [Str "Bang:",Space,Str "!"] +,Para [Str "Plus:",Space,Str "+"] +,Para [Str "Minus:",Space,Str "-"] +,Header 1 ("links",[],[]) [Str "Links"] +,Header 2 ("explicit",[],[]) [Str "Explicit"] +,Para [Str "Just",Space,Str "a",SoftBreak,Link ("",[],[]) [Str "URL"] ("/url/",""),Str "."] +,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title"),Str "."] +,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title preceded by two spaces"),Str "."] +,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title preceded by a tab"),Str "."] +,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title with \"quotes\" in it")] +,Para [Link ("",[],[]) [Str "URL",SoftBreak,Str "and",Space,Str "title"] ("/url/","title with single quotes")] +,Para [Str "Email",Space,Str "link",Space,Str "(nobody",Space,Str "[at]",Space,Str "nowhere.net)"] +,Para [Link ("",[],[]) [Str "Empty"] ("",""),Str "."] +,Header 2 ("reference",[],[]) [Str "Reference"] +,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/",""),Str "."] +,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/",""),Str "."] +,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/",""),Str "."] +,Para [Str "With",Space,Link ("",[],[]) [Str "embedded",SoftBreak,Str "[brackets]"] ("/url/",""),Str "."] +,Para [Link ("",[],[]) [Str "b"] ("/url/",""),Space,Str "by",SoftBreak,Str "itself",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "link."] +,Para [Str "Indented",SoftBreak,Link ("",[],[]) [Str "once"] ("/url",""),Str "."] +,Para [Str "Indented",SoftBreak,Link ("",[],[]) [Str "twice"] ("/url",""),Str "."] +,Para [Str "Indented",SoftBreak,Link ("",[],[]) [Str "thrice"] ("/url",""),Str "."] +,Para [Str "This",Space,Str "should",Space,Str "[not]",Space,Str "be",Space,Str "a",Space,Str "link."] +,CodeBlock ("",[],[]) "[not]: /url" +,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "bar"] ("/url/","Title with \"quotes\" inside"),Str "."] +,Para [Str "Foo",SoftBreak,Link ("",[],[]) [Str "biz"] ("/url/","Title with \"quote\" inside"),Str "."] +,Header 2 ("with-ampersands",[],[]) [Str "With",Space,Str "ampersands"] +,Para [Str "Here's",Space,Str "a",SoftBreak,Link ("",[],[]) [Str "link",SoftBreak,Str "with",Space,Str "an",Space,Str "ampersand",Space,Str "in",Space,Str "the",Space,Str "URL"] ("http://example.com/?foo=1&bar=2",""),Str "."] +,Para [Str "Here's",Space,Str "a",Space,Str "link",Space,Str "with",Space,Str "an",Space,Str "amersand",Space,Str "in",Space,Str "the",Space,Str "link",Space,Str "text:",SoftBreak,Link ("",[],[]) [Str "AT&T"] ("http://att.com/","AT&T"),Str "."] +,Para [Str "Here's",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "inline",SoftBreak,Str "link"] ("/script?foo=1&bar=2",""),Str "."] +,Para [Str "Here's",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "inline",SoftBreak,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."] +,Header 2 ("autolinks",[],[]) [Str "Autolinks"] +,Para [Str "With",Space,Str "an",Space,Str "ampersand:",SoftBreak,Link ("",[],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")] +,BulletList + [[Para [Str "In",Space,Str "a",Space,Str "list?"]] + ,[Para [Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]] + ,[Para [Str "It",Space,Str "should."]]] +,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Str "nobody",Space,Str "[at]",Space,Str "nowhere.net"] +,BlockQuote + [Para [Str "Blockquoted:",SoftBreak,Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]] +,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",SoftBreak,Code ("",[],[]) ""] +,CodeBlock ("",[],[]) "or here: " +,Header 1 ("images",[],[]) [Str "Images"] +,Para [Str "From",Space,Str "\"Voyage",Space,Str "dans",Space,Str "la",Space,Str "Lune\"",Space,Str "by",Space,Str "Georges",Space,Str "Melies",Space,Str "(1902):"] +,Para [Image ("",[],[]) [] ("lalune.jpg","Voyage dans la Lune")] +,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "movie",SoftBreak,Image ("",[],[]) [] ("movie.jpg",""),SoftBreak,Str "icon."] +,Header 1 ("footnotes",[],[]) [Str "Footnotes"] +,Para [Str "Here",Space,Str "is",Space,Str "a",Space,Str "footnote",Space,Str "reference",Link ("",[],[]) [Str "(1)"] ("#note_1",""),Str ",",SoftBreak,Str "and",SoftBreak,Str "another",Link ("",[],[]) [Str "(longnote)"] ("#note_longnote",""),Str ".",SoftBreak,Str "This",Space,Str "should",Space,Emph [Str "not"],Space,Str "be",Space,Str "a",Space,Str "footnote",Space,Str "reference,",Space,Str "because",Space,Str "it",SoftBreak,Str "contains",Space,Str "a",Space,Str "space^(my",Space,Str "note)."] +,Para [Link ("",[],[]) [Str "(1)"] ("#ref_1",""),Space,Str "Here",Space,Str "is",Space,Str "the",Space,Str "footnote.",Space,Str "It",Space,Str "can",SoftBreak,Str "go",Space,Str "anywhere",Space,Str "in",Space,Str "the",Space,Str "document,",Space,Str "not",Space,Str "just",Space,Str "at",Space,Str "the",Space,Str "end."] +,Para [Link ("",[],[]) [Str "(longnote)"] ("#ref_longnote",""),Space,Str "Here's",SoftBreak,Str "the",Space,Str "other",Space,Str "note.",Space,Str "This",Space,Str "one",Space,Str "contains",Space,Str "multiple",Space,Str "blocks."] +,Para [Str "Caret",Space,Str "characters",Space,Str "are",Space,Str "used",Space,Str "to",Space,Str "indicate",Space,Str "that",Space,Str "the",Space,Str "blocks",Space,Str "all",Space,Str "belong",Space,Str "to",SoftBreak,Str "a",Space,Str "single",Space,Str "footnote",Space,Str "(as",Space,Str "with",Space,Str "block",Space,Str "quotes)."] +,CodeBlock ("",[],[]) " { }" +,Para [Str "If",Space,Str "you",Space,Str "want,",Space,Str "you",Space,Str "can",Space,Str "use",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",Space,Str "of",Space,Str "every",Space,Str "line,",Space,Str "as",SoftBreak,Str "with",Space,Str "blockquotes,",Space,Str "but",Space,Str "all",Space,Str "that",Space,Str "you",Space,Str "need",Space,Str "is",Space,Str "a",Space,Str "caret",Space,Str "at",Space,Str "the",Space,Str "beginning",SoftBreak,Str "of",Space,Str "the",Space,Str "first",Space,Str "line",Space,Str "of",Space,Str "the",Space,Str "block",Space,Str "and",Space,Str "any",Space,Str "preceding",Space,Str "blank",Space,Str "lines."] +,Para [Str "text",Space,Emph [Str "Leading",Space,Str "space"]] +,Para [Emph [Str "Trailing",Space,Str "space"],Space,Str "text"] +,Para [Str "text",Space,Emph [Str "Leading",Space,Str "spaces"]] +,Para [Emph [Str "Trailing",Space,Str "spaces"],Space,Str "text"] +,Header 1 ("tables",[],[]) [Str "Tables"] +,Header 2 ("tables-with-headers",[],[]) [Str "Tables",Space,Str "with",Space,Str "Headers"] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[Para [Str "X"]] + ,[Para [Str "Y"]] + ,[Para [Str "Z"]]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Header 2 ("tables-without-headers",[],[]) [Str "Tables",Space,Str "without",Space,Str "Headers"] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[] + ,[] + ,[]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[] + ,[] + ,[]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[] + ,[] + ,[]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Table [] [AlignLeft,AlignLeft,AlignLeft] [0.0,0.0,0.0] + [[] + ,[] + ,[]] + [[[Para [Str "1"]] + ,[Para [Str "2"]] + ,[Para [Str "3"]]] + ,[[Para [Str "4"]] + ,[Para [Str "5"]] + ,[Para [Str "6"]]]] +,Header 2 ("empty-tables",[],[]) [Str "Empty",Space,Str "Tables"] +,Para [Str "This",Space,Str "section",Space,Str "should",Space,Str "be",Space,Str "empty."]] diff --git a/test/jats-reader.xml b/test/jats-reader.xml new file mode 100644 index 000000000..eb06fcc22 --- /dev/null +++ b/test/jats-reader.xml @@ -0,0 +1,1773 @@ + + +
+ + + + + + + + + + +Pandoc Test Suite + + + + +

+ This is a set of tests for pandoc. Most of them are adapted from John + Gruber's markdown test suite. +

+ + Headers + + Level 2 with an + <ext-link ext-link-type="uri" xlink:href="/url">embedded + link</ext-link> + + Level 3 with <italic>emphasis</italic> + + Level 4 + + Level 5 + + + + + + + Level 1 + + Level 2 with <italic>emphasis</italic> + + Level 3 +

+ with no blank line +

+
+
+ + Level 2 +

+ with no blank line +

+
+
+ + Paragraphs +

+ Here's a regular paragraph. +

+

+ In Markdown 1.0.0 and earlier. Version 8. This line turns into a + list item. Because a hard-wrapped line in the middle of a paragraph + looked like a list item. +

+

+ Here's one with a bullet. * criminey. +

+

+ There should be a hard line breakhere. +

+
+ + Block Quotes +

+ E-mail style: +

+ +

+ This is a block quote. It is pretty short. +

+
+ +

+ Code in a block quote: +

+ sub status { + print "working"; +} +

+ A list: +

+ + +

+ item one +

+
+ +

+ item two +

+
+
+

+ Nested block quotes: +

+ +

+ nested +

+
+ +

+ nested +

+
+
+

+ This should not be a block quote: 2 > 1. +

+

+ Box-style: +

+ +

+ Example: +

+ sub status { + print "working"; +} +
+ + + +

+ do laundry +

+
+ +

+ take out the trash +

+
+
+
+

+ Here's a nested one: +

+ +

+ Joe said: +

+ +

+ Don't quote me. +

+
+
+

+ And a following paragraph. +

+
+ + Code Blocks +

+ Code: +

+ ---- (should be four hyphens) + +sub status { + print "working"; +} + +this code block is indented by one tab +

+ And: +

+ this code block is indented by two tabs + +These should not be escaped: \$ \\ \> \[ \{ +
+ + Lists + + Unordered +

+ Asterisks tight: +

+ + +

+ asterisk 1 +

+
+ +

+ asterisk 2 +

+
+ +

+ asterisk 3 +

+
+
+

+ Asterisks loose: +

+ + +

+ asterisk 1 +

+
+ +

+ asterisk 2 +

+
+ +

+ asterisk 3 +

+
+
+

+ Pluses tight: +

+ + +

+ Plus 1 +

+
+ +

+ Plus 2 +

+
+ +

+ Plus 3 +

+
+
+

+ Pluses loose: +

+ + +

+ Plus 1 +

+
+ +

+ Plus 2 +

+
+ +

+ Plus 3 +

+
+
+

+ Minuses tight: +

+ + +

+ Minus 1 +

+
+ +

+ Minus 2 +

+
+ +

+ Minus 3 +

+
+
+

+ Minuses loose: +

+ + +

+ Minus 1 +

+
+ +

+ Minus 2 +

+
+ +

+ Minus 3 +

+
+
+
+ + Ordered +

+ Tight: +

+ + +

+ First +

+
+ +

+ Second +

+
+ +

+ Third +

+
+
+

+ and: +

+ + +

+ One +

+
+ +

+ Two +

+
+ +

+ Three +

+
+
+

+ Loose using tabs: +

+ + +

+ First +

+
+ +

+ Second +

+
+ +

+ Third +

+
+
+

+ and using spaces: +

+ + +

+ One +

+
+ +

+ Two +

+
+ +

+ Three +

+
+
+

+ Multiple paragraphs: +

+ + +

+ Item 1, graf one. +

+

+ Item 1. graf two. The quick brown fox jumped over the lazy + dog's back. +

+
+ +

+ Item 2. +

+
+ +

+ Item 3. +

+
+
+

+ List styles: +

+ + +
+ + Nested + + +

+ Tab +

+ + +

+ Tab +

+ + +

+ Tab +

+
+
+
+
+
+
+

+ Here's another: +

+ + +

+ First +

+
+ +

+ Second: +

+ + +

+ Fee +

+
+ +

+ Fie +

+
+ +

+ Foe +

+
+
+
+ +

+ Third +

+
+
+

+ Same thing but with paragraphs: +

+ + +

+ First +

+
+ +

+ Second: +

+ + +

+ Fee +

+
+ +

+ Fie +

+
+ +

+ Foe +

+
+
+
+ +

+ Third +

+
+
+
+ + Tabs and spaces + + +

+ this is a list item indented with tabs +

+
+ +

+ this is a list item indented with spaces +

+ + +

+ this is an example list item indented with tabs +

+
+ +

+ this is an example list item indented with spaces +

+
+
+
+
+
+ + Fancy list markers +

+ Autonumbering: +

+ + +

+ Autonumber. +

+
+ +

+ More. +

+ + +

+ Nested. +

+
+
+
+
+
+ + Definition + + + + Violin + + +

+ Stringed musical instrument. +

+

+ Torture device. +

+
+
+ + + CelloVioloncello + + +

+ Low-voiced stringed instrument. +

+
+
+
+
+
+ + Inline Markup +

+ This is emphasized, and so is + this. +

+

+ This is strong, and so + is this. +

+

+ Empty and . +

+

+ An + emphasized + link. +

+

+ This is strong and em. +

+

+ So is this word. +

+

+ This is strong and em. +

+

+ So is this word. +

+

+ This is code: >, $, + \, \$, + <html>. +

+

+ This is small caps. +

+

+ These are all underlined: foo and bar. +

+

+ These are all strikethrough: foo, + bar, and baz. +

+
+ + Smart quotes, ellipses, dashes +

+ "Hello," said the spider. "'Shelob' is my name." +

+

+ 'A', 'B', and 'C' are letters. +

+

+ 'Oak,' 'elm,' and 'beech' are names of trees. So is 'pine.' +

+

+ 'He said, "I want to go."' Were you alive in the 70's? +

+

+ Here is some quoted 'code' and a + "quoted + link". +

+

+ Some dashes: one---two --- three--four -- five. +

+

+ Dashes between numbers: 5-7, 255-66, 1987-1999. +

+

+ Ellipses...and. . .and . . . . +

+
+ + LaTeX + + +

+ \cite[22-23]{smith.1899} +

+
+ +

+ \doublespacing +

+
+ +

+ $2+2=4$ +

+
+ +

+ $x \in y$ +

+
+ +

+ $\alpha \wedge \omega$ +

+
+ +

+ $223$ +

+
+ +

+ $p$-Tree +

+
+ +

+ $\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$ +

+
+ +

+ Here's one that has a line break in it: $\alpha + \omega \times + x^2$. +

+
+
+

+ These shouldn't be math: +

+ + +

+ To get the famous equation, write + $e = mc^2$. +

+
+ +

+ $22,000 is a lot of money. So is $34,000. (It + worked if "lot" is emphasized.) +

+
+ +

+ Escaped $: $73 this should be + emphasized 23$. +

+
+
+

+ Here's a LaTeX table: +

+

+ \begin{tabular}{|l|l|}\hline Animal & Number \\ \hline Dog & + 2 \\ Cat & 1 \\ \hline \end{tabular} +

+
+ + Special Characters +

+ Here is some unicode: +

+ + +

+ I hat: Î +

+
+ +

+ o umlaut: ö +

+
+ +

+ section: § +

+
+ +

+ set membership: elem +

+
+ +

+ copyright: © +

+
+
+

+ AT&T has an ampersand in their name. +

+

+ AT&T is another way to write it. +

+

+ This & that. +

+

+ 4 < 5. +

+

+ 6 > 5. +

+

+ Backslash: \ +

+

+ Backtick: ` +

+

+ Asterisk: * +

+

+ Underscore: _ +

+

+ Left brace: { +

+

+ Right brace: } +

+

+ Left bracket: [ +

+

+ Right bracket: ] +

+

+ Left paren: ( +

+

+ Right paren: ) +

+

+ Greater-than: > +

+

+ Hash: # +

+

+ Period: . +

+

+ Bang: ! +

+

+ Plus: + +

+

+ Minus: - +

+
+ + Links + + Explicit +

+ Just a + URL. +

+

+ URL + and title. +

+

+ URL + and title. +

+

+ URL + and title. +

+

+ URL + and title +

+

+ URL + and title +

+

+ Email link (nobody [at] nowhere.net) +

+

+ Empty. +

+
+ + Reference +

+ Foo + bar. +

+

+ Foo + bar. +

+

+ Foo + bar. +

+

+ With embedded + [brackets]. +

+

+ b by + itself should be a link. +

+

+ Indented + once. +

+

+ Indented + twice. +

+

+ Indented + thrice. +

+

+ This should [not] be a link. +

+ [not]: /url +

+ Foo + bar. +

+

+ Foo + biz. +

+
+ + With ampersands +

+ Here's a + link + with an ampersand in the URL. +

+

+ Here's a link with an amersand in the link text: + AT&T. +

+

+ Here's an + inline + link. +

+

+ Here's an + inline + link in pointy braces. +

+
+ + Autolinks +

+ With an ampersand: + http://example.com/?foo=1&bar=2 +

+ + +

+ In a list? +

+
+ +

+ http://example.com/ +

+
+ +

+ It should. +

+
+
+

+ An e-mail address: nobody [at] nowhere.net +

+ +

+ Blockquoted: + http://example.com/ +

+
+

+ Auto-links should not occur here: + <http://example.com/> +

+ or here: <http://example.com/> +
+
+ + Images +

+ From "Voyage dans la Lune" by Georges Melies (1902): +

+

+ +

+

+ Here is a movie + + icon. +

+
+ + Footnotes +

+ Here is a footnote reference(1), + and + another(longnote). + This should not be a footnote reference, because it + contains a space^(my note). +

+

+ (1) Here is the footnote. It can + go anywhere in the document, not just at the end. +

+

+ (longnote) Here's + the other note. This one contains multiple blocks. +

+

+ Caret characters are used to indicate that the blocks all belong to + a single footnote (as with block quotes). +

+ { <code> } +

+ If you want, you can use a caret at the beginning of every line, as + with blockquotes, but all that you need is a caret at the beginning + of the first line of the block and any preceding blank lines. +

+

+ text Leading space +

+

+ Trailing space text +

+

+ text Leading spaces +

+

+ Trailing spaces text +

+
+ + Tables + + Tables with Headers + + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + + + + + + + + +
+

+ X +

+
+

+ Y +

+
+

+ Z +

+
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+
+ + Tables without Headers + + + + + + + + + + + + + + + + +
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + +
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + +
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+ + + + + + + + + + + + + + + + +
+

+ 1 +

+
+

+ 2 +

+
+

+ 3 +

+
+

+ 4 +

+
+

+ 5 +

+
+

+ 6 +

+
+
+ + Empty Tables +

+ This section should be empty. +

+
+
+ + + +
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 123434411..ff852ee0e 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -11,6 +11,7 @@ import qualified Tests.Readers.Creole import qualified Tests.Readers.Docx import qualified Tests.Readers.EPUB import qualified Tests.Readers.HTML +import qualified Tests.Readers.JATS import qualified Tests.Readers.LaTeX import qualified Tests.Readers.Markdown import qualified Tests.Readers.Muse @@ -61,6 +62,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests [ testGroup "LaTeX" Tests.Readers.LaTeX.tests , testGroup "Markdown" Tests.Readers.Markdown.tests , testGroup "HTML" Tests.Readers.HTML.tests + , testGroup "JATS" Tests.Readers.JATS.tests , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests , testGroup "Docx" Tests.Readers.Docx.tests -- cgit v1.2.3 From d85357139748ea657f030ab314c39e70f56764f4 Mon Sep 17 00:00:00 2001 From: Hamish Mackenzie Date: Wed, 20 Dec 2017 23:55:48 +1300 Subject: Improve support for code language in JATS --- src/Text/Pandoc/Readers/JATS.hs | 21 ++++++++++++++++-- src/Text/Pandoc/Writers/JATS.hs | 47 +++++++++++++++++++++++++---------------- test/Tests/Readers/JATS.hs | 5 +++++ test/Tests/Writers/JATS.hs | 7 +++++- 4 files changed, 59 insertions(+), 21 deletions(-) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index fc71e9a51..851fbec35 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -15,6 +15,8 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (underlineSpan, crFilter, safeRead) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import qualified Data.Set as S (fromList, member) +import Data.Set ((\\)) type JATS m = StateT JATSState m @@ -98,8 +100,8 @@ instance HasMeta JATSState where deleteMeta field s = s {jatsMeta = deleteMeta field (jatsMeta s)} isBlockElement :: Content -> Bool -isBlockElement (Elem e) = qName (elName e) `elem` blocktags - where blocktags = paragraphLevel ++ lists ++ mathML ++ other +isBlockElement (Elem e) = qName (elName e) `S.member` blocktags + where blocktags = S.fromList (paragraphLevel ++ lists ++ mathML ++ other) \\ S.fromList inlinetags paragraphLevel = ["address", "array", "boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic", "media", "preformat", "supplementary-material", "table-wrap", "table-wrap-group", @@ -108,6 +110,21 @@ isBlockElement (Elem e) = qName (elName e) `elem` blocktags mathML = ["tex-math", "mml:math"] other = ["p", "related-article", "related-object", "ack", "disp-quote", "speech", "statement", "verse-group", "x"] + inlinetags = ["email", "ext-link", "uri", "inline-supplementary-material", + "related-article", "related-object", "hr", "bold", "fixed-case", + "italic", "monospace", "overline", "overline-start", "overline-end", + "roman", "sans-serif", "sc", "strike", "underline", "underline-start", + "underline-end", "ruby", "alternatives", "inline-graphic", "private-char", + "chem-struct", "inline-formula", "tex-math", "mml:math", "abbrev", + "milestone-end", "milestone-start", "named-content", "styled-content", + "fn", "target", "xref", "sub", "sup", "x", "address", "array", + "boxed-text", "chem-struct-wrap", "code", "fig", "fig-group", "graphic", + "media", "preformat", "supplementary-material", "table-wrap", + "table-wrap-group", "disp-formula", "disp-formula-group", + "citation-alternatives", "element-citation", "mixed-citation", + "nlm-citation", "award-id", "funding-source", "open-access", + "def-list", "list", "ack", "disp-quote", "speech", "statement", + "verse-group"] isBlockElement _ = False -- Trim leading and trailing newline characters diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 8824eeb24..8dda969d9 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -170,6 +170,28 @@ imageMimeType src kvs = ((drop 1 . dropWhile (/='/')) <$> mbMT) in (maintype, subtype) +languageFor :: [String] -> String +languageFor classes = + case langs of + (l:_) -> escapeStringForXML l + [] -> "" + where isLang l = map toLower l `elem` map (map toLower) languages + langsFrom s = if isLang s + then [s] + else languagesByExtension . map toLower $ s + langs = concatMap langsFrom classes + +codeAttr :: Attr -> (String, [(String, String)]) +codeAttr (ident,classes,kvs) = (lang, attr) + where + attr = [("id",ident) | not (null ident)] ++ + [("language",lang) | not (null lang)] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["code-type", + "code-version", "executable", + "language-version", "orientation", + "platforms", "position", "specific-use"]] + lang = languageFor classes + -- | Convert a Pandoc block element to JATS. blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m Doc blockToJATS _ Null = return empty @@ -233,23 +255,10 @@ blockToJATS opts (LineBlock lns) = blockToJATS opts $ linesToPara lns blockToJATS opts (BlockQuote blocks) = inTagsIndented "disp-quote" <$> blocksToJATS opts blocks -blockToJATS _ (CodeBlock (ident,classes,kvs) str) = return $ +blockToJATS _ (CodeBlock a str) = return $ inTags False tag attr (flush (text (escapeStringForXML str))) - where attr = [("id",ident) | not (null ident)] ++ - [("language",lang) | not (null lang)] ++ - [(k,v) | (k,v) <- kvs, k `elem` ["code-type", - "code-version", "executable", - "language-version", "orientation", - "platforms", "position", "specific-use"]] - tag = if null lang then "preformat" else "code" - lang = case langs of - (l:_) -> escapeStringForXML l - [] -> "" - isLang l = map toLower l `elem` map (map toLower) languages - langsFrom s = if isLang s - then [s] - else languagesByExtension . map toLower $ s - langs = concatMap langsFrom classes + where (lang, attr) = codeAttr a + tag = if null lang then "preformat" else "code" blockToJATS _ (BulletList []) = return empty blockToJATS opts (BulletList lst) = inTags True "list" [("list-type", "bullet")] <$> @@ -346,8 +355,10 @@ inlineToJATS opts (Quoted SingleQuote lst) = do inlineToJATS opts (Quoted DoubleQuote lst) = do contents <- inlinesToJATS opts lst return $ char '“' <> contents <> char '”' -inlineToJATS _ (Code _ str) = - return $ inTagsSimple "monospace" $ text (escapeStringForXML str) +inlineToJATS _ (Code a str) = + return $ inTags False tag attr $ text (escapeStringForXML str) + where (lang, attr) = codeAttr a + tag = if null lang then "monospace" else "code" inlineToJATS _ il@(RawInline f x) | f == "jats" = return $ text x | otherwise = do diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index ed6317c71..5c7dfa77c 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -14,6 +14,11 @@ jats = purely $ readJATS def tests :: [TestTree] tests = [ testGroup "inline code" [ test jats "basic" $ "

\n @&\n

" =?> para (code "@&") + , test jats "lang" $ "

\n @&\n

" =?> para (codeWith ("", ["c"], []) "@&") + ] + , testGroup "block code" + [ test jats "basic" $ "@&" =?> codeBlock "@&" + , test jats "lang" $ "@&" =?> codeBlockWith ("", ["c"], []) "@&" ] , testGroup "images" [ test jats "basic" $ "" diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index cd4609849..f14f1c229 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -31,6 +31,11 @@ infix 4 =: tests :: [TestTree] tests = [ testGroup "inline code" [ "basic" =: code "@&" =?> "

\n @&\n

" + , "lang" =: codeWith ("", ["c"], []) "@&" =?> "

\n @&\n

" + ] + , testGroup "block code" + [ "basic" =: codeBlock "@&" =?> "@&" + , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "@&" ] , testGroup "images" [ "basic" =: @@ -38,7 +43,7 @@ tests = [ testGroup "inline code" =?> "" ] , testGroup "inlines" - [ "Emphasis" =: emph ("emphasized") + [ "Emphasis" =: emph "emphasized" =?> "

\n emphasized\n

" ] , "bullet list" =: bulletList [ plain $ text "first" -- cgit v1.2.3