From 8c38390038edcebd55f9dec8359ef983f3813425 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 27 Nov 2020 21:21:25 +0100 Subject: HTML reader tests: improve test coverage of new features --- src/Text/Pandoc/Readers/HTML/Table.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 5a783988f..91639fa4c 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -214,7 +214,8 @@ normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot -> Either String ([ColSpec], TableHead, [TableBody], TableFoot) normalize widths head' bodies foot = do let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot - let rowLength = length . rowCells + let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs + let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells let ncols = maximum (map rowLength rows) let tblType = tableType (map rowCells rows) -- fail on empty table -- cgit v1.2.3 From 83d63b72e1b9eff9f2aa3b9f36b56d348f0909a2 Mon Sep 17 00:00:00 2001 From: Tassos Manganaris <tassos.manganaris@gmail.com> Date: Fri, 27 Nov 2020 23:42:53 -0500 Subject: Fix a tiny Typo in the CSV reader module Header comment in the CSV reader module says "RST" instead of "CSV". --- src/Text/Pandoc/Readers/CSV.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs index f0edcaa16..45f4d88d4 100644 --- a/src/Text/Pandoc/Readers/CSV.hs +++ b/src/Text/Pandoc/Readers/CSV.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | - Module : Text.Pandoc.Readers.RST + Module : Text.Pandoc.Readers.CSV Copyright : Copyright (C) 2006-2020 John MacFarlane License : GNU GPL, version 2 or above -- cgit v1.2.3 From bff9c129c3579f928a0067759d0a784eb5c07d30 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 29 Nov 2020 10:29:51 -0800 Subject: LaTeX reader: don't parse `\rule` with width 0 as horizontal rule. --- src/Text/Pandoc/Readers/LaTeX.hs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 2a9bff746..15a1a19fc 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1225,6 +1225,16 @@ preamble = mconcat <$> many preambleBlock anyTok return mempty) +rule :: PandocMonad m => LP m Blocks +rule = do + skipopts + width <- T.takeWhile (\c -> isDigit c || c == '.') . stringify <$> tok + _thickness <- tok + -- 0-width rules are used to fix spacing issues: + case safeRead width of + Just (0 :: Double) -> return mempty + _ -> return horizontalRule + paragraph :: PandocMonad m => LP m Blocks paragraph = do x <- trimInlines . mconcat <$> many1 inline @@ -1595,7 +1605,7 @@ blockCommands = M.fromList -- , ("hrule", pure horizontalRule) , ("strut", pure mempty) - , ("rule", skipopts *> tok *> tok $> horizontalRule) + , ("rule", rule) , ("item", looseItem) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", para . trimInlines <$> (skipopts *> tok)) -- cgit v1.2.3 From 7b11cdee49ec491a33dcade6ea68c3084a425992 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 2 Dec 2020 10:46:23 -0800 Subject: Citeproc: ensure that BCP47 lang codes can be used. We ignore the variants and just use the base lang code and country code when passing off to citeproc. --- src/Text/Pandoc/Citeproc.hs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index a48f97c3b..0de2882ae 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -16,6 +16,7 @@ import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) import Text.Pandoc.Readers.Markdown (yamlToRefs) import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir) +import qualified Text.Pandoc.BCP47 as BCP47 import Data.ByteString (ByteString) import qualified Data.ByteString.Lazy as L import Text.Pandoc.Definition as Pandoc @@ -91,8 +92,9 @@ processCitations (Pandoc meta bs) = do case styleRes of Left err -> throwError $ PandocAppError $ prettyCiteprocError err Right style -> return style{ styleAbbreviations = mbAbbrevs } - let mblang = parseLang <$> - ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText) + mblang <- maybe (return Nothing) bcp47LangToIETF + ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= + metaValueToText) let locale = Citeproc.mergeLocales mblang style let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs getCiteId _ = mempty @@ -579,3 +581,16 @@ removeFinalPeriod ils = isRightQuote "\8217" = True isRightQuote "\187" = True isRightQuote _ = False + +bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang) +bcp47LangToIETF bcplang = + case BCP47.parseBCP47 bcplang of + Left _ -> do + report $ InvalidLang bcplang + return Nothing + Right lang -> + return $ Just + $ Lang (BCP47.langLanguage lang) + (if T.null (BCP47.langRegion lang) + then Nothing + else Just (BCP47.langRegion lang)) -- cgit v1.2.3 From 5bbd5a9e80c4f4356d71b54315cf75b5ae2f3650 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 3 Dec 2020 09:51:23 -0800 Subject: Docx writer: Support bold and italic in "complex script." Previously bold and italics didn't work properly in LTR text. This commit causes the w:bCs and w:iCs attributes to be used, in addition to w:b and w:i, for bold and italics respectively. Closes #6911. --- src/Text/Pandoc/Writers/Docx.hs | 8 ++++++-- test/docx/golden/custom_style_preserve.docx | Bin 10573 -> 10578 bytes test/docx/golden/definition_list.docx | Bin 9844 -> 9850 bytes test/docx/golden/inline_formatting.docx | Bin 9959 -> 9969 bytes 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a380fd4fa..4cb879e6a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1272,9 +1272,13 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do $ inlinesToOpenXML opts ils wrapBookmark ident contents inlineToOpenXML' opts (Strong lst) = - withTextProp (mknode "w:b" [] ()) $ inlinesToOpenXML opts lst + withTextProp (mknode "w:b" [] ()) $ + withTextProp (mknode "w:bCs" [] ()) $ -- needed for LTR, #6911 + inlinesToOpenXML opts lst inlineToOpenXML' opts (Emph lst) = - withTextProp (mknode "w:i" [] ()) $ inlinesToOpenXML opts lst + withTextProp (mknode "w:i" [] ()) $ + withTextProp (mknode "w:iCs" [] ()) $ -- needed for LTR, #6911 + inlinesToOpenXML opts lst inlineToOpenXML' opts (Underline lst) = withTextProp (mknode "w:u" [("w:val","single")] ()) $ inlinesToOpenXML opts lst diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index ac595cdd4..17804bb81 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 005f5dbe3..21629e208 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index ce1d16daa..e12e3b38d 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ -- cgit v1.2.3 From 9c6cc79c11d191c74aae8646279a2b94ae253330 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 3 Dec 2020 21:24:27 -0800 Subject: EPUB writer: add frontmatter type on body element for nav.xhtml. Closes #6918. --- src/Text/Pandoc/Writers/EPUB.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 12004889f..eadd1100f 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -845,7 +845,9 @@ pandocToEPUB version opts doc = do [ unode "ol" landmarkItems ] | not (null landmarkItems)] navData <- lift $ writeHtml opts'{ writerVariables = - Context (M.fromList [("navpage", toVal' "true")]) + Context (M.fromList [("navpage", toVal' "true") + ,("body-type", toVal' "frontmatter") + ]) <> cssvars False <> vars } (Pandoc (setMeta "title" (walk removeNote $ fromList $ docTitle' meta) nullMeta) -- cgit v1.2.3 From 7199d68ba078148ff76a38f2c483da73edd62747 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 3 Dec 2020 21:39:19 -0800 Subject: EPUB writer: include title page in landmarks. Closes #6919. Note that the toc is also included if `--toc` is specified. --- src/Text/Pandoc/Writers/EPUB.hs | 9 +++++++-- test/command/5986.md | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index eadd1100f..5867b7c80 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -825,7 +825,12 @@ pandocToEPUB version opts doc = do [ unode "h1" ! [("id","toc-title")] $ tocTitle , unode "ol" ! [("class","toc")] $ tocBlocks ]] let landmarkItems = if epub3 - then [ unode "li" + then unode "li" + [ unode "a" ! [("href", + "text/title_page.xhtml") + ,("epub:type", "titlepage")] $ + ("Title Page" :: String) ] : + [ unode "li" [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ ("Cover" :: String)] | @@ -834,7 +839,7 @@ pandocToEPUB version opts doc = do [ unode "li" [ unode "a" ! [("href", "#toc") ,("epub:type", "toc")] $ - ("Table of contents" :: String) + ("Table of Contents" :: String) ] | writerTableOfContents opts ] else [] diff --git a/test/command/5986.md b/test/command/5986.md index e24aebb64..ea0ca70c1 100644 --- a/test/command/5986.md +++ b/test/command/5986.md @@ -6,7 +6,8 @@ <p><span id="nav.xhtml"></span></p> <nav type="landmarks" id="landmarks" hidden="hidden"> <ol> -<li><a href="#nav.xhtml#toc">Table of contents</a></li> +<li><a href="text/title_page.xhtml">Title Page</a></li> +<li><a href="#nav.xhtml#toc">Table of Contents</a></li> </ol> </nav> <p><span id="ch001.xhtml"></span></p> -- cgit v1.2.3 From 171d3db3848a8ca79480688748d0ffff67ed2039 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 4 Dec 2020 09:47:56 -0800 Subject: HTML writer: Fix handling of nested csl- display spans. Previously inner Spans used to represent CSL display attributes were not rendered as div tags. See #6921. --- src/Text/Pandoc/Writers/HTML.hs | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c92131d5a..76f17f77a 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -695,12 +695,12 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs) let fragmentClass = case slideVariant of RevealJsSlides -> "fragment" _ -> "incremental" - let inDiv zs = RawBlock (Format "html") ("<div class=\"" + let inDiv' zs = RawBlock (Format "html") ("<div class=\"" <> fragmentClass <> "\">") : (zs ++ [RawBlock (Format "html") "</div>"]) let breakOnPauses zs = case splitBy isPause zs of [] -> [] - y:ys -> y ++ concatMap inDiv ys + y:ys -> y ++ concatMap inDiv' ys let (titleBlocks, innerSecs) = if titleSlide -- title slides have no content of their own @@ -783,9 +783,7 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do -- a newline between the column divs, which throws -- off widths! see #4028 mconcat <$> mapM (blockToHtml opts) bs - else if isCslBibEntry - then mconcat <$> mapM (cslEntryToHtml opts') bs - else blockListToHtml opts' bs + else blockListToHtml opts' bs let contents' = nl opts >> contents >> nl opts let (divtag, classes'') = if html5 && "section" `elem` classes' then (H5.section, filter (/= "section") classes') @@ -1213,6 +1211,10 @@ inlineToHtml opts inline = do LineBreak -> return $ do if html5 then H5.br else H.br strToHtml "\n" + (Span ("",[cls],[]) ils) + | cls == "csl-block" || cls == "csl-left-margin" || + cls == "csl-right-inline" || cls == "csl-indent" + -> inlineListToHtml opts ils >>= inDiv cls (Span (id',classes,kvs) ils) -> let spanLikeTag = case classes of @@ -1462,22 +1464,12 @@ blockListToNote opts ref blocks = do _ -> noteItem return $ nl opts >> noteItem' -cslEntryToHtml :: PandocMonad m - => WriterOptions - -> Block - -> StateT WriterState m Html -cslEntryToHtml opts (Para xs) = do +inDiv :: PandocMonad m=> Text -> Html -> StateT WriterState m Html +inDiv cls x = do html5 <- gets stHtml5 - let inDiv :: Text -> Html -> Html - inDiv cls x = (if html5 then H5.div else H.div) - x ! A.class_ (toValue cls) - let go (Span ("",[cls],[]) ils) - | cls == "csl-block" || cls == "csl-left-margin" || - cls == "csl-right-inline" || cls == "csl-indent" - = inDiv cls <$> inlineListToHtml opts ils - go il = inlineToHtml opts il - mconcat <$> mapM go xs -cslEntryToHtml opts x = blockToHtml opts x + return $ + (if html5 then H5.div else H.div) + x ! A.class_ (toValue cls) isMathEnvironment :: Text -> Bool isMathEnvironment s = "\\begin{" `T.isPrefixOf` s && -- cgit v1.2.3 From 68bcddeb2136242aabee716420ecad251fc38920 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 4 Dec 2020 10:14:19 -0800 Subject: LaTeX writer: Fix bug with nested csl- display Spans. See #6921. --- src/Text/Pandoc/Writers/LaTeX.hs | 68 +++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d665269ab..e02cc2833 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -553,9 +553,7 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do Just s -> braces (literal s)) $$ inner $+$ "\\end{CSLReferences}" - else if "csl-entry" `elem` classes - then vcat <$> mapM cslEntryToLaTeX bs - else blockListToLaTeX bs + else blockListToLaTeX bs modify $ \st -> st{ stIncremental = oldIncremental } linkAnchor' <- hypertarget True identifier empty -- see #2704 for the motivation for adding \leavevmode: @@ -1180,23 +1178,6 @@ isQuoted :: Inline -> Bool isQuoted (Quoted _ _) = True isQuoted _ = False -cslEntryToLaTeX :: PandocMonad m - => Block - -> LW m (Doc Text) -cslEntryToLaTeX (Para xs) = - mconcat <$> mapM go xs - where - go (Span ("",["csl-block"],[]) ils) = - (cr <>) . inCmd "CSLBlock" <$> inlineListToLaTeX ils - go (Span ("",["csl-left-margin"],[]) ils) = - inCmd "CSLLeftMargin" <$> inlineListToLaTeX ils - go (Span ("",["csl-right-inline"],[]) ils) = - (cr <>) . inCmd "CSLRightInline" <$> inlineListToLaTeX ils - go (Span ("",["csl-indent"],[]) ils) = - (cr <>) . inCmd "CSLIndent" <$> inlineListToLaTeX ils - go il = inlineToLaTeX il -cslEntryToLaTeX x = blockToLaTeX x - -- | Convert inline element to LaTeX inlineToLaTeX :: PandocMonad m => Inline -- ^ Inline to convert @@ -1204,23 +1185,38 @@ inlineToLaTeX :: PandocMonad m inlineToLaTeX (Span (id',classes,kvs) ils) = do linkAnchor <- hypertarget False id' empty lang <- toLang $ lookup "lang" kvs - let cmds = ["textup" | "csl-no-emph" `elem` classes] ++ - ["textnormal" | "csl-no-strong" `elem` classes || - "csl-no-smallcaps" `elem` classes] ++ - ["RL" | ("dir", "rtl") `elem` kvs] ++ - ["LR" | ("dir", "ltr") `elem` kvs] ++ - (case lang of - Just lng -> let (l, o) = toPolyglossia lng - ops = if T.null o then "" else "[" <> o <> "]" - in ["text" <> l <> ops] - Nothing -> []) + let classToCmd "csl-no-emph" = Just "textup" + classToCmd "csl-no-strong" = Just "textnormal" + classToCmd "csl-no-smallcaps" = Just "textnormal" + classToCmd "csl-block" = Just "CSLBlock" + classToCmd "csl-left-margin" = Just "CSLLeftMargin" + classToCmd "csl-right-inline" = Just "CSLRightInline" + classToCmd "csl-indent" = Just "CSLIndent" + classToCmd _ = Nothing + kvToCmd ("dir","rtl") = Just "RL" + kvToCmd ("dir","ltr") = Just "LR" + kvToCmd _ = Nothing + langCmds = + case lang of + Just lng -> let (l, o) = toPolyglossia lng + ops = if T.null o then "" else "[" <> o <> "]" + in ["text" <> l <> ops] + Nothing -> [] + let cmds = mapMaybe classToCmd classes ++ mapMaybe kvToCmd kvs ++ langCmds contents <- inlineListToLaTeX ils - return $ (if T.null id' - then empty - else "\\protect" <> linkAnchor) <> - (if null cmds - then braces contents - else foldr inCmd contents cmds) + return $ + (case classes of + ["csl-block"] -> (cr <>) + ["csl-left-margin"] -> (cr <>) + ["csl-right-inline"] -> (cr <>) + ["csl-indent"] -> (cr <>) + _ -> id) $ + (if T.null id' + then empty + else "\\protect" <> linkAnchor) <> + (if null cmds + then braces contents + else foldr inCmd contents cmds) inlineToLaTeX (Emph lst) = inCmd "emph" <$> inlineListToLaTeX lst inlineToLaTeX (Underline lst) = inCmd "underline" <$> inlineListToLaTeX lst inlineToLaTeX (Strong lst) = inCmd "textbf" <$> inlineListToLaTeX lst -- cgit v1.2.3 From dc3ef5201f9531bc405ac07e763d9f004bb6bc91 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 4 Dec 2020 10:55:48 -0800 Subject: Markdown writer: ensure that a new csl-block begins on a new line. This just looks better and doesn't affect the semantics. See #6921. --- src/Text/Pandoc/Writers/Markdown.hs | 7 ++++++- test/command/pandoc-citeproc-53.md | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 6aec6b244..5eb47b261 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -1058,7 +1058,12 @@ inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = inlineToMarkdown opts (Span attrs ils) = do variant <- asks envVariant contents <- inlineListToMarkdown opts ils - return $ case variant of + return $ case attrs of + (_,["csl-block"],_) -> (cr <>) + (_,["csl-left-margin"],_) -> (cr <>) + (_,["csl-indent"],_) -> (cr <>) + _ -> id + $ case variant of PlainText -> contents _ | attrs == nullAttr -> contents | isEnabled Ext_bracketed_spans opts -> diff --git a/test/command/pandoc-citeproc-53.md b/test/command/pandoc-citeproc-53.md index 295f52049..fb8d5c35e 100644 --- a/test/command/pandoc-citeproc-53.md +++ b/test/command/pandoc-citeproc-53.md @@ -28,11 +28,13 @@ Doe[^1] Doe[^2] Roe[^3] Roe[^4] Doe[^5] Doe[^6] Roe[^7] Roe[^8] ::: {#refs .references .csl-bib-body} ::: {#ref-a .csl-entry} -[[Doe J.]{.smallcaps} ]{.csl-block}[2000, *Work A*,.]{.csl-left-margin} +[[Doe J.]{.smallcaps} ]{.csl-block} +[2000, *Work A*,.]{.csl-left-margin} ::: ::: {#ref-b .csl-entry} -[[Roe J.]{.smallcaps} ]{.csl-block}[1990, *Work B*,.]{.csl-left-margin} +[[Roe J.]{.smallcaps} ]{.csl-block} +[1990, *Work B*,.]{.csl-left-margin} ::: ::: -- cgit v1.2.3 From 6f35600204132c5a0907bb7cccb2e10ffb27506a Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sat, 5 Dec 2020 06:00:21 +0100 Subject: Docbook writer: add XML namespaces to top-level elements (#6923) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we only added xmlns attributes to chapter elements, even when running with --top-level-division=section. Let’s add the namespaces to part and section elements too, when they are the selected top-level divisions. We do not need to add namespaces to documents produced with --standalone flag, since those will already have xmlns attribute on the root element in the template. --- src/Text/Pandoc/Writers/Docbook.hs | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 408d8cc0c..3f4c67f10 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -15,6 +15,7 @@ Conversion of 'Pandoc' documents to Docbook XML. module Text.Pandoc.Writers.Docbook ( writeDocbook4, writeDocbook5 ) where import Control.Monad.Reader import Data.Generics (everywhere, mkT) +import Data.Maybe (isNothing) import Data.Monoid (Any (..)) import Data.Text (Text) import qualified Data.Text as T @@ -40,6 +41,18 @@ data DocBookVersion = DocBook4 | DocBook5 type DB = ReaderT DocBookVersion +-- | Get level of the top-level headers based on the configured top-level division. +-- The header level can then be used to determine appropriate DocBook element +-- for each subdivision associated with a header. +-- The numbering here follows LaTeX's internal numbering +getStartLvl :: WriterOptions -> Int +getStartLvl opts = + case writerTopLevelDivision opts of + TopLevelPart -> -1 + TopLevelChapter -> 0 + TopLevelSection -> 1 + TopLevelDefault -> 1 + -- | Convert list of authors to a docbook <author> section authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines authorToDocbook opts name' = do @@ -79,12 +92,7 @@ writeDocbook opts (Pandoc meta blocks) = do let colwidth = if writerWrapText opts == WrapAuto then Just $ writerColumns opts else Nothing - -- The numbering here follows LaTeX's internal numbering - let startLvl = case writerTopLevelDivision opts of - TopLevelPart -> -1 - TopLevelChapter -> 0 - TopLevelSection -> 1 - TopLevelDefault -> 1 + let startLvl = getStartLvl opts let fromBlocks = blocksToDocbook opts . makeSections False (Just startLvl) auths' <- mapM (authorToDocbook opts) $ docAuthors meta @@ -170,8 +178,12 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do then "xml:id" else "id" idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')] - nsAttr = if version == DocBook5 && lvl == 0 then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] - else [] + -- We want to add namespaces to the root (top-level) element. + nsAttr = if version == DocBook5 && lvl == getStartLvl opts && isNothing (writerTemplate opts) + -- Though, DocBook 4 does not support namespaces and + -- standalone documents will include them in the template. + then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] + else [] attribs = nsAttr <> idAttr title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs -- cgit v1.2.3 From ddb76cb356a82f6a9e51a6f3626dd154816e9205 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 5 Dec 2020 09:53:39 -0800 Subject: LaTeX reader: don't apply theorem default styling to a figure inside. If we put an image in italics, then when rendering to Markdown we no longer get an implicit figure. Closes #6925. --- src/Text/Pandoc/Readers/LaTeX.hs | 1 + test/command/6925.md | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 test/command/6925.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 15a1a19fc..afe960454 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1812,6 +1812,7 @@ theoremEnvironment name = do _ -> bs italicize :: Block -> Block +italicize x@(Para [Image{}]) = x -- see #6925 italicize (Para ils) = Para [Emph ils] italicize (Plain ils) = Plain [Emph ils] italicize x = x diff --git a/test/command/6925.md b/test/command/6925.md new file mode 100644 index 000000000..458a0b91d --- /dev/null +++ b/test/command/6925.md @@ -0,0 +1,34 @@ +``` +% pandoc -f latex -t markdown +\documentclass{amsart} +\newtheorem{thm}{Theorem}[section] +\theoremstyle{definition} +\newtheorem{thm2}[section]{Theorem} +\begin{document} +\begin{thm} +a +\begin{figure} +\includegraphics[]{1.png} +\end{figure} +\end{thm} + +\begin{thm2} +a +\begin{figure} +\includegraphics[]{1.png} +\end{figure} +\end{thm2} +\end{document} +^D +::: {.thm} +**Theorem 1**. *a* + + +::: + +::: {.thm2} +**Theorem 1**. a + + +::: +``` -- cgit v1.2.3 From c161893f442a3e001b64af1421e9f62376d71c92 Mon Sep 17 00:00:00 2001 From: Nils Carlson <nils@nilscarlson.se> Date: Sat, 5 Dec 2020 18:00:04 +0000 Subject: OpenDocument writer: Allow references for internal links (#6774) This commit adds two extensions to the OpenDocument writer, `xrefs_name` and `xrefs_number`. Links to headings, figures and tables inside the document are substituted with cross-references that will use the name or caption of the referenced item for `xrefs_name` or the number for `xrefs_number`. For the `xrefs_number` to be useful heading numbers must be enabled in the generated document and table and figure captions must be enabled using for example the `native_numbering` extension. In order for numbers and reference text to be updated the generated document must be refreshed. Co-authored-by: Nils Carlson <nils.carlson@ludd.ltu.se> --- MANUAL.txt | 37 ++++++++++++++ src/Text/Pandoc/Extensions.hs | 4 ++ src/Text/Pandoc/Writers/OpenDocument.hs | 91 ++++++++++++++++++++++++++------- test/command/6774.md | 63 +++++++++++++++++++++++ 4 files changed, 177 insertions(+), 18 deletions(-) create mode 100644 test/command/6774.md (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index fad885fed..4e1615ff2 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3027,6 +3027,43 @@ This extension can be enabled/disabled for the following formats: output formats : `odt`, `opendocument` +#### Extension: `xrefs_name` #### + +Links to headings, figures and tables inside the document are +substituted with cross-references that will use the name or caption +of the referenced item. The original link text is replaced once +the generated document is refreshed. This extension can be combined +with `xrefs_number` in which case numbers will appear before the +name. + +Text in cross-references is only made consistent with the referenced +item once the document has been refreshed. + +This extension can be enabled/disabled for the following formats: + +output formats +: `odt`, `opendocument` + +#### Extension: `xrefs_number` #### + +Links to headings, figures and tables inside the document are +substituted with cross-references that will use the number +of the referenced item. The original link text is discarded. +This extension can be combined with `xrefs_name` in which case +the name or caption numbers will appear after the number. + +For the `xrefs_number` to be useful heading numbers must be enabled +in the generated document, also table and figure captions must be enabled +using for example the `native_numbering` extension. + +Numbers in cross-references are only visible in the final document once +it has been refreshed. + +This extension can be enabled/disabled for the following formats: + +output formats +: `odt`, `opendocument` + #### Extension: `styles` #### {#ext-styles} When converting from docx, read all docx styles as divs (for diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 646f7abfb..a94e24e2c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -149,6 +149,8 @@ data Extension = | Ext_tex_math_dollars -- ^ TeX math between $..$ or $$..$$ | Ext_tex_math_double_backslash -- ^ TeX math btw \\(..\\) \\[..\\] | Ext_tex_math_single_backslash -- ^ TeX math btw \(..\) \[..\] + | Ext_xrefs_name -- ^ Use xrefs with names + | Ext_xrefs_number -- ^ Use xrefs with numbers | Ext_yaml_metadata_block -- ^ YAML metadata block | Ext_gutenberg -- ^ Use Project Gutenberg conventions for plain | Ext_attributes -- ^ Generic attribute syntax @@ -465,6 +467,8 @@ getAllExtensions f = universalExtensions <> getAll f getAll "opendocument" = extensionsFromList [ Ext_empty_paragraphs , Ext_native_numbering + , Ext_xrefs_name + , Ext_xrefs_number ] getAll "odt" = getAll "opendocument" <> autoIdExtensions getAll "muse" = autoIdExtensions <> diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 071a5542f..cf42f2228 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -17,6 +17,7 @@ module Text.Pandoc.Writers.OpenDocument ( writeOpenDocument ) where import Control.Arrow ((***), (>>>)) import Control.Monad.State.Strict hiding (when) import Data.Char (chr) +import Data.Foldable (find) import Data.List (sortOn, sortBy, foldl') import qualified Data.Map as Map import Data.Maybe (fromMaybe, isNothing) @@ -35,6 +36,7 @@ import Text.DocLayout import Text.Pandoc.Shared (linesToPara, tshow, blocksToInlines) import Text.Pandoc.Templates (renderTemplate) import qualified Text.Pandoc.Translations as Term (Term(Figure, Table)) +import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import qualified Text.Pandoc.Writers.AnnotatedTable as Ann @@ -54,6 +56,11 @@ plainToPara x = x type OD m = StateT WriterState m +data ReferenceType + = HeaderRef + | TableRef + | ImageRef + data WriterState = WriterState { stNotes :: [Doc Text] , stTableStyles :: [Doc Text] @@ -69,6 +76,7 @@ data WriterState = , stImageId :: Int , stTableCaptionId :: Int , stImageCaptionId :: Int + , stIdentTypes :: [(Text,ReferenceType)] } defaultWriterState :: WriterState @@ -86,6 +94,7 @@ defaultWriterState = , stImageId = 1 , stTableCaptionId = 1 , stImageCaptionId = 1 + , stIdentTypes = [] } when :: Bool -> Doc Text -> Doc Text @@ -243,6 +252,12 @@ writeOpenDocument opts (Pandoc meta blocks) = do meta ((body, metadata),s) <- flip runStateT defaultWriterState $ do + let collectInlineIdent (Image (ident,_,_) _ _) = [(ident,ImageRef)] + collectInlineIdent _ = [] + let collectBlockIdent (Header _ (ident,_,_) _) = [(ident,HeaderRef)] + collectBlockIdent (Table (ident,_,_) _ _ _ _ _) = [(ident,TableRef)] + collectBlockIdent _ = [] + modify $ \s -> s{ stIdentTypes = query collectBlockIdent blocks ++ query collectInlineIdent blocks } m <- metaToContext opts (blocksToOpenDocument opts) (fmap chomp . inlinesToOpenDocument opts) @@ -411,7 +426,7 @@ blockToOpenDocument o bs inTags True "text:list" [ ("text:style-name", "L" <> tshow ln)] <$> orderedListToOpenDocument o pn b table :: PandocMonad m => Ann.Table -> OD m (Doc Text) - table (Ann.Table _ (Caption _ c) colspecs thead tbodies _) = do + table (Ann.Table (ident, _, _) (Caption _ c) colspecs thead tbodies _) = do tn <- length <$> gets stTableStyles pn <- length <$> gets stParaStyles let genIds = map chr [65..] @@ -433,7 +448,7 @@ blockToOpenDocument o bs then return empty else inlinesToOpenDocument o (blocksToInlines c) >>= if isEnabled Ext_native_numbering o - then numberedTableCaption + then numberedTableCaption ident else unNumberedCaption "TableCaption" th <- colHeadsToOpenDocument o (map fst paraHStyles) thead tr <- mapM (tableBodyToOpenDocument o (map fst paraStyles)) tbodies @@ -442,36 +457,39 @@ blockToOpenDocument o bs , ("table:style-name", name) ] (vcat columns $$ th $$ vcat tr) return $ captionDoc $$ tableDoc - figure attr caption source title | null caption = + figure attr@(ident, _, _) caption source title | null caption = withParagraphStyle o "Figure" [Para [Image attr caption (source,title)]] | otherwise = do imageDoc <- withParagraphStyle o "FigureWithCaption" [Para [Image attr caption (source,title)]] captionDoc <- inlinesToOpenDocument o caption >>= if isEnabled Ext_native_numbering o - then numberedFigureCaption + then numberedFigureCaption ident else unNumberedCaption "FigureCaption" return $ imageDoc $$ captionDoc -numberedTableCaption :: PandocMonad m => Doc Text -> OD m (Doc Text) -numberedTableCaption caption = do +numberedTableCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text) +numberedTableCaption ident caption = do id' <- gets stTableCaptionId modify (\st -> st{ stTableCaptionId = id' + 1 }) capterm <- translateTerm Term.Table - return $ numberedCaption "TableCaption" capterm "Table" id' caption + return $ numberedCaption "TableCaption" capterm "Table" id' ident caption -numberedFigureCaption :: PandocMonad m => Doc Text -> OD m (Doc Text) -numberedFigureCaption caption = do +numberedFigureCaption :: PandocMonad m => Text -> Doc Text -> OD m (Doc Text) +numberedFigureCaption ident caption = do id' <- gets stImageCaptionId modify (\st -> st{ stImageCaptionId = id' + 1 }) capterm <- translateTerm Term.Figure - return $ numberedCaption "FigureCaption" capterm "Illustration" id' caption + return $ numberedCaption "FigureCaption" capterm "Illustration" id' ident caption -numberedCaption :: Text -> Text -> Text -> Int -> Doc Text -> Doc Text -numberedCaption style term name num caption = +numberedCaption :: Text -> Text -> Text -> Int -> Text -> Doc Text -> Doc Text +numberedCaption style term name num ident caption = let t = text $ T.unpack term r = num - 1 - s = inTags False "text:sequence" [ ("text:ref-name", "ref" <> name <> tshow r), + ident' = case ident of + "" -> "ref" <> name <> tshow r + _ -> ident + s = inTags False "text:sequence" [ ("text:ref-name", ident'), ("text:name", name), ("text:formula", "ooow:" <> name <> "+1"), ("style:num-format", "1") ] $ text $ show num @@ -607,7 +625,9 @@ inlineToOpenDocument o ils else do report $ InlineNotRendered ils return empty - Link _ l (s,t) -> mkLink s t <$> inlinesToOpenDocument o l + Link _ l (s,t) -> do + identTypes <- gets stIdentTypes + mkLink o identTypes s t <$> inlinesToOpenDocument o l Image attr _ (s,t) -> mkImg attr s t Note l -> mkNote l where @@ -619,10 +639,6 @@ inlineToOpenDocument o ils unhighlighted s = inlinedCode $ preformatted s preformatted s = handleSpaces $ escapeStringForXML s inlinedCode s = return $ inTags False "text:span" [("text:style-name", "Source_Text")] s - mkLink s t = inTags False "text:a" [ ("xlink:type" , "simple") - , ("xlink:href" , s ) - , ("office:name", t ) - ] . inSpanTags "Definition" mkImg (_, _, kvs) s _ = do id' <- gets stImageId modify (\st -> st{ stImageId = id' + 1 }) @@ -659,6 +675,45 @@ inlineToOpenDocument o ils addNote nn return nn +mkLink :: WriterOptions -> [(Text,ReferenceType)] -> Text -> Text -> Doc Text -> Doc Text +mkLink o identTypes s t d = + let maybeIdentAndType = case T.uncons s of + Just ('#', ident) -> find ((ident ==) . fst) identTypes + _ -> Nothing + d' = inSpanTags "Definition" d + ref refType format ident = inTags False refType + [ ("text:reference-format", format ), + ("text:ref-name", ident) ] + inlineSpace = selfClosingTag "text:s" [] + bookmarkRef = ref "text:bookmark-ref" + bookmarkRefNumber ident = bookmarkRef "number" ident mempty + bookmarkRefName ident = bookmarkRef "text" ident d + bookmarkRefNameNumber ident = bookmarkRefNumber ident <> inlineSpace <> bookmarkRefName ident + bookmarkRef' + | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = bookmarkRefNameNumber + | isEnabled Ext_xrefs_name o = bookmarkRefName + | otherwise = bookmarkRefNumber + sequenceRef = ref "text:sequence-ref" + sequenceRefNumber ident = sequenceRef "value" ident mempty + sequenceRefName ident = sequenceRef "caption" ident d + sequenceRefNameNumber ident = sequenceRefNumber ident <> inlineSpace <> sequenceRefName ident + sequenceRef' + | isEnabled Ext_xrefs_number o && isEnabled Ext_xrefs_name o = sequenceRefNameNumber + | isEnabled Ext_xrefs_name o = sequenceRefName + | otherwise = sequenceRefNumber + link = inTags False "text:a" [ ("xlink:type" , "simple") + , ("xlink:href" , s ) + , ("office:name", t ) + ] d' + linkOrReference = case maybeIdentAndType of + Just (ident, HeaderRef) -> bookmarkRef' ident + Just (ident, TableRef) -> sequenceRef' ident + Just (ident, ImageRef) -> sequenceRef' ident + _ -> link + in if isEnabled Ext_xrefs_name o || isEnabled Ext_xrefs_number o + then linkOrReference + else link + bulletListStyle :: PandocMonad m => Int -> OD m (Int,(Int,[Doc Text])) bulletListStyle l = do let doStyles i = inTags True "text:list-level-style-bullet" diff --git a/test/command/6774.md b/test/command/6774.md new file mode 100644 index 000000000..66549c0f2 --- /dev/null +++ b/test/command/6774.md @@ -0,0 +1,63 @@ +``` +% pandoc -f native -t opendocument --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")]] +^D +<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The +Chapter<text:bookmark-end text:name="chapter1" /></text:h> +<text:p text:style-name="First_20_paragraph">Chapter 1 references +<text:a xlink:type="simple" xlink:href="#chapter1" office:name=""><text:span text:style-name="Definition">The +Chapter</text:span></text:a></text:p> +``` +``` +% pandoc -f native -t opendocument+xrefs_name --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")] +,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")] +,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]] +^D +<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The +Chapter<text:bookmark-end text:name="chapter1" /></text:h> +<text:p text:style-name="First_20_paragraph">Chapter 1 references +<text:bookmark-ref text:reference-format="text" text:ref-name="chapter1">The +Chapter</text:bookmark-ref></text:p> +<text:p text:style-name="FigureWithCaption"><draw:frame draw:name="img1"><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p> +<text:p text:style-name="FigureCaption">lalune</text:p> +<text:p text:style-name="Text_20_body">Image 1 references +<text:sequence-ref text:reference-format="caption" text:ref-name="lalune">La +Lune</text:sequence-ref></text:p> +``` +``` +% pandoc -f native -t opendocument+xrefs_number --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")] +,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")] +,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]] +^D +<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The +Chapter<text:bookmark-end text:name="chapter1" /></text:h> +<text:p text:style-name="First_20_paragraph">Chapter 1 references +<text:bookmark-ref text:reference-format="number" text:ref-name="chapter1"></text:bookmark-ref></text:p> +<text:p text:style-name="FigureWithCaption"><draw:frame draw:name="img1"><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p> +<text:p text:style-name="FigureCaption">lalune</text:p> +<text:p text:style-name="Text_20_body">Image 1 references +<text:sequence-ref text:reference-format="value" text:ref-name="lalune"></text:sequence-ref></text:p> +``` +``` +% pandoc -f native -t opendocument+xrefs_number+xrefs_name --quiet +[Header 1 ("chapter1",[],[]) [Str "The",Space,Str "Chapter"] +,Para [Str "Chapter",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "The",Space,Str "Chapter"] ("#chapter1","")] +,Para [Image ("lalune",[],[]) [Str "lalune"] ("lalune.jpg","fig:Voyage dans la Lune")] +,Para [Str "Image",Space,Str "1",Space,Str "references",Space,Link ("",[],[]) [Str "La",Space,Str "Lune"] ("#lalune","")]] +^D +<text:h text:style-name="Heading_20_1" text:outline-level="1"><text:bookmark-start text:name="chapter1" />The +Chapter<text:bookmark-end text:name="chapter1" /></text:h> +<text:p text:style-name="First_20_paragraph">Chapter 1 references +<text:bookmark-ref text:reference-format="number" text:ref-name="chapter1"></text:bookmark-ref><text:s /><text:bookmark-ref text:reference-format="text" text:ref-name="chapter1">The +Chapter</text:bookmark-ref></text:p> +<text:p text:style-name="FigureWithCaption"><draw:frame draw:name="img1"><draw:image xlink:href="lalune.jpg" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad" /></draw:frame></text:p> +<text:p text:style-name="FigureCaption">lalune</text:p> +<text:p text:style-name="Text_20_body">Image 1 references +<text:sequence-ref text:reference-format="value" text:ref-name="lalune"></text:sequence-ref><text:s /><text:sequence-ref text:reference-format="caption" text:ref-name="lalune">La +Lune</text:sequence-ref></text:p> +``` -- cgit v1.2.3 From acf932825bfe40d9a18046c9d304f4f14363a88a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 5 Dec 2020 22:05:37 +0100 Subject: Org reader: preserve targets of spurious links Links with (internal) targets that the reader doesn't know about are converted into emphasized text. Information on the link target is now preserved by wrapping the text in a Span of class `spurious-link`, with an attribute `target` set to the link's original target. This allows to recover and fix broken or unknown links with filters. See: #6916 --- src/Text/Pandoc/Readers/Org/Inlines.hs | 9 ++++----- test/Tests/Readers/Org/Meta.hs | 6 ++++-- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index b234bee58..0330cf55f 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -477,17 +477,17 @@ linkToInlinesF linkStr = internalLink :: Text -> Inlines -> F Inlines internalLink link title = do - anchorB <- (link `elem`) <$> asksF orgStateAnchorIds - if anchorB + ids <- asksF orgStateAnchorIds + if link `elem` ids then return $ B.link ("#" <> link) "" title - else return $ B.emph title + else let attr' = ("", ["spurious-link"] , [("target", link)]) + in return $ B.spanWith attr' (B.emph title) -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with -- @anchor-id@ set as id. Legal anchors in org-mode are defined through -- @org-target-regexp@, which is fairly liberal. Since no link is created if -- @anchor-id@ contains spaces, we are more restrictive in what is accepted as -- an anchor. - anchor :: PandocMonad m => OrgParser m (F Inlines) anchor = try $ do anchorId <- parseAnchor @@ -501,7 +501,6 @@ anchor = try $ do -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. - solidify :: Text -> Text solidify = T.map replaceSpecialChar where replaceSpecialChar c diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs index 041016f64..bc167f2a5 100644 --- a/test/Tests/Readers/Org/Meta.hs +++ b/test/Tests/Readers/Org/Meta.hs @@ -270,7 +270,8 @@ tests = , "Search links are read as emph" =: "[[Wally][Where's Wally?]]" =?> - para (emph $ "Where's" <> space <> "Wally?") + para (spanWith ("", ["spurious-link"], [("target", "Wally")]) + (emph $ "Where's" <> space <> "Wally?")) , "Link to nonexistent anchor" =: T.unlines [ "<<link-here>> Target." @@ -278,5 +279,6 @@ tests = , "[[link$here][See here!]]" ] =?> (para (spanWith ("link-here", [], []) mempty <> "Target.") <> - para (emph ("See" <> space <> "here!"))) + para (spanWith ("", ["spurious-link"], [("target", "link$here")]) + (emph ("See" <> space <> "here!")))) ] -- cgit v1.2.3 From dc6856530c2cb6ca58ed82721ab895b86cfe0c1c Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Fri, 4 Dec 2020 09:16:56 +0100 Subject: Docbook writer: handle admonitions Similarly to https://github.com/jgm/pandoc/commit/d6fdfe6f2bba2a8ed25d6c9f11861774001f7a91, we should handle admonitions. --- src/Text/Pandoc/Writers/Docbook.hs | 42 +++++++++++++++++++++--------- test/Tests/Writers/Docbook.hs | 52 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 3f4c67f10..da111cbc5 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -188,18 +188,36 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents -blockToDocbook opts (Div (ident,_,_) [Para lst]) = - let attribs = [("id", ident) | not (T.null ident)] in - if hasLineBreaks lst - then flush . nowrap . inTags False "literallayout" attribs - <$> inlinesToDocbook opts lst - else inTags True "para" attribs <$> inlinesToDocbook opts lst -blockToDocbook opts (Div (ident,_,_) bs) = do - contents <- blocksToDocbook opts (map plainToPara bs) - return $ - (if T.null ident - then mempty - else selfClosingTag "anchor" [("id", ident)]) $$ contents +blockToDocbook opts (Div (ident,classes,_) bs) = + let identAttribs = [("id", ident) | not (T.null ident)] + admonitions = ["attention","caution","danger","error","hint", + "important","note","tip","warning"] + in case classes of + (l:_) | l `elem` admonitions -> do + let (mTitleBs, bodyBs) = + case bs of + -- Matches AST produced by the Docbook reader. + (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest) + _ -> (Nothing, bs) + admonitionTitle <- case mTitleBs of + Nothing -> return mempty + -- id will be attached to the admonition so let’s pass empty identAttrs. + Just titleBs -> inTags False "title" [] <$> titleBs + admonitionBody <- handleDivBody [] bodyBs + return (inTags True l identAttribs (admonitionTitle $$ admonitionBody)) + _ -> handleDivBody identAttribs bs + where + handleDivBody identAttribs [Para lst] = + if hasLineBreaks lst + then flush . nowrap . inTags False "literallayout" identAttribs + <$> inlinesToDocbook opts lst + else inTags True "para" identAttribs <$> inlinesToDocbook opts lst + handleDivBody identAttribs bodyBs = do + contents <- blocksToDocbook opts (map plainToPara bodyBs) + return $ + (if null identAttribs + then mempty + else selfClosingTag "anchor" identAttribs) $$ contents blockToDocbook _ h@Header{} = do -- should be handled by Div section above, except inside lists/blockquotes report $ BlockNotRendered h diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index f6a047b0b..1d53dcfe7 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -70,6 +70,58 @@ tests = [ testGroup "line blocks" , "</para>" ] ) ] + , testGroup "divs" + [ "admonition" =: divWith ("foo", ["warning"], []) (para "This is a test") + =?> unlines + [ "<warning id=\"foo\">" + , " <para>" + , " This is a test" + , " </para>" + , "</warning>" + ] + , "admonition-with-title" =: + divWith ("foo", ["attention"], []) ( + divWith ("foo", ["title"], []) + (plain (text "This is title")) <> + para "This is a test" + ) + =?> unlines + [ "<attention id=\"foo\">" + , " <title>This is title</title>" + , " <para>" + , " This is a test" + , " </para>" + , "</attention>" + ] + , "single-child" =: + divWith ("foo", [], []) (para "This is a test") + =?> unlines + [ "<para id=\"foo\">" + , " This is a test" + , "</para>" + ] + , "single-literal-child" =: + divWith ("foo", [], []) lineblock + =?> unlines + [ "<literallayout id=\"foo\">some text" + , "and more lines" + , "and again</literallayout>" + ] + , "multiple-children" =: + divWith ("foo", [], []) ( + para "This is a test" <> + para "This is an another test" + ) + =?> unlines + [ "<anchor id=\"foo\" />" + , "<para>" + , " This is a test" + , "</para>" + , "<para>" + , " This is an another test" + , "</para>" + ] + ] , testGroup "compact lists" [ testGroup "bullet" [ "compact" =: bulletList [plain "a", plain "b", plain "c"] -- cgit v1.2.3 From 16ef87745702f69d5aa948fbe6d2101577dee8f4 Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Fri, 4 Dec 2020 09:28:32 +0100 Subject: Docbook writer: Use correct id attribute consistently MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DocBook5 should always use xml:id instead of id so let’s use it everywhere. --- src/Text/Pandoc/Writers/Docbook.hs | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index da111cbc5..398920839 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -53,6 +53,13 @@ getStartLvl opts = TopLevelSection -> 1 TopLevelDefault -> 1 +-- | Get correct name for the id attribute based on DocBook version. +-- DocBook 4 used custom id attribute but DocBook 5 adopted the xml:id specification. +-- https://www.w3.org/TR/xml-id/ +idName :: DocBookVersion -> Text +idName DocBook5 = "xml:id" +idName DocBook4 = "id" + -- | Convert list of authors to a docbook <author> section authorToDocbook :: PandocMonad m => WriterOptions -> [Inline] -> DB m B.Inlines authorToDocbook opts name' = do @@ -174,10 +181,7 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do then "section" else "sect" <> tshow n _ -> "simplesect" - idName = if version == DocBook5 - then "xml:id" - else "id" - idAttr = [(idName, writerIdentifierPrefix opts <> id') | not (T.null id')] + idAttr = [(idName version, writerIdentifierPrefix opts <> id') | not (T.null id')] -- We want to add namespaces to the root (top-level) element. nsAttr = if version == DocBook5 && lvl == getStartLvl opts && isNothing (writerTemplate opts) -- Though, DocBook 4 does not support namespaces and @@ -188,11 +192,12 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents -blockToDocbook opts (Div (ident,classes,_) bs) = - let identAttribs = [("id", ident) | not (T.null ident)] +blockToDocbook opts (Div (ident,classes,_) bs) = do + version <- ask + let identAttribs = [(idName version, ident) | not (T.null ident)] admonitions = ["attention","caution","danger","error","hint", "important","note","tip","warning"] - in case classes of + case classes of (l:_) | l `elem` admonitions -> do let (mTitleBs, bodyBs) = case bs of @@ -371,11 +376,12 @@ inlineToDocbook opts (Quoted _ lst) = inTagsSimple "quote" <$> inlinesToDocbook opts lst inlineToDocbook opts (Cite _ lst) = inlinesToDocbook opts lst -inlineToDocbook opts (Span (ident,_,_) ils) = +inlineToDocbook opts (Span (ident,_,_) ils) = do + version <- ask ((if T.null ident then mempty - else selfClosingTag "anchor" [("id", ident)]) <>) <$> - inlinesToDocbook opts ils + else selfClosingTag "anchor" [(idName version, ident)]) <>) <$> + inlinesToDocbook opts ils inlineToDocbook _ (Code _ str) = return $ inTagsSimple "literal" $ literal (escapeStringForXML str) inlineToDocbook opts (Math t str) -- cgit v1.2.3 From 70c7c5703afcbd1cbf2a80c2be515e038abcd419 Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Mon, 7 Dec 2020 07:28:39 +0100 Subject: Docbook writer: Handle admonition titles from Markdown reader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docbook reader produces a `Div` with `title` class for `<title>` element within an “admonition” element. Markdown writer then turns this into a fenced div with `title` class attribute. Since fenced divs are block elements, their content is recognized as a paragraph by the Markdown reader. This is an issue for Docbook writer because it would produce an invalid DocBook document from such AST – the `<title>` element can only contain “inline” elements. Let’s handle this invalid special case separately by unwrapping the paragraph before creating the `<title>` element. --- src/Text/Pandoc/Writers/Docbook.hs | 2 ++ test/Tests/Writers/Docbook.hs | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 398920839..affa0de04 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -201,6 +201,8 @@ blockToDocbook opts (Div (ident,classes,_) bs) = do (l:_) | l `elem` admonitions -> do let (mTitleBs, bodyBs) = case bs of + -- Matches AST produced by the DocBook reader → Markdown writer → Markdown reader chain. + (Div (_,["title"],_) [Para ts] : rest) -> (Just (inlinesToDocbook opts ts), rest) -- Matches AST produced by the Docbook reader. (Div (_,["title"],_) ts : rest) -> (Just (blocksToDocbook opts ts), rest) _ -> (Nothing, bs) diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 1d53dcfe7..621c1280b 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -93,6 +93,20 @@ tests = [ testGroup "line blocks" , " </para>" , "</attention>" ] + , "admonition-with-title-in-para" =: + divWith ("foo", ["attention"], []) ( + divWith ("foo", ["title"], []) + (para "This is title") <> + para "This is a test" + ) + =?> unlines + [ "<attention id=\"foo\">" + , " <title>This is title</title>" + , " <para>" + , " This is a test" + , " </para>" + , "</attention>" + ] , "single-child" =: divWith ("foo", [], []) (para "This is a test") =?> unlines -- cgit v1.2.3 From 501ea7f0c4735acdf1457da44fe04d811ac776d7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 7 Dec 2020 12:15:14 -0800 Subject: Dokuwiki reader: handle unknown interwiki links better. DokuWiki lets the user define his own Interwiki links. Previously pandoc reacted to these by emitting a google search link, which is not helpful. Instead, we now just emit the full URL including the wikilink prefix, e.g. `faquk>FAQ-mathml`. This at least gives users the ability to modify the links using filters. Closes #6932. --- src/Text/Pandoc/Readers/DokuWiki.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index 336be09e5..dedc1f03f 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -317,7 +317,7 @@ interwikiToUrl "wpes" page = "https://es.wikipedia.org/wiki/" <> page interwikiToUrl "wpfr" page = "https://fr.wikipedia.org/wiki/" <> page interwikiToUrl "wpjp" page = "https://jp.wikipedia.org/wiki/" <> page interwikiToUrl "wppl" page = "https://pl.wikipedia.org/wiki/" <> page -interwikiToUrl _ page = "https://www.google.com/search?q=" <> page <> "&btnI=lucky" +interwikiToUrl unknown page = unknown <> ">" <> page linkText :: PandocMonad m => DWParser m B.Inlines linkText = parseLink fromRaw "[[" "]]" -- cgit v1.2.3 From f2749ba6cd0ec2473332394bfbb6f479b667f35c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 7 Dec 2020 12:56:03 -0800 Subject: Parsing: in nonspaceChar use satisfy instead of oneOf. For efficiency. --- src/Text/Pandoc/Parsing.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4bae8942b..1d9e182c5 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -443,7 +443,13 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. nonspaceChar :: Stream s m Char => ParserT s st m Char -nonspaceChar = noneOf ['\t', '\n', ' ', '\r'] +nonspaceChar = satisfy (not . isSpaceChar) + where + isSpaceChar ' ' = True + isSpaceChar '\t' = True + isSpaceChar '\n' = True + isSpaceChar '\r' = True + isSpaceChar _ = False -- | Skips zero or more spaces or tabs. skipSpaces :: Stream s m Char => ParserT s st m () -- cgit v1.2.3 From 2f9b684b3a793896bc28a79a07722415cfdc075e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 7 Dec 2020 13:01:30 -0800 Subject: Bibtex parser: avoid noneOf. --- src/Text/Pandoc/Citeproc/BibTeX.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 552339df0..ed723a11c 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -601,7 +601,7 @@ inBraces :: BibParser Text inBraces = do char '{' res <- manyTill - ( (T.pack <$> many1 (noneOf "{}\\")) + ( take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\') <|> (char '\\' >> ( (char '{' >> return "\\{") <|> (char '}' >> return "\\}") <|> return "\\")) @@ -616,7 +616,7 @@ inQuotes :: BibParser Text inQuotes = do char '"' T.concat <$> manyTill - ( (T.pack <$> many1 (noneOf "\"\\{")) + ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\') <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar) <|> braced <$> inBraces ) (char '"') -- cgit v1.2.3 From ce1791913da713ec35f514006bb532cb9c9c8d22 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 7 Dec 2020 13:24:19 -0800 Subject: Small efficiency improvement in uri parser --- src/Text/Pandoc/Parsing.hs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 1d9e182c5..1e8518c90 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -654,7 +654,20 @@ uri = try $ do let uri' = scheme <> ":" <> fromEntities str' return (uri', escapeURI uri') where - wordChar = alphaNum <|> oneOf "#$%+/@\\_-&=" + isWordChar '#' = True + isWordChar '$' = True + isWordChar '%' = True + isWordChar '+' = True + isWordChar '/' = True + isWordChar '@' = True + isWordChar '\\' = True + isWordChar '_' = True + isWordChar '-' = True + isWordChar '&' = True + isWordChar '=' = True + isWordChar c = isAlphaNum c + + wordChar = satisfy isWordChar percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit entity = try $ pure <$> characterReference punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) -- cgit v1.2.3 From 0fa1023b9ef0f39be013befa9ba3d61b81d95928 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 7 Dec 2020 18:57:09 -0800 Subject: Parsing: More minor performance improvements. --- src/Text/Pandoc/Parsing.hs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 1e8518c90..4ea0d788c 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -190,7 +190,7 @@ where import Control.Monad.Identity import Control.Monad.Reader -import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, +import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower, isPunctuation, isSpace, ord, toLower, toUpper) import Data.Default import Data.Functor (($>)) @@ -444,12 +444,13 @@ spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. nonspaceChar :: Stream s m Char => ParserT s st m Char nonspaceChar = satisfy (not . isSpaceChar) - where - isSpaceChar ' ' = True - isSpaceChar '\t' = True - isSpaceChar '\n' = True - isSpaceChar '\r' = True - isSpaceChar _ = False + +isSpaceChar :: Char -> Bool +isSpaceChar ' ' = True +isSpaceChar '\t' = True +isSpaceChar '\n' = True +isSpaceChar '\r' = True +isSpaceChar _ = False -- | Skips zero or more spaces or tabs. skipSpaces :: Stream s m Char => ParserT s st m () @@ -682,7 +683,9 @@ mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text mathInlineWith op cl = try $ do textStr op when (op == "$") $ notFollowedBy space - words' <- many1Till (countChar 1 (noneOf " \t\n\\") + words' <- many1Till ( + (T.singleton <$> + satisfy (\c -> not (isSpaceChar c || c == '\\'))) <|> (char '\\' >> -- This next clause is needed because \text{..} can -- contain $, \(\), etc. @@ -840,13 +843,13 @@ defaultNum = do -- | Parses a lowercase letter and returns (LowerAlpha, number). lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) lowerAlpha = do - ch <- oneOf ['a'..'z'] + ch <- satisfy isAsciiLower return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) upperAlpha = do - ch <- oneOf ['A'..'Z'] + ch <- satisfy isAsciiUpper return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -- cgit v1.2.3 From 5990cbb150f3581ab9b2f3d3a726c73dcadfd793 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 7 Dec 2020 21:34:23 -0800 Subject: Parsing: Small code improvements. --- src/Text/Pandoc/Parsing.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4ea0d788c..979344f63 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -641,7 +641,7 @@ uri = try $ do scheme <- uriScheme char ':' -- Avoid parsing e.g. "**Notes:**" as a raw URI: - notFollowedBy (oneOf "*_]") + notFollowedBy $ satisfy (\c -> c == '*' || c == '_' || c == ']') -- We allow sentence punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) @@ -693,7 +693,7 @@ mathInlineWith op cl = try $ do (("\\text" <>) <$> inBalancedBraces 0 "")) <|> (\c -> T.pack ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> - (oneOf " \t" <* skipMany (oneOf " \t")) + (spaceChar <* skipMany spaceChar) notFollowedBy (char '$') return " " ) (try $ textStr cl) @@ -723,7 +723,8 @@ mathInlineWith op cl = try $ do mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text mathDisplayWith op cl = try $ fmap T.pack $ do textStr op - many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl) + many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline)) + (try $ textStr cl) mathDisplay :: (HasReaderOptions st, Stream s m Char) => ParserT s st m Text -- cgit v1.2.3 From 8c9010864cd818031d7eff161a57459709751517 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 9 Dec 2020 21:05:40 -0800 Subject: Commonmark reader: refactor specFor, set input name to "". --- src/Text/Pandoc/Readers/CommonMark.hs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index c1773eaab..d32a38342 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.CommonMark @@ -27,15 +28,20 @@ import Text.Pandoc.Options import Text.Pandoc.Error import Control.Monad.Except import Data.Functor.Identity (runIdentity) +import Data.Typeable -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s = do - let res = runIdentity $ - commonmarkWith (foldr ($) defaultSyntaxSpec exts) "input" s + let res = runIdentity $ commonmarkWith (specFor opts) "" s case res of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls + +specFor :: (Monad m, Typeable m, Typeable a, + Rangeable (Cm a Inlines), Rangeable (Cm a Blocks)) + => ReaderOptions -> SyntaxSpec m (Cm a Inlines) (Cm a Blocks) +specFor opts = foldr ($) defaultSyntaxSpec exts where exts = [ (hardLineBreaksSpec <>) | isEnabled Ext_hard_line_breaks opts ] ++ [ (smartPunctuationSpec <>) | isEnabled Ext_smart opts ] ++ -- cgit v1.2.3 From a3eb87b2eab9def3e28364b43300043f5e13268d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 9 Dec 2020 21:14:11 -0800 Subject: Add sourcepos extension for commonmarke * Add `Ext_sourcepos` constructor for `Extension`. * Add `sourcepos` extension (only for commonmark). * Bump to 2.11.3 With the `sourcepos` extension set set, `data-pos` attributes are added to the AST by the commonmark reader. No other readers are affected. The `data-pos` attributes are put on elements that accept attributes; for other elements, an enlosing Div or Span is added to hold the attributes. Closes #4565. --- MANUAL.txt | 7 +++++++ pandoc.cabal | 2 +- src/Text/Pandoc/Extensions.hs | 2 ++ src/Text/Pandoc/Options.hs | 1 + src/Text/Pandoc/Readers/CommonMark.hs | 14 +++++++++----- 5 files changed, 20 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 0a1300947..461ebf54d 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5127,6 +5127,13 @@ for regular emphasis, add extra blank space around headings. [Project Gutenberg]: https://www.gutenberg.org +#### Extension: `sourcepos` #### + +Include source position attributes when parsing `commonmark`. +For elements that accept attributes, a `data-pos` attribute +is added; other elements are placed in a surrounding +Div or Span elemnet with a `data-pos` attribute. + ## Markdown variants In addition to pandoc's extended Markdown, the following Markdown diff --git a/pandoc.cabal b/pandoc.cabal index c02dfeb38..5829856da 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -1,6 +1,6 @@ cabal-version: 2.2 name: pandoc -version: 2.11.2 +version: 2.11.3 build-type: Simple license: GPL-2.0-or-later license-file: COPYING.md diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index a94e24e2c..9865f897b 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -154,6 +154,7 @@ data Extension = | Ext_yaml_metadata_block -- ^ YAML metadata block | Ext_gutenberg -- ^ Use Project Gutenberg conventions for plain | Ext_attributes -- ^ Generic attribute syntax + | Ext_sourcepos -- ^ Include source position attributes deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic) -- | Extensions to be used with pandoc-flavored markdown. @@ -503,6 +504,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_implicit_header_references , Ext_attributes , Ext_fenced_code_attributes + , Ext_sourcepos ] getAll "commonmark_x" = getAll "commonmark" getAll "org" = autoIdExtensions <> diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index c7f1a56fa..ecd65a54d 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -65,6 +65,7 @@ data ReaderOptions = ReaderOptions{ , readerDefaultImageExtension :: Text -- ^ Default extension for images , readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx , readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML + -- (only implemented in commonmark) } deriving (Show, Read, Data, Typeable, Generic) instance HasSyntaxExtensions ReaderOptions where diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index d32a38342..9eef498e1 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -32,11 +32,15 @@ import Data.Typeable -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readCommonMark opts s = do - let res = runIdentity $ commonmarkWith (specFor opts) "" s - case res of - Left err -> throwError $ PandocParsecError s err - Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls +readCommonMark opts s + | isEnabled Ext_sourcepos opts = + case runIdentity (commonmarkWith (specFor opts) "" s) of + Left err -> throwError $ PandocParsecError s err + Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls + | otherwise = + case runIdentity (commonmarkWith (specFor opts) "" s) of + Left err -> throwError $ PandocParsecError s err + Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls specFor :: (Monad m, Typeable m, Typeable a, Rangeable (Cm a Inlines), Rangeable (Cm a Blocks)) -- cgit v1.2.3 From 0a502e5ff52b251bbf3da69fd1f9a88d5e0fe92c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 10 Dec 2020 15:44:10 -0800 Subject: HTML reader: retain attribute prefixes and avoid duplicates. Previously we stripped attribute prefixes, reading `xml:lang` as `lang` for example. This resulted in two duplicate `lang` attributes when `xml:lang` and `lang` were both used. This commit causes the prefixes to be retained, and also avoids invald duplicate attributes. Closes #6938. --- src/Text/Pandoc/Readers/HTML.hs | 28 +++++++++++----------------- src/Text/Pandoc/Readers/HTML/Parsing.hs | 20 +++++++++++++------- test/command/5986.md | 2 +- test/epub/wasteland.native | 8 ++++---- 4 files changed, 29 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index eb78979a3..f870a241d 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -74,7 +74,7 @@ readHtml :: PandocMonad m -> Text -- ^ String to parse (assumes @'\n'@ line endings) -> m Pandoc readHtml opts inp = do - let tags = stripPrefixes . canonicalizeTags $ + let tags = stripPrefixes $ canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } (crFilter inp) parseDoc = do @@ -95,6 +95,15 @@ readHtml opts inp = do Right doc -> return doc Left err -> throwError $ PandocParseError $ T.pack $ getError err +-- Strip namespace prefixes on tags (not attributes) +stripPrefixes :: [Tag Text] -> [Tag Text] +stripPrefixes = map stripPrefix + +stripPrefix :: Tag Text -> Tag Text +stripPrefix (TagOpen s as) = TagOpen (T.takeWhileEnd (/=':') s) as +stripPrefix (TagClose s) = TagClose (T.takeWhileEnd (/=':') s) +stripPrefix x = x + replaceNotes :: PandocMonad m => [Block] -> TagParser m [Block] replaceNotes bs = do st <- getState @@ -114,7 +123,7 @@ setInPlain = local (\s -> s {inPlain = True}) pHtml :: PandocMonad m => TagParser m Blocks pHtml = try $ do (TagOpen "html" attr) <- lookAhead pAny - for_ (lookup "lang" attr) $ + for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $ updateState . B.setMeta "lang" . B.text pInTags "html" block @@ -1024,21 +1033,6 @@ htmlTag f = try $ do handleTag tagname _ -> mzero --- Strip namespace prefixes -stripPrefixes :: [Tag Text] -> [Tag Text] -stripPrefixes = map stripPrefix - -stripPrefix :: Tag Text -> Tag Text -stripPrefix (TagOpen s as) = - TagOpen (stripPrefix' s) (map (first stripPrefix') as) -stripPrefix (TagClose s) = TagClose (stripPrefix' s) -stripPrefix x = x - -stripPrefix' :: Text -> Text -stripPrefix' s = - if T.null t then s else T.drop 1 t - where (_, t) = T.span (/= ':') s - -- Utilities -- | Adjusts a url according to the document's base URL. diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index 2d58319da..e28ebe77b 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -193,14 +193,20 @@ t1 `closes` t2 | _ `closes` _ = False toStringAttr :: [(Text, Text)] -> [(Text, Text)] -toStringAttr = map go +toStringAttr = foldr go [] where - go (x,y) = - case T.stripPrefix "data-" x of - Just x' | x' `Set.notMember` (html5Attributes <> - html4Attributes <> rdfaAttributes) - -> (x',y) - _ -> (x,y) + go :: (Text, Text) -> [(Text, Text)] -> [(Text, Text)] + -- treat xml:lang as lang + go ("xml:lang",y) ats = go ("lang",y) ats + -- prevent duplicate attributes + go (x,y) ats + | any (\(x',_) -> x == x') ats = ats + | otherwise = + case T.stripPrefix "data-" x of + Just x' | x' `Set.notMember` (html5Attributes <> + html4Attributes <> rdfaAttributes) + -> go (x',y) ats + _ -> (x,y):ats -- Unlike fromAttrib from tagsoup, this distinguishes -- between a missing attribute and an attribute with empty content. diff --git a/test/command/5986.md b/test/command/5986.md index ea0ca70c1..ed8dd30c9 100644 --- a/test/command/5986.md +++ b/test/command/5986.md @@ -4,7 +4,7 @@ ^D <p><span id="title_page.xhtml"></span></p> <p><span id="nav.xhtml"></span></p> -<nav type="landmarks" id="landmarks" hidden="hidden"> +<nav epub:type="landmarks" id="landmarks" hidden="hidden"> <ol> <li><a href="text/title_page.xhtml">Title Page</a></li> <li><a href="#nav.xhtml#toc">Table of Contents</a></li> diff --git a/test/epub/wasteland.native b/test/epub/wasteland.native index 0ed23eba2..ff59182a6 100644 --- a/test/epub/wasteland.native +++ b/test/epub/wasteland.native @@ -1,8 +1,8 @@ [Para [Image ("",[],[]) [] ("wasteland-cover.jpg","")] ,Para [Span ("wasteland-content.xhtml",[],[]) []] -,Div ("wasteland-content.xhtml#frontmatter",["section"],[("type","frontmatter")]) +,Div ("wasteland-content.xhtml#frontmatter",["section","frontmatter"],[]) [] -,Div ("wasteland-content.xhtml#bodymatter",["section"],[("type","bodymatter")]) +,Div ("wasteland-content.xhtml#bodymatter",["section","bodymatter"],[]) [Div ("wasteland-content.xhtml#ch1",["section"],[]) [Header 2 ("",[],[]) [Str "I.",Space,Str "THE",Space,Str "BURIAL",Space,Str "OF",Space,Str "THE",Space,Str "DEAD"] ,Div ("",["linegroup"],[]) @@ -922,8 +922,8 @@ [Plain [Str "Datta.",Space,Str "Dayadhvam.",Space,Str "Damyata."]] ,Div ("wasteland-content.xhtml#ln434",["linegroup","indent"],[]) [Plain [Span ("",[],[("lang","sa")]) [Str "Shantih",Space,Str "shantih",Space,Str "shantih",Note [Para [Link ("",[],[]) [Str "434."] ("#wasteland-content.xhtml#ln434",""),Space,Str "Shantih.",Space,Str "Repeated",Space,Str "as",Space,Str "here,",Space,Str "a",Space,Str "formal",Space,Str "ending",Space,Str "to",Space,Str "an",Space,Str "Upanishad.",Space,Str "'The",SoftBreak,Str "Peace",Space,Str "which",Space,Str "passeth",Space,Str "understanding'",Space,Str "is",Space,Str "a",Space,Str "feeble",Space,Str "translation",Space,Str "of",Space,Str "the",SoftBreak,Str "content",Space,Str "of",Space,Str "this",Space,Str "word."]],SoftBreak]]]]]] -,Div ("wasteland-content.xhtml#backmatter",["section"],[("type","backmatter")]) - [Div ("wasteland-content.xhtml#rearnotes",["section"],[("type","rearnotes")]) +,Div ("wasteland-content.xhtml#backmatter",["section","backmatter"],[]) + [Div ("wasteland-content.xhtml#rearnotes",["section","rearnotes"],[]) [Header 2 ("",[],[]) [Str "NOTES",Space,Str "ON",Space,Str "\"THE",Space,Str "WASTE",Space,Str "LAND\""] ,Para [Str "Not",Space,Str "only",Space,Str "the",Space,Str "title,",Space,Str "but",Space,Str "the",Space,Str "plan",Space,Str "and",Space,Str "a",Space,Str "good",Space,Str "deal",Space,Str "of",Space,Str "the",Space,Str "incidental",Space,Str "symbolism",Space,Str "of",SoftBreak,Str "the",Space,Str "poem",Space,Str "were",Space,Str "suggested",Space,Str "by",Space,Str "Miss",Space,Str "Jessie",Space,Str "L.",Space,Str "Weston's",Space,Str "book",Space,Str "on",Space,Str "the",Space,Str "Grail",Space,Str "legend:",SoftBreak,Str "From",Space,Str "Ritual",Space,Str "to",Space,Str "Romance"] ,Para [Str "Indeed,",Space,Str "so",Space,Str "deeply",Space,Str "am",Space,Str "I",Space,Str "indebted,",Space,Str "Miss",Space,Str "Weston's",Space,Str "book",Space,Str "will",Space,Str "elucidate",Space,Str "the",SoftBreak,Str "difficulties",Space,Str "of",Space,Str "the",Space,Str "poem",Space,Str "much",Space,Str "better",Space,Str "than",Space,Str "my",Space,Str "notes",Space,Str "can",Space,Str "do;",Space,Str "and",Space,Str "I",Space,Str "recommend",Space,Str "it",SoftBreak,Str "(apart",Space,Str "from",Space,Str "the",Space,Str "great",Space,Str "interest",Space,Str "of",Space,Str "the",Space,Str "book",Space,Str "itself)",Space,Str "to",Space,Str "any",Space,Str "who",Space,Str "think",Space,Str "such",SoftBreak,Str "elucidation",Space,Str "of",Space,Str "the",Space,Str "poem",Space,Str "worth",Space,Str "the",Space,Str "trouble.",Space,Str "To",Space,Str "another",Space,Str "work",Space,Str "of",Space,Str "anthropology",Space,Str "I",Space,Str "am",SoftBreak,Str "indebted",Space,Str "in",Space,Str "general,",Space,Str "one",Space,Str "which",Space,Str "has",Space,Str "influenced",Space,Str "our",Space,Str "generation",Space,Str "profoundly;",Space,Str "I",Space,Str "mean",SoftBreak,Str "The",Space,Str "Golden",Space,Str "Bough;",Space,Str "I",Space,Str "have",Space,Str "used",Space,Str "especially",Space,Str "the",Space,Str "two",Space,Str "volumes",Space,Str "Adonis,",Space,Str "Attis,",Space,Str "Osiris.",SoftBreak,Str "Anyone",Space,Str "who",Space,Str "is",Space,Str "acquainted",Space,Str "with",Space,Str "these",Space,Str "works",Space,Str "will",Space,Str "immediately",Space,Str "recognise",Space,Str "in",Space,Str "the",Space,Str "poem",SoftBreak,Str "certain",Space,Str "references",Space,Str "to",Space,Str "vegetation",Space,Str "ceremonies."] -- cgit v1.2.3 From fcd065818901e57f01aca4c919f6102f9a047ba0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 10 Dec 2020 15:51:20 -0800 Subject: HTML reader: pay attention to lang attributes on body. These (as well as lang attributes on html) should update lang in metadata. See #6938. --- src/Text/Pandoc/Readers/HTML.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f870a241d..f8a17bb78 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -25,7 +25,6 @@ module Text.Pandoc.Readers.HTML ( readHtml ) where import Control.Applicative ((<|>)) -import Control.Arrow (first) import Control.Monad (guard, msum, mzero, unless, void) import Control.Monad.Except (throwError) import Control.Monad.Reader (ask, asks, lift, local, runReaderT) @@ -121,14 +120,18 @@ setInPlain :: PandocMonad m => HTMLParser m s a -> HTMLParser m s a setInPlain = local (\s -> s {inPlain = True}) pHtml :: PandocMonad m => TagParser m Blocks -pHtml = try $ do +pHtml = do (TagOpen "html" attr) <- lookAhead pAny for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $ updateState . B.setMeta "lang" . B.text pInTags "html" block pBody :: PandocMonad m => TagParser m Blocks -pBody = pInTags "body" block +pBody = do + (TagOpen "body" attr) <- lookAhead pAny + for_ (lookup "lang" attr <|> lookup "xml:lang" attr) $ + updateState . B.setMeta "lang" . B.text + pInTags "body" block pHead :: PandocMonad m => TagParser m Blocks pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny) -- cgit v1.2.3 From 208cb9619632ee9490b75158e44213ee6d4ab93e Mon Sep 17 00:00:00 2001 From: mb21 <mb21@users.noreply.github.com> Date: Sat, 12 Dec 2020 14:12:39 +0100 Subject: ICML writer: fix image bounding box for custom widths/heights fixes #6936 --- src/Text/Pandoc/Writers/ICML.hs | 7 ++++++- test/command/5541-nesting.md | 1 + test/command/svg.md | 4 ++++ test/writer.icml | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index dcf5acfef..c254fbc58 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -621,7 +621,12 @@ imageICML opts style attr (src, _) = do image = inTags True "Image" [("Self","ue6"), ("ItemTransform", scale<>" -"<>hw<>" -"<>hh)] $ vcat [ - inTags True "Properties" [] $ inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + inTags True "Properties" [] $ vcat [ + inTags True "Profile" [("type","string")] $ text "$ID/Embedded" + , selfClosingTag "GraphicBounds" [("Left","0"), ("Top","0") + , ("Right", showFl $ ow*ow / imgWidth) + , ("Bottom", showFl $ oh*oh / imgHeight)] + ] , selfClosingTag "Link" [("Self", "ueb"), ("LinkResourceURI", src')] ] doc = inTags True "CharacterStyleRange" attrs diff --git a/test/command/5541-nesting.md b/test/command/5541-nesting.md index 194b79164..5abe41a9a 100644 --- a/test/command/5541-nesting.md +++ b/test/command/5541-nesting.md @@ -80,6 +80,7 @@ <Profile type="string"> $ID/Embedded </Profile> + <GraphicBounds Left="0" Top="0" Right="150" Bottom="150" /> </Properties> <Link Self="ueb" LinkResourceURI="file:lalune.jpg" /> </Image> diff --git a/test/command/svg.md b/test/command/svg.md index 26a8213f6..57506570b 100644 --- a/test/command/svg.md +++ b/test/command/svg.md @@ -23,6 +23,7 @@ <Profile type="string"> $ID/Embedded </Profile> + <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" /> </Properties> <Link Self="ueb" LinkResourceURI="file:command/corrupt.svg" /> </Image> @@ -56,6 +57,7 @@ <Profile type="string"> $ID/Embedded </Profile> + <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" /> </Properties> <Link Self="ueb" LinkResourceURI="file:command/SVG_logo.svg" /> </Image> @@ -89,6 +91,7 @@ <Profile type="string"> $ID/Embedded </Profile> + <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" /> </Properties> <Link Self="ueb" LinkResourceURI="file:command/SVG_logo-without-xml-declaration.svg" /> </Image> @@ -122,6 +125,7 @@ <Profile type="string"> $ID/Embedded </Profile> + <GraphicBounds Left="0" Top="0" Right="109.5" Bottom="130.5" /> </Properties> <Link Self="ueb" LinkResourceURI="file:command/inkscape-cube.svg" /> </Image> diff --git a/test/writer.icml b/test/writer.icml index a4b40056a..0601ba494 100644 --- a/test/writer.icml +++ b/test/writer.icml @@ -2800,6 +2800,7 @@ These should not be escaped: \$ \\ \> \[ \{</Content> <Profile type="string"> $ID/Embedded </Profile> + <GraphicBounds Left="0" Top="0" Right="150" Bottom="150" /> </Properties> <Link Self="ueb" LinkResourceURI="file:lalune.jpg" /> </Image> @@ -2836,6 +2837,7 @@ These should not be escaped: \$ \\ \> \[ \{</Content> <Profile type="string"> $ID/Embedded </Profile> + <GraphicBounds Left="0" Top="0" Right="20" Bottom="22" /> </Properties> <Link Self="ueb" LinkResourceURI="file:movie.jpg" /> </Image> -- cgit v1.2.3 From ccd235e31ff00c4741ba52552ba58669f700bbdc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 12 Dec 2020 16:45:50 +0100 Subject: LaTeX writer: extract table handling into separate module. --- pandoc.cabal | 4 + src/Text/Pandoc/Writers/LaTeX.hs | 249 ++----------------------------- src/Text/Pandoc/Writers/LaTeX/Caption.hs | 48 ++++++ src/Text/Pandoc/Writers/LaTeX/Notes.hs | 34 +++++ src/Text/Pandoc/Writers/LaTeX/Table.hs | 181 ++++++++++++++++++++++ src/Text/Pandoc/Writers/LaTeX/Types.hs | 80 ++++++++++ 6 files changed, 359 insertions(+), 237 deletions(-) create mode 100644 src/Text/Pandoc/Writers/LaTeX/Caption.hs create mode 100644 src/Text/Pandoc/Writers/LaTeX/Notes.hs create mode 100644 src/Text/Pandoc/Writers/LaTeX/Table.hs create mode 100644 src/Text/Pandoc/Writers/LaTeX/Types.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 7f8df272f..335ccc720 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -630,6 +630,10 @@ library Text.Pandoc.Writers.Docx.StyleMap, Text.Pandoc.Writers.JATS.Table, Text.Pandoc.Writers.JATS.Types, + Text.Pandoc.Writers.LaTeX.Caption, + Text.Pandoc.Writers.LaTeX.Notes, + Text.Pandoc.Writers.LaTeX.Table, + Text.Pandoc.Writers.LaTeX.Types, Text.Pandoc.Writers.Roff, Text.Pandoc.Writers.Powerpoint.Presentation, Text.Pandoc.Writers.Powerpoint.Output, diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e02cc2833..6a4e3ba69 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -20,7 +20,6 @@ module Text.Pandoc.Writers.LaTeX ( ) where import Control.Applicative ((<|>)) import Control.Monad.State.Strict -import Data.Monoid (Any(..)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, isPunctuation, ord) import Data.List (foldl', intersperse, nubBy, (\\), uncons) @@ -42,69 +41,14 @@ import Text.Pandoc.Options import Text.DocLayout import Text.Pandoc.Shared import Text.Pandoc.Slides -import Text.Pandoc.Walk +import Text.Pandoc.Walk (query, walk, walkM) +import Text.Pandoc.Writers.LaTeX.Caption (getCaption) +import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX) +import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState) import Text.Pandoc.Writers.Shared import Text.Printf (printf) import qualified Data.Text.Normalize as Normalize -data WriterState = - WriterState { stInNote :: Bool -- true if we're in a note - , stInQuote :: Bool -- true if in a blockquote - , stExternalNotes :: Bool -- true if in context where - -- we need to store footnotes - , stInMinipage :: Bool -- true if in minipage - , stInHeading :: Bool -- true if in a section heading - , stInItem :: Bool -- true if in \item[..] - , stNotes :: [Doc Text] -- notes in a minipage - , stOLLevel :: Int -- level of ordered list nesting - , stOptions :: WriterOptions -- writer options, so they don't have to be parameter - , stVerbInNote :: Bool -- true if document has verbatim text in note - , stTable :: Bool -- true if document has a table - , stStrikeout :: Bool -- true if document has strikeout - , stUrl :: Bool -- true if document has visible URL link - , stGraphics :: Bool -- true if document contains images - , stLHS :: Bool -- true if document has literate haskell code - , stHasChapters :: Bool -- true if document has chapters - , stCsquotes :: Bool -- true if document uses csquotes - , stHighlighting :: Bool -- true if document has highlighted code - , stIncremental :: Bool -- true if beamer lists should be displayed bit by bit - , stInternalLinks :: [Text] -- list of internal link targets - , stBeamer :: Bool -- produce beamer - , stEmptyLine :: Bool -- true if no content on line - , stHasCslRefs :: Bool -- has a Div with class refs - , stIsFirstInDefinition :: Bool -- first block in a defn list - } - -startingState :: WriterOptions -> WriterState -startingState options = WriterState { - stInNote = False - , stInQuote = False - , stExternalNotes = False - , stInHeading = False - , stInMinipage = False - , stInItem = False - , stNotes = [] - , stOLLevel = 1 - , stOptions = options - , stVerbInNote = False - , stTable = False - , stStrikeout = False - , stUrl = False - , stGraphics = False - , stLHS = False - , stHasChapters = case writerTopLevelDivision options of - TopLevelPart -> True - TopLevelChapter -> True - _ -> False - , stCsquotes = False - , stHighlighting = False - , stIncremental = writerIncremental options - , stInternalLinks = [] - , stBeamer = False - , stEmptyLine = True - , stHasCslRefs = False - , stIsFirstInDefinition = False } - -- | Convert Pandoc to LaTeX. writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeLaTeX options document = @@ -117,8 +61,6 @@ writeBeamer options document = evalStateT (pandocToLaTeX options document) $ (startingState options){ stBeamer = True } -type LW m = StateT WriterState m - pandocToLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> LW m Text pandocToLaTeX options (Pandoc meta blocks) = do @@ -573,7 +515,7 @@ blockToLaTeX (Plain lst) = blockToLaTeX (Para [Image attr@(ident, _, _) txt (src,tgt)]) | Just tit <- T.stripPrefix "fig:" tgt = do - (capt, captForLof, footnotes) <- getCaption True txt + (capt, captForLof, footnotes) <- getCaption inlineListToLaTeX True txt lab <- labelFor ident let caption = "\\caption" <> captForLof <> braces capt <> lab img <- inlineToLaTeX (Image attr txt (src,tit)) @@ -774,181 +716,14 @@ blockToLaTeX (Header level (id',classes,_) lst) = do hdr <- sectionHeader classes id' level lst modify $ \s -> s{stInHeading = False} return hdr -blockToLaTeX (Table _ blkCapt specs thead tbody tfoot) = do - let (caption, aligns, widths, heads, rows) = toLegacyTable blkCapt specs thead tbody tfoot - -- simple tables have to have simple cells: - let isSimple [Plain _] = True - isSimple [Para _] = True - isSimple [] = True - isSimple _ = False - let widths' = if all (== 0) widths && not (all (all isSimple) rows) - then replicate (length aligns) - (1 / fromIntegral (length aligns)) - else widths - (captionText, captForLof, captNotes) <- getCaption False caption - let toHeaders hs = do contents <- tableRowToLaTeX True aligns hs - return ("\\toprule" $$ contents $$ "\\midrule") - let removeNote (Note _) = Span ("", [], []) [] - removeNote x = x - firsthead <- if isEmpty captionText || all null heads - then return empty - else ($$ text "\\endfirsthead") <$> toHeaders heads - head' <- if all null heads - then return "\\toprule" - -- avoid duplicate notes in head and firsthead: - else toHeaders (if isEmpty firsthead - then heads - else walk removeNote heads) - let capt = if isEmpty captionText - then empty - else "\\caption" <> captForLof <> braces captionText - <> "\\tabularnewline" - rows' <- mapM (tableRowToLaTeX False aligns) rows - let colDescriptors = - (if all (== 0) widths' - then hcat . map literal - else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $ - zipWith (toColDescriptor (length widths')) aligns widths' - modify $ \s -> s{ stTable = True } - notes <- notesToLaTeX <$> gets stNotes - return $ "\\begin{longtable}[]" <> - braces ("@{}" <> colDescriptors <> "@{}") - -- the @{} removes extra space at beginning and end - $$ capt - $$ firsthead - $$ head' - $$ "\\endhead" - $$ vcat rows' - $$ "\\bottomrule" - $$ "\\end{longtable}" - $$ captNotes - $$ notes - -getCaption :: PandocMonad m - => Bool -> [Inline] -> LW m (Doc Text, Doc Text, Doc Text) -getCaption externalNotes txt = do - oldExternalNotes <- gets stExternalNotes - modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] } - capt <- inlineListToLaTeX txt - footnotes <- if externalNotes - then notesToLaTeX <$> gets stNotes - else return empty - modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] } - -- We can't have footnotes in the list of figures/tables, so remove them: - let getNote (Note _) = Any True - getNote _ = Any False - let hasNotes = getAny . query getNote - captForLof <- if hasNotes txt - then brackets <$> inlineListToLaTeX (walk deNote txt) - else return empty - return (capt, captForLof, footnotes) - -toColDescriptor :: Int -> Alignment -> Double -> Text -toColDescriptor _numcols align 0 = - case align of - AlignLeft -> "l" - AlignRight -> "r" - AlignCenter -> "c" - AlignDefault -> "l" -toColDescriptor numcols align width = - T.pack $ printf - ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" - align' - ((numcols - 1) * 2) - width - where - align' :: String - align' = case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" +blockToLaTeX (Table _ blkCapt specs thead tbodies tfoot) = + tableToLaTeX inlineListToLaTeX blockListToLaTeX + blkCapt specs thead tbodies tfoot blockListToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text) blockListToLaTeX lst = vsep `fmap` mapM (\b -> setEmptyLine True >> blockToLaTeX b) lst -tableRowToLaTeX :: PandocMonad m - => Bool - -> [Alignment] - -> [[Block]] - -> LW m (Doc Text) -tableRowToLaTeX header aligns cols = do - cells <- mapM (tableCellToLaTeX header) $ zip aligns cols - return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace" - --- For simple latex tables (without minipages or parboxes), --- we need to go to some lengths to get line breaks working: --- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. -fixLineBreaks :: Block -> Block -fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils -fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils -fixLineBreaks x = x - -fixLineBreaks' :: [Inline] -> [Inline] -fixLineBreaks' ils = case splitBy (== LineBreak) ils of - [] -> [] - [xs] -> xs - chunks -> RawInline "tex" "\\vtop{" : - concatMap tohbox chunks <> - [RawInline "tex" "}"] - where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <> - [RawInline "tex" "}"] - --- We also change display math to inline math, since display --- math breaks in simple tables. -displayMathToInline :: Inline -> Inline -displayMathToInline (Math DisplayMath x) = Math InlineMath x -displayMathToInline x = x - -tableCellToLaTeX :: PandocMonad m - => Bool -> (Alignment, [Block]) - -> LW m (Doc Text) -tableCellToLaTeX header (align, blocks) = do - beamer <- gets stBeamer - externalNotes <- gets stExternalNotes - inMinipage <- gets stInMinipage - -- See #5367 -- footnotehyper/footnote don't work in beamer, - -- so we need to produce the notes outside the table... - modify $ \st -> st{ stExternalNotes = beamer } - let isPlainOrPara Para{} = True - isPlainOrPara Plain{} = True - isPlainOrPara _ = False - result <- - if all isPlainOrPara blocks - then - blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks - else do - modify $ \st -> st{ stInMinipage = True } - cellContents <- blockListToLaTeX blocks - modify $ \st -> st{ stInMinipage = inMinipage } - let valign = text $ if header then "[b]" else "[t]" - let halign = case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" - return $ "\\begin{minipage}" <> valign <> - braces "\\linewidth" <> halign <> cr <> - cellContents <> cr <> - "\\end{minipage}" - modify $ \st -> st{ stExternalNotes = externalNotes } - return result - - -notesToLaTeX :: [Doc Text] -> Doc Text -notesToLaTeX [] = empty -notesToLaTeX ns = (case length ns of - n | n > 1 -> "\\addtocounter" <> - braces "footnote" <> - braces (text $ show $ 1 - n) - | otherwise -> empty) - $$ - vcat (intersperse - ("\\addtocounter" <> braces "footnote" <> braces "1") - $ map (\x -> "\\footnotetext" <> braces x) - $ reverse ns) - listItemToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text) listItemToLaTeX lst -- we need to put some text before a header if it's the first @@ -1081,7 +856,7 @@ mapAlignment a = case a of "top-baseline" -> "t" "bottom" -> "b" "center" -> "c" - _ -> a + _ -> a wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text) wrapDiv (_,classes,kvs) t = do @@ -1095,7 +870,7 @@ wrapDiv (_,classes,kvs) t = do (lookup "totalwidth" kvs) onlytextwidth = filter ("onlytextwidth" ==) classes options = text $ T.unpack $ T.intercalate "," $ - valign : totalwidth ++ onlytextwidth + valign : totalwidth ++ onlytextwidth in inCmd "begin" "columns" <> brackets options $$ contents $$ inCmd "end" "columns" @@ -1106,8 +881,8 @@ wrapDiv (_,classes,kvs) t = do maybe "" (brackets . text . T.unpack . mapAlignment) (lookup "align" kvs) - w = maybe "0.48" fromPct (lookup "width" kvs) - in inCmd "begin" "column" <> + w = maybe "0.48" fromPct (lookup "width" kvs) + in inCmd "begin" "column" <> valign <> braces (literal w <> "\\textwidth") $$ contents diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs new file mode 100644 index 000000000..61ca41fb1 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs @@ -0,0 +1,48 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Caption + Copyright : Copyright (C) 2006-2020 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Write figure or table captions as LaTeX. +-} +module Text.Pandoc.Writers.LaTeX.Caption + ( getCaption + ) where + +import Control.Monad.State.Strict +import Data.Monoid (Any(..)) +import Data.Text (Text) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.DocLayout (Doc, brackets, empty) +import Text.Pandoc.Shared +import Text.Pandoc.Walk +import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) +import Text.Pandoc.Writers.LaTeX.Types + ( LW, WriterState (stExternalNotes, stNotes) ) + +getCaption :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Bool -> [Inline] + -> LW m (Doc Text, Doc Text, Doc Text) +getCaption inlineListToLaTeX externalNotes txt = do + oldExternalNotes <- gets stExternalNotes + modify $ \st -> st{ stExternalNotes = externalNotes, stNotes = [] } + capt <- inlineListToLaTeX txt + footnotes <- if externalNotes + then notesToLaTeX <$> gets stNotes + else return empty + modify $ \st -> st{ stExternalNotes = oldExternalNotes, stNotes = [] } + -- We can't have footnotes in the list of figures/tables, so remove them: + let getNote (Note _) = Any True + getNote _ = Any False + let hasNotes = getAny . query getNote + captForLof <- if hasNotes txt + then brackets <$> inlineListToLaTeX (walk deNote txt) + else return empty + return (capt, captForLof, footnotes) diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs new file mode 100644 index 000000000..216a7bfc3 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs @@ -0,0 +1,34 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Notes + Copyright : Copyright (C) 2006-2020 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Output tables as LaTeX. +-} +module Text.Pandoc.Writers.LaTeX.Notes + ( notesToLaTeX + ) where + +import Data.List (intersperse) +import Text.DocLayout ( Doc, braces, empty, text, vcat, ($$)) +import Data.Text (Text) + +notesToLaTeX :: [Doc Text] -> Doc Text +notesToLaTeX = \case + [] -> empty + ns -> (case length ns of + n | n > 1 -> "\\addtocounter" <> + braces "footnote" <> + braces (text $ show $ 1 - n) + | otherwise -> empty) + $$ + vcat (intersperse + ("\\addtocounter" <> braces "footnote" <> braces "1") + $ map (\x -> "\\footnotetext" <> braces x) + $ reverse ns) diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs new file mode 100644 index 000000000..5299efa37 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -0,0 +1,181 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Table + Copyright : Copyright (C) 2006-2020 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Output LaTeX formatted tables. +-} +module Text.Pandoc.Writers.LaTeX.Table + ( tableToLaTeX + ) where +import Control.Monad.State.Strict +import Data.List (intersperse) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.DocLayout + ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest + , text, vcat, ($$) ) +import Text.Pandoc.Shared (splitBy) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.Writers.Shared (toLegacyTable) +import Text.Pandoc.Writers.LaTeX.Caption (getCaption) +import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) +import Text.Pandoc.Writers.LaTeX.Types + ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stNotes, stTable) ) +import Text.Printf (printf) + +tableToLaTeX :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> ([Block] -> LW m (Doc Text)) + -> Caption -> [ColSpec] -> TableHead -> [TableBody] -> TableFoot + -> LW m (Doc Text) +tableToLaTeX inlnsToLaTeX blksToLaTeX blkCapt specs thead tbody tfoot = do + let (caption, aligns, widths, heads, rows) = + toLegacyTable blkCapt specs thead tbody tfoot + -- simple tables have to have simple cells: + let isSimple = \case + [Plain _] -> True + [Para _] -> True + [] -> True + _ -> False + let widths' = if all (== 0) widths && not (all (all isSimple) rows) + then replicate (length aligns) + (1 / fromIntegral (length aligns)) + else widths + (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption + let toHeaders hs = do contents <- tableRowToLaTeX blksToLaTeX True aligns hs + return ("\\toprule" $$ contents $$ "\\midrule") + let removeNote (Note _) = Span ("", [], []) [] + removeNote x = x + firsthead <- if isEmpty captionText || all null heads + then return empty + else ($$ text "\\endfirsthead") <$> toHeaders heads + head' <- if all null heads + then return "\\toprule" + -- avoid duplicate notes in head and firsthead: + else toHeaders (if isEmpty firsthead + then heads + else walk removeNote heads) + let capt = if isEmpty captionText + then empty + else "\\caption" <> captForLof <> braces captionText + <> "\\tabularnewline" + rows' <- mapM (tableRowToLaTeX blksToLaTeX False aligns) rows + let colDescriptors = + (if all (== 0) widths' + then hcat . map literal + else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $ + zipWith (toColDescriptor (length widths')) aligns widths' + modify $ \s -> s{ stTable = True } + notes <- notesToLaTeX <$> gets stNotes + return $ "\\begin{longtable}[]" <> + braces ("@{}" <> colDescriptors <> "@{}") + -- the @{} removes extra space at beginning and end + $$ capt + $$ firsthead + $$ head' + $$ "\\endhead" + $$ vcat rows' + $$ "\\bottomrule" + $$ "\\end{longtable}" + $$ captNotes + $$ notes + +toColDescriptor :: Int -> Alignment -> Double -> Text +toColDescriptor _numcols align 0 = + case align of + AlignLeft -> "l" + AlignRight -> "r" + AlignCenter -> "c" + AlignDefault -> "l" +toColDescriptor numcols align width = + T.pack $ printf + ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" + align' + ((numcols - 1) * 2) + width + where + align' :: String + align' = case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + +tableRowToLaTeX :: PandocMonad m + => ([Block] -> LW m (Doc Text)) + -> Bool + -> [Alignment] + -> [[Block]] + -> LW m (Doc Text) +tableRowToLaTeX blockListToLaTeX header aligns cols = do + cells <- mapM (tableCellToLaTeX blockListToLaTeX header) $ zip aligns cols + return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace" + +-- For simple latex tables (without minipages or parboxes), +-- we need to go to some lengths to get line breaks working: +-- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. +fixLineBreaks :: Block -> Block +fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils +fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils +fixLineBreaks x = x + +fixLineBreaks' :: [Inline] -> [Inline] +fixLineBreaks' ils = case splitBy (== LineBreak) ils of + [] -> [] + [xs] -> xs + chunks -> RawInline "tex" "\\vtop{" : + concatMap tohbox chunks <> + [RawInline "tex" "}"] + where tohbox ys = RawInline "tex" "\\hbox{\\strut " : ys <> + [RawInline "tex" "}"] + +-- We also change display math to inline math, since display +-- math breaks in simple tables. +displayMathToInline :: Inline -> Inline +displayMathToInline (Math DisplayMath x) = Math InlineMath x +displayMathToInline x = x + +tableCellToLaTeX :: PandocMonad m + => ([Block] -> LW m (Doc Text)) + -> Bool -> (Alignment, [Block]) + -> LW m (Doc Text) +tableCellToLaTeX blockListToLaTeX header (align, blocks) = do + beamer <- gets stBeamer + externalNotes <- gets stExternalNotes + inMinipage <- gets stInMinipage + -- See #5367 -- footnotehyper/footnote don't work in beamer, + -- so we need to produce the notes outside the table... + modify $ \st -> st{ stExternalNotes = beamer } + let isPlainOrPara = \case + Para{} -> True + Plain{} -> True + _ -> False + result <- + if all isPlainOrPara blocks + then + blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks + else do + modify $ \st -> st{ stInMinipage = True } + cellContents <- blockListToLaTeX blocks + modify $ \st -> st{ stInMinipage = inMinipage } + let valign = text $ if header then "[b]" else "[t]" + let halign = case align of + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + return $ "\\begin{minipage}" <> valign <> + braces "\\linewidth" <> halign <> cr <> + cellContents <> cr <> + "\\end{minipage}" + modify $ \st -> st{ stExternalNotes = externalNotes } + return result diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs new file mode 100644 index 000000000..a76388729 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs @@ -0,0 +1,80 @@ +module Text.Pandoc.Writers.LaTeX.Types + ( LW + , WriterState (..) + , startingState + ) where + +import Control.Monad.State.Strict (StateT) +import Data.Text (Text) +import Text.DocLayout (Doc) +import Text.Pandoc.Options + ( WriterOptions (writerIncremental, writerTopLevelDivision) + , TopLevelDivision (..) + ) + +-- | LaTeX writer type. The type constructor @m@ will typically be an +-- instance of PandocMonad. +type LW m = StateT WriterState m + +data WriterState = + WriterState + { stInNote :: Bool -- ^ true if we're in a note + , stInQuote :: Bool -- ^ true if in a blockquote + , stExternalNotes :: Bool -- ^ true if in context where + -- we need to store footnotes + , stInMinipage :: Bool -- ^ true if in minipage + , stInHeading :: Bool -- ^ true if in a section heading + , stInItem :: Bool -- ^ true if in \item[..] + , stNotes :: [Doc Text] -- ^ notes in a minipage + , stOLLevel :: Int -- ^ level of ordered list nesting + , stOptions :: WriterOptions -- ^ writer options, so they don't have to + -- be parameter + , stVerbInNote :: Bool -- ^ true if document has verbatim text in note + , stTable :: Bool -- ^ true if document has a table + , stStrikeout :: Bool -- ^ true if document has strikeout + , stUrl :: Bool -- ^ true if document has visible URL link + , stGraphics :: Bool -- ^ true if document contains images + , stLHS :: Bool -- ^ true if document has literate haskell code + , stHasChapters :: Bool -- ^ true if document has chapters + , stCsquotes :: Bool -- ^ true if document uses csquotes + , stHighlighting :: Bool -- ^ true if document has highlighted code + , stIncremental :: Bool -- ^ true if beamer lists should be + -- displayed bit by bit + , stInternalLinks :: [Text] -- ^ list of internal link targets + , stBeamer :: Bool -- ^ produce beamer + , stEmptyLine :: Bool -- ^ true if no content on line + , stHasCslRefs :: Bool -- ^ has a Div with class refs + , stIsFirstInDefinition :: Bool -- ^ first block in a defn list + } + +startingState :: WriterOptions -> WriterState +startingState options = + WriterState + { stInNote = False + , stInQuote = False + , stExternalNotes = False + , stInHeading = False + , stInMinipage = False + , stInItem = False + , stNotes = [] + , stOLLevel = 1 + , stOptions = options + , stVerbInNote = False + , stTable = False + , stStrikeout = False + , stUrl = False + , stGraphics = False + , stLHS = False + , stHasChapters = case writerTopLevelDivision options of + TopLevelPart -> True + TopLevelChapter -> True + _ -> False + , stCsquotes = False + , stHighlighting = False + , stIncremental = writerIncremental options + , stInternalLinks = [] + , stBeamer = False + , stEmptyLine = True + , stHasCslRefs = False + , stIsFirstInDefinition = False + } -- cgit v1.2.3 From 8cf58d96e0801d8073e118f05279a9f473efcee0 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sun, 13 Dec 2020 14:09:53 +0100 Subject: Docx writer: use Content instead of Element. --- src/Text/Pandoc/Writers/Docx.hs | 134 ++++++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 59 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 4cb879e6a..97048e980 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -441,7 +441,7 @@ writeDocx opts doc = do Nothing -> mknode "w:sectPr" [] () -- let sectpr = fromMaybe (mknode "w:sectPr" [] ()) mbsectpr' - let contents' = contents ++ [sectpr] + let contents' = contents ++ [Elem sectpr] let docContents = mknode "w:document" stdAttributes $ mknode "w:body" [] contents' @@ -538,7 +538,8 @@ writeDocx opts doc = do -- docProps/custom.xml let customProperties :: [(String, String)] - customProperties = [(T.unpack k, T.unpack $ lookupMetaString k meta) | k <- M.keys (unMeta meta) + customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta) + | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords"] ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" @@ -788,7 +789,7 @@ makeTOC opts = do mknode "w:docPartUnique" [] ()] -- w:docPartObj ), -- w:sdtPr - mknode "w:sdtContent" [] (title++[ + mknode "w:sdtContent" [] (title ++ [ Elem $ mknode "w:p" [] ( mknode "w:r" [] [ mknode "w:fldChar" [("w:fldCharType","begin"),("w:dirty","true")] (), @@ -802,7 +803,9 @@ makeTOC opts = do -- | Convert Pandoc document to two lists of -- OpenXML elements (the main document and footnotes). -writeOpenXML :: (PandocMonad m) => WriterOptions -> Pandoc -> WS m ([Element], [Element],[Element]) +writeOpenXML :: (PandocMonad m) + => WriterOptions -> Pandoc + -> WS m ([Content], [Element], [Element]) writeOpenXML opts (Pandoc meta blocks) = do let tit = docTitle meta let auths = docAuthors meta @@ -830,6 +833,7 @@ writeOpenXML opts (Pandoc meta blocks) = do return $ mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs] [ mknode "w:p" [] $ + map Elem [ mknode "w:pPr" [] [ mknode "w:pStyle" [("w:val", "CommentText")] () ] , mknode "w:r" [] @@ -844,11 +848,11 @@ writeOpenXML opts (Pandoc meta blocks) = do toc <- if includeTOC then makeTOC opts else return [] - let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ toc + let meta' = title ++ subtitle ++ authors ++ date ++ abstract ++ map Elem toc return (meta' ++ doc', notes', comments') -- | Convert a list of Pandoc blocks to OpenXML. -blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Element] +blocksToOpenXML :: (PandocMonad m) => WriterOptions -> [Block] -> WS m [Content] blocksToOpenXML opts = fmap concat . mapM (blockToOpenXML opts) . separateTables -- Word combines adjacent tables unless you put an empty paragraph between @@ -884,10 +888,10 @@ dynamicStyleKey :: T.Text dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. -blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] +blockToOpenXML :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content] blockToOpenXML opts blk = withDirection $ blockToOpenXML' opts blk -blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Element] +blockToOpenXML' :: (PandocMonad m) => WriterOptions -> Block -> WS m [Content] blockToOpenXML' _ Null = return [] blockToOpenXML' opts (Div (ident,_classes,kvs) bs) = do stylemod <- case lookup dynamicStyleKey kvs of @@ -921,18 +925,18 @@ blockToOpenXML' opts (Header lev (ident,_,kvs) lst) = do Just n -> do num <- withTextPropM (rStyleM "SectionNumber") (inlineToOpenXML opts (Str n)) - return $ num ++ [mknode "w:r" [] [mknode "w:tab" [] ()]] + return $ num ++ [Elem $ mknode "w:r" [] [mknode "w:tab" [] ()]] Nothing -> return [] else return [] contents <- (number ++) <$> inlinesToOpenXML opts lst if T.null ident - then return [mknode "w:p" [] (paraProps ++ contents)] + then return [Elem $ mknode "w:p" [] (map Elem paraProps ++ contents)] else do let bookmarkName = ident modify $ \s -> s{ stSectionIds = Set.insert bookmarkName $ stSectionIds s } bookmarkedContents <- wrapBookmark bookmarkName contents - return [mknode "w:p" [] (paraProps ++ bookmarkedContents)] + return [Elem $ mknode "w:p" [] (map Elem paraProps ++ bookmarkedContents)] blockToOpenXML' opts (Plain lst) = do isInTable <- gets stInTable isInList <- gets stInList @@ -952,7 +956,9 @@ blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] captionNode <- withParaPropM (pStyleM "Image Caption") $ blockToOpenXML opts (Para alt) - return $ mknode "w:p" [] (paraProps ++ contents) : captionNode + return $ + Elem (mknode "w:p" [] (map Elem paraProps ++ contents)) + : captionNode blockToOpenXML' opts (Para lst) | null lst && not (isEnabled Ext_empty_paragraphs opts) = return [] | otherwise = do @@ -969,10 +975,10 @@ blockToOpenXML' opts (Para lst) ps -> ps modify $ \s -> s { stFirstPara = False } contents <- inlinesToOpenXML opts lst - return [mknode "w:p" [] (paraProps' ++ contents)] + return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) - | format == Format "openxml" = return [ x | Elem x <- parseXML str ] + | format == Format "openxml" = return (parseXML str) | otherwise = do report $ BlockNotRendered b return [] @@ -987,7 +993,7 @@ blockToOpenXML' opts (CodeBlock attrs@(ident, _, _) str) = do wrapBookmark ident p blockToOpenXML' _ HorizontalRule = do setFirstPara - return [ + return [ Elem $ mknode "w:p" [] $ mknode "w:r" [] $ mknode "w:pict" [] $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), @@ -1006,26 +1012,28 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do -- Not in the spec but in Word 2007, 2010. See #4953. let cellToOpenXML (al, cell) = do es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell - return $ if any (\e -> qName (elName e) == "p") es + return $ if any (\e -> qName (elName e) == "p") (onlyElems es) then es - else es ++ [mknode "w:p" [] ()] + else es ++ [Elem $ mknode "w:p" [] ()] headers' <- mapM cellToOpenXML $ zip aligns headers rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - let borderProps = mknode "w:tcPr" [] + let borderProps = Elem $ mknode "w:tcPr" [] [ mknode "w:tcBorders" [] $ mknode "w:bottom" [("w:val","single")] () , mknode "w:vAlign" [("w:val","bottom")] () ] compactStyle <- pStyleM "Compact" - let emptyCell' = [mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] + let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] let mkcell border contents = mknode "w:tc" [] $ [ borderProps | border ] ++ if null contents then emptyCell' else contents - let mkrow border cells = mknode "w:tr" [] $ - [mknode "w:trPr" [] [ - mknode "w:cnfStyle" [("w:firstRow","1")] ()] | border] - ++ map (mkcell border) cells + let mkrow border cells = + mknode "w:tr" [] $ + [ mknode "w:trPr" [] + [ mknode "w:cnfStyle" [("w:firstRow","1")] ()] + | border] + ++ map (mkcell border) cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths @@ -1035,7 +1043,8 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do modify $ \s -> s { stInTable = False } return $ caption' ++ - [mknode "w:tbl" [] + [Elem $ + mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : @@ -1070,7 +1079,9 @@ blockToOpenXML' opts (DefinitionList items) = do setFirstPara return l -definitionListItemToOpenXML :: (PandocMonad m) => WriterOptions -> ([Inline],[[Block]]) -> WS m [Element] +definitionListItemToOpenXML :: (PandocMonad m) + => WriterOptions -> ([Inline],[[Block]]) + -> WS m [Content] definitionListItemToOpenXML opts (term,defs) = do term' <- withParaPropM (pStyleM "Definition Term") $ blockToOpenXML opts (Para term) @@ -1083,8 +1094,11 @@ addList marker = do lists <- gets stLists modify $ \st -> st{ stLists = lists ++ [marker] } -listItemToOpenXML :: (PandocMonad m) => WriterOptions -> Int -> [Block] -> WS m [Element] -listItemToOpenXML _ _ [] = return [] +listItemToOpenXML :: (PandocMonad m) + => WriterOptions + -> Int -> [Block] + -> WS m [Content] +listItemToOpenXML _ _ [] = return [] listItemToOpenXML opts numid (first:rest) = do oldInList <- gets stInList modify $ \st -> st{ stInList = True } @@ -1111,7 +1125,7 @@ alignmentToString alignment = case alignment of AlignDefault -> "left" -- | Convert a list of inline elements to OpenXML. -inlinesToOpenXML :: (PandocMonad m) => WriterOptions -> [Inline] -> WS m [Element] +inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst withNumId :: (PandocMonad m) => Int -> WS m a -> WS m a @@ -1186,12 +1200,12 @@ setFirstPara :: PandocMonad m => WS m () setFirstPara = modify $ \s -> s { stFirstPara = True } -- | Convert an inline element to OpenXML. -inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] +inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il -inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Element] +inlineToOpenXML' :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] inlineToOpenXML' _ (Str str) = - formattedString str + map Elem <$> formattedString str inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ") inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) = @@ -1199,10 +1213,11 @@ inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) = inlineToOpenXML' opts (Span ("",["csl-left-margin"],[]) ils) = inlinesToOpenXML opts ils inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) = - ([mknode "w:r" [] - (mknode "w:t" - [("xml:space","preserve")] - ("\t" :: String))] ++) + ([Elem $ + mknode "w:r" [] + (mknode "w:t" + [("xml:space","preserve")] + ("\t" :: String))] ++) <$> inlinesToOpenXML opts ils inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) = inlinesToOpenXML opts ils @@ -1212,18 +1227,18 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do let ident' = fromMaybe ident (lookup "id" kvs) kvs' = filter (("id" /=) . fst) kvs modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } - return [ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] + return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = -- prefer the "id" in kvs, since that is the one produced by the docx -- reader. let ident' = fromMaybe ident (lookup "id" kvs) - in - return [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () - , mknode "w:r" [] - [ mknode "w:rPr" [] - [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] - , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] - ] + in return . map Elem $ + [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () + , mknode "w:r" [] + [ mknode "w:rPr" [] + [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] + , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] + ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of Just (fromString . T.unpack -> sty) -> do @@ -1255,8 +1270,9 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do modify $ \s -> s{stInsId = insId + 1} return $ \f -> do x <- f - return [ mknode "w:ins" - (("w:id", show insId) : changeAuthorDate) x] + return [Elem $ + mknode "w:ins" + (("w:id", show insId) : changeAuthorDate) x] else return id delmod <- if "deletion" `elem` classes then do @@ -1265,8 +1281,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do modify $ \s -> s{stDelId = delId + 1} return $ \f -> local (\env->env{envInDel=True}) $ do x <- f - return [mknode "w:del" - (("w:id", show delId) : changeAuthorDate) x] + return [Elem $ mknode "w:del" + (("w:id", show delId) : changeAuthorDate) x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils @@ -1294,9 +1310,9 @@ inlineToOpenXML' opts (SmallCaps lst) = inlineToOpenXML' opts (Strikeout lst) = withTextProp (mknode "w:strike" [] ()) $ inlinesToOpenXML opts lst -inlineToOpenXML' _ LineBreak = return [br] +inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) - | f == Format "openxml" = return [ x | Elem x <- parseXML str ] + | f == Format "openxml" = return (parseXML str) | otherwise = do report $ InlineNotRendered il return [] @@ -1309,13 +1325,13 @@ inlineToOpenXML' opts (Math mathType str) = do when (mathType == DisplayMath) setFirstPara res <- (lift . lift) (convertMath writeOMML mathType str) case res of - Right r -> return [r] + Right r -> return [Elem r] Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do let alltoktypes = [KeywordTok ..] tokTypesMap <- mapM (\tt -> (,) tt <$> rStyleM (fromString $ show tt)) alltoktypes - let unhighlighted = intercalate [br] `fmap` + let unhighlighted = (map Elem . intercalate [br]) `fmap` mapM formattedString (T.lines str) formatOpenXML _fmtOpts = intercalate [br] . map (map toHlTok) toHlTok (toktype,tok) = @@ -1328,7 +1344,7 @@ inlineToOpenXML' opts (Code attrs str) = do then unhighlighted else case highlight (writerSyntaxMap opts) formatOpenXML attrs str of - Right h -> return h + Right h -> return (map Elem h) Left msg -> do unless (T.null msg) $ report $ CouldNotHighlight msg unhighlighted @@ -1351,14 +1367,14 @@ inlineToOpenXML' opts (Note bs) = do $ insertNoteRef bs) let newnote = mknode "w:footnote" [("w:id", notenum)] contents modify $ \s -> s{ stFootnotes = newnote : notes } - return [ mknode "w:r" [] + return [ Elem $ mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteReference" [("w:id", notenum)] () ] ] -- internal link: inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return - [ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] + [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] -- external link: inlineToOpenXML' opts (Link _ txt (src,_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt @@ -1370,7 +1386,7 @@ inlineToOpenXML' opts (Link _ txt (src,_)) = do modify $ \st -> st{ stExternalLinks = M.insert (T.unpack src) i extlinks } return i - return [ mknode "w:hyperlink" [("r:id",id')] contents ] + return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do pageWidth <- asks envPrintWidth imgs <- gets stImages @@ -1434,7 +1450,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do imgElt wrapBookmark imgident =<< case stImage of - Just imgData -> return [generateImgElt imgData] + Just imgData -> return [Elem $ generateImgElt imgData] Nothing -> ( do --try (img, mt) <- P.fetchItem src ident <- ("rId"++) `fmap` getUniqueId @@ -1462,7 +1478,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do else do -- insert mime type to use in constructing [Content_Types].xml modify $ \st -> st { stImages = M.insert (T.unpack src) imgData $ stImages st } - return [generateImgElt imgData] + return [Elem $ generateImgElt imgData] ) `catchError` ( \e -> do report $ CouldNotFetchResource src $ T.pack (show e) @@ -1512,7 +1528,7 @@ withDirection x = do , envTextProperties = EnvProps textStyle textProps' } -wrapBookmark :: (PandocMonad m) => T.Text -> [Element] -> WS m [Element] +wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content] wrapBookmark "" contents = return contents wrapBookmark ident contents = do id' <- getUniqueId @@ -1520,7 +1536,7 @@ wrapBookmark ident contents = do [("w:id", id') ,("w:name", T.unpack $ toBookmarkName ident)] () bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () - return $ bookmarkStart : contents ++ [bookmarkEnd] + return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd] -- Word imposes a 40 character limit on bookmark names and requires -- that they begin with a letter. So we just use a hash of the -- cgit v1.2.3 From 00031fc809117cb436397aba83a41ca1d4056f61 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sun, 13 Dec 2020 14:09:59 +0100 Subject: Docx writer: keep raw openxml strings verbatim. Closes: #6933 --- src/Text/Pandoc/Writers/Docx.hs | 7 +++++-- test/Tests/Writers/Docx.hs | 10 ++++++++++ test/docx/golden/raw-blocks.docx | Bin 0 -> 9888 bytes test/docx/golden/raw-bookmarks.docx | Bin 0 -> 10023 bytes test/docx/raw-blocks.native | 6 ++++++ test/docx/raw-bookmarks.native | 3 +++ 6 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 test/docx/golden/raw-blocks.docx create mode 100644 test/docx/golden/raw-bookmarks.docx create mode 100644 test/docx/raw-blocks.native create mode 100644 test/docx/raw-bookmarks.native (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 97048e980..0174a8501 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -978,7 +978,9 @@ blockToOpenXML' opts (Para lst) return [Elem $ mknode "w:p" [] (map Elem paraProps' ++ contents)] blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) - | format == Format "openxml" = return (parseXML str) + | format == Format "openxml" = return [ + Text (CData CDataRaw (T.unpack str) Nothing) + ] | otherwise = do report $ BlockNotRendered b return [] @@ -1312,7 +1314,8 @@ inlineToOpenXML' opts (Strikeout lst) = $ inlinesToOpenXML opts lst inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) - | f == Format "openxml" = return (parseXML str) + | f == Format "openxml" = return + [Text (CData CDataRaw (T.unpack str) Nothing)] | otherwise = do report $ InlineNotRendered il return [] diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 8f051b4b7..66a5c3d36 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -128,6 +128,16 @@ tests = [ testGroup "inlines" def "docx/codeblock.native" "docx/golden/codeblock.docx" + , docxTest + "raw OOXML blocks" + def + "docx/raw-blocks.native" + "docx/golden/raw-blocks.docx" + , docxTest + "raw bookmark markers" + def + "docx/raw-bookmarks.native" + "docx/golden/raw-bookmarks.docx" ] , testGroup "track changes" [ docxTest diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx new file mode 100644 index 000000000..ae7f8f1f0 Binary files /dev/null and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx new file mode 100644 index 000000000..5e433b736 Binary files /dev/null and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/raw-blocks.native b/test/docx/raw-blocks.native new file mode 100644 index 000000000..d7f985bc3 --- /dev/null +++ b/test/docx/raw-blocks.native @@ -0,0 +1,6 @@ +[Para [Str "Cell",Space,Str "compartments"] +,RawBlock (Format "openxml") "<w:tbl>\n<w:tblPr>\n<w:tblW w:w=\"2000\" w:type=\"pct\"/>\n<w:tblBorders>\n<w:top w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:start w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:bottom w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n<w:end w:val=\"single\" w:sz=\"4\" w:color=\"198200\"/>\n</w:tblBorders>\n</w:tblPr>\n<w:tblGrid>\n<w:gridCol w:w=\"1871\" />\n<w:gridCol w:w=\"1872\" />\n</w:tblGrid>\n<w:tr>\n<w:tc>" +,Para [Str "Ribosome"] +,RawBlock (Format "openxml") "</w:tc>\n<w:tc>" +,Para [Str "Lysosome"] +,RawBlock (Format "openxml") "</w:tc>\n</w:tr>\n</w:tbl>"] diff --git a/test/docx/raw-bookmarks.native b/test/docx/raw-bookmarks.native new file mode 100644 index 000000000..1e76655d6 --- /dev/null +++ b/test/docx/raw-bookmarks.native @@ -0,0 +1,3 @@ +[Para [Str "Manual",Space,Str "endnotes."] +,Para [Str "Nullam",Space,Str "eu",Space,Str "ante",Space,Str "vel",Space,Str "est",Space,Str "convallis",Space,Str "dignissim.",Space,Str "Nunc",Space,Str "porta",Space,Str "vulputate",Space,Str "tellus.",Space,Str "Nunc",Space,Str "rutrum",Space,Str "turpis",Space,Str "sed",Space,Str "pede.",Space,Str "Sed",Space,Str "bibendum.",RawInline (Format "openxml") "<w:bookmarkStart w:id=\"0\" w:name=\"Aliquam\"/>",Str "Aliquam",Space,Str "posuere."] +,Para [Str "Nunc",Space,Str "aliquet,",Space,Str "augue",Space,Str "nec",Space,Str "adipiscing",Space,Str "interdum,",Space,Str "lacus",Space,Str "tellus",Space,Str "malesuada",Space,Str "massa,",Space,Str "quis",Space,Str "varius",Space,Str "mi",Space,Str "purus",Space,Str "non",Space,Str "odio.",RawInline (Format "openxml") "<w:bookmarkEnd w:id=\"0\"/>",Str "Pellentesque",Space,Str "condimentum,",Space,Str "magna",Space,Str "ut",Space,Str "suscipit",Space,Str "hendrerit,",Space,Str "ipsum",Space,Str "augue",Space,Str "ornare",Space,Str "nulla,",Space,Str "non",Space,Str "luctus",Space,Str "diam",Space,Str "neque",Space,Str "sit",Space,Str "amet",Space,Str "urna.",Space,Str "Curabitur",Space,Str "vulputate",Space,Str "vestibulum",Space,Str "lorem."]] -- cgit v1.2.3 From ef62b70646c501f8b9fd7f94ede4da9a79b63fa0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 13 Dec 2020 10:18:29 -0800 Subject: ImageSize: use JuicyPixels to determine png size. --- src/Text/Pandoc/ImageSize.hs | 50 +++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index fc9e1854b..8517d07c2 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -54,6 +54,8 @@ import Control.Monad.Except import Control.Applicative import Data.Maybe (fromMaybe) import qualified Data.Attoparsec.ByteString.Char8 as A +import qualified Codec.Picture.Metadata as Metadata +import Codec.Picture (decodeImageWithMetadata) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl @@ -122,7 +124,7 @@ findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize imageSize opts img = checkDpi <$> case imageType img of - Just Png -> mbToEither "could not determine PNG size" $ pngSize img + Just Png -> pngSize img Just Gif -> mbToEither "could not determine GIF size" $ gifSize img Just Jpeg -> jpegSize img Just Svg -> mbToEither "could not determine SVG size" $ svgSize opts img @@ -300,36 +302,22 @@ pPdfSize = do , dpiY = 72 } ) <|> pPdfSize -pngSize :: ByteString -> Maybe ImageSize -pngSize img = do - let (h, rest) = B.splitAt 8 img - guard $ h == "\x8a\x4d\x4e\x47\x0d\x0a\x1a\x0a" || - h == "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a" - let (i, rest') = B.splitAt 4 $ B.drop 4 rest - guard $ i == "MHDR" || i == "IHDR" - let (sizes, rest'') = B.splitAt 8 rest' - (x,y) <- case map fromIntegral $unpack sizes of - ([w1,w2,w3,w4,h1,h2,h3,h4] :: [Integer]) -> return - (shift w1 24 + shift w2 16 + shift w3 8 + w4, - shift h1 24 + shift h2 16 + shift h3 8 + h4) - _ -> Nothing -- "PNG parse error" - (dpix, dpiy) <- findpHYs rest'' - return ImageSize { pxX = x, pxY = y, dpiX = dpix, dpiY = dpiy } - -findpHYs :: ByteString -> Maybe (Integer, Integer) -findpHYs x - | B.null x || "IDAT" `B.isPrefixOf` x = return (72,72) - | "pHYs" `B.isPrefixOf` x = - case map fromIntegral $ unpack $ B.take 9 $ B.drop 4 x of - [x1,x2,x3,x4,y1,y2,y3,y4,u] -> do - let factor = if u == 1 -- dots per meter - then \z -> z * 254 `div` 10000 - else const 72 - return - ( factor $ shift x1 24 + shift x2 16 + shift x3 8 + x4, - factor $ shift y1 24 + shift y2 16 + shift y3 8 + y4 ) - _ -> mzero - | otherwise = findpHYs $ B.drop 1 x -- read another byte +pngSize :: ByteString -> Either T.Text ImageSize +pngSize img = + case decodeImageWithMetadata img of + Left e -> Left (T.pack e) + Right (_, meta) -> do + pxx <- maybe (Left "Could not determine width") Right $ + Metadata.lookup Metadata.Width meta + pxy <- maybe (Left "Could not determine height") Right $ + Metadata.lookup Metadata.Height meta + dpix <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiX meta + dpiy <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiY meta + return $ ImageSize + { pxX = fromIntegral pxx + , pxY = fromIntegral pxy + , dpiX = fromIntegral dpix + , dpiY = fromIntegral dpiy } gifSize :: ByteString -> Maybe ImageSize gifSize img = do -- cgit v1.2.3 From c3aa90b57a8b9bdafb588098b115280044531bba Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 13 Dec 2020 10:31:57 -0800 Subject: ImageSize: use JuicyPixels to extract size... ...for png, jpeg, gif, instead of doing our own binary parsing. See #6936. --- src/Text/Pandoc/ImageSize.hs | 313 ++----------------------------------------- 1 file changed, 8 insertions(+), 305 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 8517d07c2..665a94690 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} +{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} {-# LANGUAGE ViewPatterns #-} {-# OPTIONS_GHC -fno-warn-type-defaults #-} {- | @@ -32,14 +32,12 @@ module Text.Pandoc.ImageSize ( ImageType(..) , showInPixel , showFl ) where -import Data.ByteString (ByteString, unpack) +import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy as BL +import Data.Binary.Get import Data.Char (isDigit) import Control.Monad -import Data.Bits -import Data.Binary -import Data.Binary.Get import Text.Pandoc.Shared (safeRead) import Data.Default (Default) import Numeric (showFFloat) @@ -47,12 +45,9 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.XML.Light as Xml -import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Text.Encoding as TE -import Control.Monad.Except import Control.Applicative -import Data.Maybe (fromMaybe) import qualified Data.Attoparsec.ByteString.Char8 as A import qualified Codec.Picture.Metadata as Metadata import Codec.Picture (decodeImageWithMetadata) @@ -124,9 +119,9 @@ findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize imageSize opts img = checkDpi <$> case imageType img of - Just Png -> pngSize img - Just Gif -> mbToEither "could not determine GIF size" $ gifSize img - Just Jpeg -> jpegSize img + Just Png -> getSize img + Just Gif -> getSize img + Just Jpeg -> getSize img Just Svg -> mbToEither "could not determine SVG size" $ svgSize opts img Just Eps -> mbToEither "could not determine EPS size" $ epsSize img Just Pdf -> mbToEither "could not determine PDF size" $ pdfSize img @@ -141,9 +136,6 @@ imageSize opts img = checkDpi <$> , dpiY = if dpiY size == 0 then 72 else dpiY size } -defaultSize :: (Integer, Integer) -defaultSize = (72, 72) - sizeInPixels :: ImageSize -> (Integer, Integer) sizeInPixels s = (pxX s, pxY s) @@ -302,8 +294,8 @@ pPdfSize = do , dpiY = 72 } ) <|> pPdfSize -pngSize :: ByteString -> Either T.Text ImageSize -pngSize img = +getSize :: ByteString -> Either T.Text ImageSize +getSize img = case decodeImageWithMetadata img of Left e -> Left (T.pack e) Right (_, meta) -> do @@ -319,18 +311,6 @@ pngSize img = , dpiX = fromIntegral dpix , dpiY = fromIntegral dpiy } -gifSize :: ByteString -> Maybe ImageSize -gifSize img = do - let (h, rest) = B.splitAt 6 img - guard $ h == "GIF87a" || h == "GIF89a" - case map fromIntegral $ unpack $ B.take 4 rest of - [w2,w1,h2,h1] -> return ImageSize { - pxX = shift w1 8 + w2, - pxY = shift h1 8 + h2, - dpiX = 72, - dpiY = 72 - } - _ -> Nothing -- "GIF parse error" svgSize :: WriterOptions -> ByteString -> Maybe ImageSize svgSize opts img = do @@ -378,280 +358,3 @@ emfSize img = case parseheader . BL.fromStrict $ img of Left _ -> Nothing Right (_, _, size) -> Just size - - -jpegSize :: ByteString -> Either T.Text ImageSize -jpegSize img = - let (hdr, rest) = B.splitAt 4 img - in if B.length rest < 14 - then Left "unable to determine JPEG size" - else case hdr of - "\xff\xd8\xff\xe0" -> jfifSize rest - "\xff\xd8\xff\xe1" -> exifSize rest - _ -> Left "unable to determine JPEG size" - -jfifSize :: ByteString -> Either T.Text ImageSize -jfifSize rest = - case map fromIntegral $ unpack $ B.take 5 $ B.drop 9 rest of - [dpiDensity,dpix1,dpix2,dpiy1,dpiy2] -> - let factor = case dpiDensity of - 1 -> id - 2 -> \x -> x * 254 `div` 10 - _ -> const 72 - dpix = factor (shift dpix1 8 + dpix2) - dpiy = factor (shift dpiy1 8 + dpiy2) - in case findJfifSize rest of - Left msg -> Left msg - Right (w,h) -> Right ImageSize { pxX = w - , pxY = h - , dpiX = dpix - , dpiY = dpiy } - _ -> Left "unable to determine JFIF size" - -findJfifSize :: ByteString -> Either T.Text (Integer,Integer) -findJfifSize bs = - let bs' = B.dropWhile (=='\xff') $ B.dropWhile (/='\xff') bs - in case B.uncons bs' of - Just (c,bs'') | c >= '\xc0' && c <= '\xc3' -> - case map fromIntegral $ unpack $ B.take 4 $ B.drop 3 bs'' of - [h1,h2,w1,w2] -> Right (shift w1 8 + w2, shift h1 8 + h2) - _ -> Left "JFIF parse error" - Just (_,bs'') -> - case map fromIntegral $ unpack $ B.take 2 bs'' of - [c1,c2] -> - let len = shift c1 8 + c2 - -- skip variables - in findJfifSize $ B.drop len bs'' - _ -> Left "JFIF parse error" - Nothing -> Left "Did not find JFIF length record" - -runGet' :: Get (Either T.Text a) -> BL.ByteString -> Either T.Text a -runGet' p bl = -#if MIN_VERSION_binary(0,7,0) - case runGetOrFail p bl of - Left (_,_,msg) -> Left $ T.pack msg - Right (_,_,x) -> x -#else - runGet p bl -#endif - -exifSize :: ByteString -> Either T.Text ImageSize -exifSize bs = runGet' header bl - where bl = BL.fromChunks [bs] - header = runExceptT $ exifHeader bl --- NOTE: It would be nicer to do --- runGet ((Just <$> exifHeader) <|> return Nothing) --- which would prevent pandoc from raising an error when an exif header can't --- be parsed. But we only get an Alternative instance for Get in binary 0.6, --- and binary 0.5 ships with ghc 7.6. - -exifHeader :: BL.ByteString -> ExceptT T.Text Get ImageSize -exifHeader hdr = do - _app1DataSize <- lift getWord16be - exifHdr <- lift getWord32be - unless (exifHdr == 0x45786966) $ throwError "Did not find exif header" - zeros <- lift getWord16be - unless (zeros == 0) $ throwError "Expected zeros after exif header" - -- beginning of tiff header -- we read whole thing to use - -- in getting data from offsets: - let tiffHeader = BL.drop 8 hdr - byteAlign <- lift getWord16be - let bigEndian = byteAlign == 0x4d4d - let (getWord16, getWord32, getWord64) = - if bigEndian - then (getWord16be, getWord32be, getWord64be) - else (getWord16le, getWord32le, getWord64le) - let getRational = do - num <- getWord32 - den <- getWord32 - return $ fromIntegral num / fromIntegral den - tagmark <- lift getWord16 - unless (tagmark == 0x002a) $ throwError "Failed alignment sanity check" - ifdOffset <- lift getWord32 - lift $ skip (fromIntegral ifdOffset - 8) -- skip to IDF - numentries <- lift getWord16 - let ifdEntry :: ExceptT T.Text Get (TagType, DataFormat) - ifdEntry = do - tag <- fromMaybe UnknownTagType . flip M.lookup tagTypeTable - <$> lift getWord16 - dataFormat <- lift getWord16 - numComponents <- lift getWord32 - (fmt, bytesPerComponent) <- - case dataFormat of - 1 -> return (UnsignedByte <$> getWord8, 1) - 2 -> return (AsciiString <$> - getLazyByteString - (fromIntegral numComponents), 1) - 3 -> return (UnsignedShort <$> getWord16, 2) - 4 -> return (UnsignedLong <$> getWord32, 4) - 5 -> return (UnsignedRational <$> getRational, 8) - 6 -> return (SignedByte <$> getWord8, 1) - 7 -> return (Undefined <$> getLazyByteString - (fromIntegral numComponents), 1) - 8 -> return (SignedShort <$> getWord16, 2) - 9 -> return (SignedLong <$> getWord32, 4) - 10 -> return (SignedRational <$> getRational, 8) - 11 -> return (SingleFloat <$> getWord32 {- TODO -}, 4) - 12 -> return (DoubleFloat <$> getWord64 {- TODO -}, 8) - _ -> throwError $ "Unknown data format " <> T.pack (show dataFormat) - let totalBytes = fromIntegral $ numComponents * bytesPerComponent - payload <- if totalBytes <= 4 -- data is right here - then lift $ fmt <* skip (4 - totalBytes) - else do -- get data from offset - offs <- lift getWord32 - let bytesAtOffset = - BL.take (fromIntegral totalBytes) - $ BL.drop (fromIntegral offs) tiffHeader - case runGet' (Right <$> fmt) bytesAtOffset of - Left msg -> throwError msg - Right x -> return x - return (tag, payload) - entries <- replicateM (fromIntegral numentries) ifdEntry - subentries <- case lookup ExifOffset entries of - Just (UnsignedLong offset') -> do - pos <- lift bytesRead - lift $ skip (fromIntegral offset' - (fromIntegral pos - 8)) - numsubentries <- lift getWord16 - replicateM (fromIntegral numsubentries) ifdEntry - _ -> return [] - let allentries = entries ++ subentries - (wdth, hght) <- case (lookup ExifImageWidth allentries, - lookup ExifImageHeight allentries) of - (Just (UnsignedLong w), Just (UnsignedLong h)) -> - return (fromIntegral w, fromIntegral h) - _ -> return defaultSize - -- we return a default width and height when - -- the exif header doesn't contain these - let resfactor = case lookup ResolutionUnit allentries of - Just (UnsignedShort 1) -> 100 / 254 - _ -> 1 - let xres = case lookup XResolution allentries of - Just (UnsignedRational x) -> floor (x * resfactor) - _ -> 72 - let yres = case lookup YResolution allentries of - Just (UnsignedRational y) -> floor (y * resfactor) - _ -> 72 - return ImageSize{ - pxX = wdth - , pxY = hght - , dpiX = xres - , dpiY = yres } - -data DataFormat = UnsignedByte Word8 - | AsciiString BL.ByteString - | UnsignedShort Word16 - | UnsignedLong Word32 - | UnsignedRational Rational - | SignedByte Word8 - | Undefined BL.ByteString - | SignedShort Word16 - | SignedLong Word32 - | SignedRational Rational - | SingleFloat Word32 - | DoubleFloat Word64 - deriving (Show) - -data TagType = ImageDescription - | Make - | Model - | Orientation - | XResolution - | YResolution - | ResolutionUnit - | Software - | DateTime - | WhitePoint - | PrimaryChromaticities - | YCbCrCoefficients - | YCbCrPositioning - | ReferenceBlackWhite - | Copyright - | ExifOffset - | ExposureTime - | FNumber - | ExposureProgram - | ISOSpeedRatings - | ExifVersion - | DateTimeOriginal - | DateTimeDigitized - | ComponentConfiguration - | CompressedBitsPerPixel - | ShutterSpeedValue - | ApertureValue - | BrightnessValue - | ExposureBiasValue - | MaxApertureValue - | SubjectDistance - | MeteringMode - | LightSource - | Flash - | FocalLength - | MakerNote - | UserComment - | FlashPixVersion - | ColorSpace - | ExifImageWidth - | ExifImageHeight - | RelatedSoundFile - | ExifInteroperabilityOffset - | FocalPlaneXResolution - | FocalPlaneYResolution - | FocalPlaneResolutionUnit - | SensingMethod - | FileSource - | SceneType - | UnknownTagType - deriving (Show, Eq, Ord) - -tagTypeTable :: M.Map Word16 TagType -tagTypeTable = M.fromList - [ (0x010e, ImageDescription) - , (0x010f, Make) - , (0x0110, Model) - , (0x0112, Orientation) - , (0x011a, XResolution) - , (0x011b, YResolution) - , (0x0128, ResolutionUnit) - , (0x0131, Software) - , (0x0132, DateTime) - , (0x013e, WhitePoint) - , (0x013f, PrimaryChromaticities) - , (0x0211, YCbCrCoefficients) - , (0x0213, YCbCrPositioning) - , (0x0214, ReferenceBlackWhite) - , (0x8298, Copyright) - , (0x8769, ExifOffset) - , (0x829a, ExposureTime) - , (0x829d, FNumber) - , (0x8822, ExposureProgram) - , (0x8827, ISOSpeedRatings) - , (0x9000, ExifVersion) - , (0x9003, DateTimeOriginal) - , (0x9004, DateTimeDigitized) - , (0x9101, ComponentConfiguration) - , (0x9102, CompressedBitsPerPixel) - , (0x9201, ShutterSpeedValue) - , (0x9202, ApertureValue) - , (0x9203, BrightnessValue) - , (0x9204, ExposureBiasValue) - , (0x9205, MaxApertureValue) - , (0x9206, SubjectDistance) - , (0x9207, MeteringMode) - , (0x9208, LightSource) - , (0x9209, Flash) - , (0x920a, FocalLength) - , (0x927c, MakerNote) - , (0x9286, UserComment) - , (0xa000, FlashPixVersion) - , (0xa001, ColorSpace) - , (0xa002, ExifImageWidth) - , (0xa003, ExifImageHeight) - , (0xa004, RelatedSoundFile) - , (0xa005, ExifInteroperabilityOffset) - , (0xa20e, FocalPlaneXResolution) - , (0xa20f, FocalPlaneYResolution) - , (0xa210, FocalPlaneResolutionUnit) - , (0xa217, SensingMethod) - , (0xa300, FileSource) - , (0xa301, SceneType) - ] -- cgit v1.2.3 From c43e2dc0f4ce639c8d0bd156afea2fd07ffc91ff Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 13 Dec 2020 10:45:03 -0800 Subject: RST writer: better image handling. - An image alone in its paragraph (but not a figure) is now rendered as an independent image, with an `alt` attribute if a description is supplied. - An inline image that is not alone in its paragraph will be rendered, as before, using a substitution. Such an image cannot have a "center", "left", or "right" alignment, so the classes `align-center`, `align-left`, or `align-right` are ignored. However, `align-top`, `align-middle`, `align-bottom` will generate a corresponding `align` attribute. Closes #6948. --- src/Text/Pandoc/Writers/RST.hs | 30 +++++++++++++++++++++--------- test/command/4420.md | 11 ----------- test/command/6948.md | 31 +++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 test/command/6948.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 43bf382b7..8beeef46a 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -143,9 +143,12 @@ pictToRST (label, (attr, src, _, mbtarget)) = do let (_, cls, _) = attr classes = case cls of [] -> empty - ["align-right"] -> ":align: right" - ["align-left"] -> ":align: left" - ["align-center"] -> ":align: center" + ["align-top"] -> ":align: top" + ["align-middle"] -> ":align: middle" + ["align-bottom"] -> ":align: bottom" + ["align-center"] -> empty + ["align-right"] -> empty + ["align-left"] -> empty _ -> ":class: " <> literal (T.unwords cls) return $ nowrap $ ".. |" <> label' <> "| image:: " <> literal src $$ hang 3 empty (classes $$ dims) @@ -215,19 +218,28 @@ blockToRST (Div (ident,classes,_kvs) bs) = do nest 3 contents $$ blankline blockToRST (Plain inlines) = inlineListToRST inlines --- title beginning with fig: indicates that the image is a figure -blockToRST (Para [Image attr txt (src,T.stripPrefix "fig:" -> Just tit)]) = do - capt <- inlineListToRST txt +blockToRST (Para [Image attr txt (src, rawtit)]) = do + description <- inlineListToRST txt dims <- imageDimsToRST attr - let fig = "figure:: " <> literal src - alt = ":alt: " <> if T.null tit then capt else literal tit + -- title beginning with fig: indicates that the image is a figure + let (isfig, tit) = case T.stripPrefix "fig:" rawtit of + Nothing -> (False, rawtit) + Just tit' -> (True, tit') + let fig | isfig = "figure:: " <> literal src + | otherwise = "image:: " <> literal src + alt | isfig = ":alt: " <> if T.null tit then description else literal tit + | null txt = empty + | otherwise = ":alt: " <> description + capt | isfig = description + | otherwise = empty (_,cls,_) = attr classes = case cls of [] -> empty ["align-right"] -> ":align: right" ["align-left"] -> ":align: left" ["align-center"] -> ":align: center" - _ -> ":figclass: " <> literal (T.unwords cls) + _ | isfig -> ":figclass: " <> literal (T.unwords cls) + | otherwise -> ":class: " <> literal (T.unwords cls) return $ hang 3 ".. " (fig $$ alt $$ classes $$ dims $+$ capt) $$ blankline blockToRST (Para inlines) | LineBreak `elem` inlines = diff --git a/test/command/4420.md b/test/command/4420.md index 3e7008935..36d697234 100644 --- a/test/command/4420.md +++ b/test/command/4420.md @@ -1,14 +1,3 @@ -``` -% pandoc -f native -t rst -[Image ("",["align-right"],[("width","100px")]) [Str "image"] ("foo.png","")] -^D -|image| - -.. |image| image:: foo.png - :align: right - :width: 100px -``` - ``` % pandoc -f native -t rst [Para [Image ("",["align-right"],[("width","100px")]) [Str "image"] ("foo.png","fig:test")]] diff --git a/test/command/6948.md b/test/command/6948.md new file mode 100644 index 000000000..8803aebe9 --- /dev/null +++ b/test/command/6948.md @@ -0,0 +1,31 @@ +Treat an image alone in its paragraph (but not a figure) +as an independent image: +``` +% pandoc -f native -t rst +[Para [Image ("",["align-center"],[]) [Str "https://pandoc.org/diagram.jpg"] ("https://pandoc.org/diagram.jpg","")]] +^D +.. image:: https://pandoc.org/diagram.jpg + :alt: https://pandoc.org/diagram.jpg + :align: center +``` + +Here we just omit the center attribute as it's not valid: +``` +% pandoc -f native -t rst +[Para [Str "hi",Space,Image ("",["align-center"],[]) [Str "https://pandoc.org/diagram.jpg"] ("https://pandoc.org/diagram.jpg","")]] +^D +hi |https://pandoc.org/diagram.jpg| + +.. |https://pandoc.org/diagram.jpg| image:: https://pandoc.org/diagram.jpg +``` + +But we can use top, middle, or bottom alignment: +``` +% pandoc -f native -t rst +[Para [Str "hi",Space,Image ("",["align-top"],[]) [Str "https://pandoc.org/diagram.jpg"] ("https://pandoc.org/diagram.jpg","")]] +^D +hi |https://pandoc.org/diagram.jpg| + +.. |https://pandoc.org/diagram.jpg| image:: https://pandoc.org/diagram.jpg + :align: top +``` -- cgit v1.2.3 From 39153ea6e2a77604e3302c5591fbb65496c3457a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 14 Dec 2020 09:39:07 -0800 Subject: ImageSize: use exif width and height when available. After the move to JuicyPixels, we were getting incorrect width and heigh information for some images (see #6936, test-3.jpg). The correct information was encoded in Exif tags that JuicyPixels seemed to ignore. So we check these first before looking at the Width and Height identified by JuicyPixels. Closes #6936. --- src/Text/Pandoc/ImageSize.hs | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 665a94690..9ce5c668d 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -50,6 +50,7 @@ import qualified Data.Text.Encoding as TE import Control.Applicative import qualified Data.Attoparsec.ByteString.Char8 as A import qualified Codec.Picture.Metadata as Metadata +import qualified Codec.Picture.Metadata.Exif as Exif import Codec.Picture (decodeImageWithMetadata) -- quick and dirty functions to get image sizes @@ -300,8 +301,16 @@ getSize img = Left e -> Left (T.pack e) Right (_, meta) -> do pxx <- maybe (Left "Could not determine width") Right $ + -- first look for exif image width, then width + (Metadata.lookup + (Metadata.Exif (Exif.TagUnknown 0xA002)) meta >>= + exifDataToWord) <|> Metadata.lookup Metadata.Width meta pxy <- maybe (Left "Could not determine height") Right $ + -- first look for exif image height, then height + (Metadata.lookup + (Metadata.Exif (Exif.TagUnknown 0xA003)) meta >>= + exifDataToWord) <|> Metadata.lookup Metadata.Height meta dpix <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiX meta dpiy <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiY meta @@ -310,6 +319,10 @@ getSize img = , pxY = fromIntegral pxy , dpiX = fromIntegral dpix , dpiY = fromIntegral dpiy } + where + exifDataToWord (Exif.ExifLong x) = Just $ fromIntegral x + exifDataToWord (Exif.ExifShort x) = Just $ fromIntegral x + exifDataToWord _ = Nothing svgSize :: WriterOptions -> ByteString -> Maybe ImageSize -- cgit v1.2.3 From 7d799bfcda749e4a3ad6fcae59ac5ccb80b77ffd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 15 Dec 2020 08:51:43 -0800 Subject: Allow both inline and external references to be used with `--citeproc`. This fixes a regression, since pandoc-citeproc allowed these to be combined. Closes #6951. --- src/Text/Pandoc/Citeproc.hs | 29 +++++++++++++++-------------- test/command/6951.md | 18 ++++++++++++++++++ 2 files changed, 33 insertions(+), 14 deletions(-) create mode 100644 test/command/6951.md (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 0de2882ae..82d1dc32f 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -105,20 +105,21 @@ processCitations (Pandoc meta bs) = do let idpred = if "*" `Set.member` nocites then const True else (`Set.member` citeIds) - refs <- map (linkifyVariables . legacyDateRanges) <$> - case lookupMeta "references" meta of - Just (MetaList rs) -> return $ mapMaybe metaValueToReference rs - _ -> - case lookupMeta "bibliography" meta of - Just (MetaList xs) -> - mconcat <$> - mapM (getRefsFromBib locale idpred) - (mapMaybe metaValueToText xs) - Just x -> - case metaValueToText x of - Just fp -> getRefsFromBib locale idpred fp - Nothing -> return [] - Nothing -> return [] + let inlineRefs = case lookupMeta "references" meta of + Just (MetaList rs) -> mapMaybe metaValueToReference rs + _ -> [] + externalRefs <- case lookupMeta "bibliography" meta of + Just (MetaList xs) -> + mconcat <$> + mapM (getRefsFromBib locale idpred) + (mapMaybe metaValueToText xs) + Just x -> + case metaValueToText x of + Just fp -> getRefsFromBib locale idpred fp + Nothing -> return [] + Nothing -> return [] + let refs = map (linkifyVariables . legacyDateRanges) + (inlineRefs ++ externalRefs) let otherIdsMap = foldr (\ref m -> case T.words . extractText <$> M.lookup "other-ids" diff --git a/test/command/6951.md b/test/command/6951.md new file mode 100644 index 000000000..4a6301d80 --- /dev/null +++ b/test/command/6951.md @@ -0,0 +1,18 @@ +``` +% pandoc --citeproc -t plain --bibliography command/biblio.bib +--- +references: +- id: foo + title: Crazy + type: book +... + +[@foo; @item1] + +^D +(Crazy, n.d.; Doe 2005) + +Crazy. n.d. + +Doe, John. 2005. First Book. Cambridge: Cambridge University Press. +``` -- cgit v1.2.3 From 87033b285669901505f6da8b2969911a29c448fd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 15 Dec 2020 09:09:51 -0800 Subject: Use fetchItem to get external bibliography. This means that: - a URL may be provided, and pandoc will fetch the resource. - Pandoc will search the resource path for the bibliography if it is not found relative to the working directory. Closes #6940. --- src/Text/Pandoc/Citeproc.hs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 82d1dc32f..770d571a6 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -190,24 +190,23 @@ insertSpace ils = getRefsFromBib :: PandocMonad m => Locale -> (Text -> Bool) -> Text -> m [Reference Inlines] -getRefsFromBib locale idpred t = do - let fp = T.unpack t - raw <- readFileStrict fp - case formatFromExtension fp of +getRefsFromBib locale idpred fp = do + (raw, _) <- fetchItem fp + case formatFromExtension (T.unpack fp) of Just f -> getRefs locale f idpred (Just fp) raw Nothing -> throwError $ PandocAppError $ - "Could not determine bibliography format for " <> t + "Could not determine bibliography format for " <> fp getRefs :: PandocMonad m => Locale -> BibFormat -> (Text -> Bool) - -> Maybe FilePath + -> Maybe Text -> ByteString -> m [Reference Inlines] getRefs locale format idpred mbfp raw = do let err' = throwError . - PandocBibliographyError (maybe mempty T.pack mbfp) + PandocBibliographyError (fromMaybe mempty mbfp) case format of Format_bibtex -> either (err' . tshow) return . @@ -222,7 +221,7 @@ getRefs locale format idpred mbfp raw = do Format_yaml -> do rs <- yamlToRefs idpred def{ readerExtensions = pandocExtensions } - mbfp + (T.unpack <$> mbfp) (L.fromStrict raw) return $ mapMaybe metaValueToReference rs -- cgit v1.2.3 From b4b4e32307499aafa2e6c1c713ee41f1c787ea76 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 15 Dec 2020 23:45:34 -0800 Subject: Properly handle boolean values in writing YAML metadata. (Markdown writer.) This requires doctemplates >= 0.9. Closes #6388. --- pandoc.cabal | 4 ++-- src/Text/Pandoc/Writers/Markdown.hs | 2 ++ src/Text/Pandoc/Writers/Shared.hs | 3 +-- stack.yaml | 5 ++++- test/command/6388.md | 16 ++++++++++++++++ 5 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 test/command/6388.md (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 6483690b9..9244d25cd 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -410,7 +410,7 @@ library deepseq >= 1.3 && < 1.5, directory >= 1.2.3 && < 1.4, doclayout >= 0.3 && < 0.4, - doctemplates >= 0.8.2 && < 0.9, + doctemplates >= 0.9 && < 0.10, emojis >= 0.1 && < 0.2, exceptions >= 0.8 && < 0.11, file-embed >= 0.0 && < 0.1, @@ -767,7 +767,7 @@ test-suite test-pandoc bytestring >= 0.9 && < 0.12, containers >= 0.4.2.1 && < 0.7, directory >= 1.2.3 && < 1.4, - doctemplates >= 0.8.2 && < 0.9, + doctemplates >= 0.9 && < 0.10, exceptions >= 0.8 && < 0.11, executable-path >= 0.0 && < 0.1, filepath >= 1.1 && < 1.5, diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 5eb47b261..c349fd713 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -174,6 +174,8 @@ valToYaml :: Val Text -> Doc Text valToYaml (ListVal xs) = vcat $ map (\v -> hang 2 "- " (valToYaml v)) xs valToYaml (MapVal c) = contextToYaml c +valToYaml (BoolVal True) = "true" +valToYaml (BoolVal False) = "false" valToYaml (SimpleVal x) | isEmpty x = empty | otherwise = diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index b399afbf3..129e45e9d 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -109,8 +109,7 @@ metaValueToVal blockWriter inlineWriter (MetaMap metamap) = MapVal . Context <$> mapM (metaValueToVal blockWriter inlineWriter) metamap metaValueToVal blockWriter inlineWriter (MetaList xs) = ListVal <$> mapM (metaValueToVal blockWriter inlineWriter) xs -metaValueToVal _ _ (MetaBool True) = return $ SimpleVal "true" -metaValueToVal _ _ (MetaBool False) = return NullVal +metaValueToVal _ _ (MetaBool b) = return $ BoolVal b metaValueToVal _ inlineWriter (MetaString s) = SimpleVal <$> inlineWriter (Builder.toList (Builder.text s)) metaValueToVal blockWriter _ (MetaBlocks bs) = SimpleVal <$> blockWriter bs diff --git a/stack.yaml b/stack.yaml index ef0bc7a83..7c1bde8ae 100644 --- a/stack.yaml +++ b/stack.yaml @@ -16,7 +16,10 @@ extra-deps: - commonmark-0.1.1.2 - commonmark-extensions-0.2.0.4 - commonmark-pandoc-0.2.0.1 -- doctemplates-0.8.3 +# - doctemplates-0.8.3 +- doctemplates: + git: https://github.com/jgm/doctemplates.git + commit: 6cb8d09264f7c1f4b59ac67ff98b1a664a85e36a - citeproc-0.2.0.1 # - citeproc: # git: https://github.com/jgm/citeproc.git diff --git a/test/command/6388.md b/test/command/6388.md new file mode 100644 index 000000000..29a9156d7 --- /dev/null +++ b/test/command/6388.md @@ -0,0 +1,16 @@ +``` +% pandoc -t markdown -s +--- +nvalue: false +value: true +--- + +text +^D +--- +nvalue: false +value: true +--- + +text +``` -- cgit v1.2.3 From 57241e201a637d32b5d37b197463a4b96825a6ea Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 16 Dec 2020 07:56:07 -0800 Subject: Support Lua marshalling of doctemplates BoolVal. This updates T.P.Lua.Marshaling.Context for doctemplates >= 0.9. --- src/Text/Pandoc/Lua/Marshaling/Context.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshaling/Context.hs index effcc675d..c0e7aef60 100644 --- a/src/Text/Pandoc/Lua/Marshaling/Context.hs +++ b/src/Text/Pandoc/Lua/Marshaling/Context.hs @@ -22,6 +22,7 @@ instance (TemplateTarget a, Pushable a) => Pushable (Context a) where instance (TemplateTarget a, Pushable a) => Pushable (Val a) where push NullVal = Lua.push () + push (BoolVal b) = Lua.push b push (MapVal ctx) = Lua.push ctx push (ListVal xs) = Lua.push xs push (SimpleVal d) = Lua.push $ render Nothing d -- cgit v1.2.3 From 914cf0b602f0585cfd1b401b956c8edfd129d47d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 16 Dec 2020 15:37:40 -0800 Subject: Fix citeproc regression with duplicate references. - Use dev version of citeproc, which handles duplicate ids better, preferring the last one in the list and discarding the rest. - Ensure that inline citations take priority over external ones. See jgm/citeproc#36. This restores the behavior of pandoc-citeproc. --- cabal.project | 8 ++++---- pandoc.cabal | 2 +- src/Text/Pandoc/Citeproc.hs | 3 ++- stack.yaml | 8 ++++---- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/cabal.project b/cabal.project index 091c78ce6..55d4abaa2 100644 --- a/cabal.project +++ b/cabal.project @@ -9,8 +9,8 @@ source-repository-package location: https://github.com/jgm/doctemplates.git tag: 7ccbf7df16edbc7c5d835d955b242c61fd4e6601 --- source-repository-package --- type: git --- location: https://github.com/jgm/citeproc --- tag: 42b1d154b02435229acbe98ae0f17d01b757ee93 +source-repository-package + type: git + location: https://github.com/jgm/citeproc + tag: a8193fe375fa2354049bf9a967ba3bad4b1ba053 diff --git a/pandoc.cabal b/pandoc.cabal index 9244d25cd..d21d5e03c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -400,7 +400,7 @@ library blaze-markup >= 0.8 && < 0.9, bytestring >= 0.9 && < 0.12, case-insensitive >= 1.2 && < 1.3, - citeproc >= 0.2.0.1 && < 0.3, + citeproc >= 0.3 && < 0.4, commonmark >= 0.1.1.2 && < 0.2, commonmark-extensions >= 0.2.0.4 && < 0.3, commonmark-pandoc >= 0.2 && < 0.3, diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 770d571a6..ca1ab9f96 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -119,7 +119,8 @@ processCitations (Pandoc meta bs) = do Nothing -> return [] Nothing -> return [] let refs = map (linkifyVariables . legacyDateRanges) - (inlineRefs ++ externalRefs) + (externalRefs ++ inlineRefs) + -- note that inlineRefs can override externalRefs let otherIdsMap = foldr (\ref m -> case T.words . extractText <$> M.lookup "other-ids" diff --git a/stack.yaml b/stack.yaml index 518c24b63..92a80c4ff 100644 --- a/stack.yaml +++ b/stack.yaml @@ -20,10 +20,10 @@ extra-deps: - doctemplates: git: https://github.com/jgm/doctemplates.git commit: 7ccbf7df16edbc7c5d835d955b242c61fd4e6601 -- citeproc-0.2.0.1 -# - citeproc: -# git: https://github.com/jgm/citeproc.git -# commit: 42b1d154b02435229acbe98ae0f17d01b757ee93 +# - citeproc-0.2.0.1 +- citeproc: + git: https://github.com/jgm/citeproc.git + commit: a8193fe375fa2354049bf9a967ba3bad4b1ba053 ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules resolver: lts-16.23 -- cgit v1.2.3 From 8f402beab922646d4c428b40a75fe4d140ab5e9e Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 21 Dec 2020 03:04:54 +0100 Subject: LaTeX writer: support colspans and rowspans in tables. (#6950) Note that the multirow package is needed for rowspans. It is included in the latex template under a variable, so that it won't be used unless needed for a table. --- data/templates/default.latex | 3 + pandoc.cabal | 1 + src/Text/Pandoc/Writers/AnnotatedTable.hs | 23 +++ src/Text/Pandoc/Writers/LaTeX.hs | 6 +- src/Text/Pandoc/Writers/LaTeX/Table.hs | 301 ++++++++++++++++++++---------- src/Text/Pandoc/Writers/LaTeX/Types.hs | 2 + test/Tests/Old.hs | 2 +- test/tables/nordics.latex | 26 +++ test/tables/planets.latex | 36 ++++ test/tables/students.latex | 23 +++ 10 files changed, 326 insertions(+), 97 deletions(-) create mode 100644 test/tables/nordics.latex create mode 100644 test/tables/planets.latex create mode 100644 test/tables/students.latex (limited to 'src') diff --git a/data/templates/default.latex b/data/templates/default.latex index 169661582..c567278e3 100644 --- a/data/templates/default.latex +++ b/data/templates/default.latex @@ -255,6 +255,9 @@ $highlighting-macros$ $endif$ $if(tables)$ \usepackage{longtable,booktabs,array} +$if(multirow)$ +\usepackage{multirow} +$endif$ \usepackage{calc} % for calculating minipage widths $if(beamer)$ \usepackage{caption} diff --git a/pandoc.cabal b/pandoc.cabal index 8a4faa3e1..4a893d672 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -299,6 +299,7 @@ extra-source-files: test/tables.xwiki test/tables/*.html4 test/tables/*.html5 + test/tables/*.latex test/tables/*.native test/tables/*.jats_archiving test/testsuite.txt diff --git a/src/Text/Pandoc/Writers/AnnotatedTable.hs b/src/Text/Pandoc/Writers/AnnotatedTable.hs index 48c9d61f2..3f69496a9 100644 --- a/src/Text/Pandoc/Writers/AnnotatedTable.hs +++ b/src/Text/Pandoc/Writers/AnnotatedTable.hs @@ -1,8 +1,12 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE UndecidableInstances #-} {- | Module : Text.Pandoc.Writers.AnnotatedTable @@ -45,6 +49,7 @@ import Data.Generics ( Data import Data.List.NonEmpty ( NonEmpty(..) ) import GHC.Generics ( Generic ) import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Walk ( Walkable (..) ) -- | An annotated table type, corresponding to the Pandoc 'B.Table' -- constructor and the HTML @\<table\>@ element. It records the data @@ -298,3 +303,21 @@ fromBodyRow (BodyRow attr _ rh rb) = fromCell :: Cell -> B.Cell fromCell (Cell _ _ c) = c + +-- +-- Instances +-- +instance Walkable a B.Cell => Walkable a Cell where + walkM f (Cell colspecs colnum cell) = + Cell colspecs colnum <$> walkM f cell + query f (Cell _colspecs _colnum cell) = query f cell + +instance Walkable a B.Cell => Walkable a HeaderRow where + walkM f (HeaderRow attr rownum cells) = + HeaderRow attr rownum <$> walkM f cells + query f (HeaderRow _attr _rownum cells) = query f cells + +instance Walkable a B.Cell => Walkable a TableHead where + walkM f (TableHead attr rows) = + TableHead attr <$> walkM f rows + query f (TableHead _attr rows) = query f rows diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 6a4e3ba69..2281290c0 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -48,6 +48,7 @@ import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState) import Text.Pandoc.Writers.Shared import Text.Printf (printf) import qualified Data.Text.Normalize as Normalize +import qualified Text.Pandoc.Writers.AnnotatedTable as Ann -- | Convert Pandoc to LaTeX. writeLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -154,6 +155,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "documentclass" documentClass $ defField "verbatim-in-note" (stVerbInNote st) $ defField "tables" (stTable st) $ + defField "multirow" (stMultiRow st) $ defField "strikeout" (stStrikeout st) $ defField "url" (stUrl st) $ defField "numbersections" (writerNumberSections options) $ @@ -716,9 +718,9 @@ blockToLaTeX (Header level (id',classes,_) lst) = do hdr <- sectionHeader classes id' level lst modify $ \s -> s{stInHeading = False} return hdr -blockToLaTeX (Table _ blkCapt specs thead tbodies tfoot) = +blockToLaTeX (Table attr blkCapt specs thead tbodies tfoot) = tableToLaTeX inlineListToLaTeX blockListToLaTeX - blkCapt specs thead tbodies tfoot + (Ann.toTable attr blkCapt specs thead tbodies tfoot) blockListToLaTeX :: PandocMonad m => [Block] -> LW m (Doc Text) blockListToLaTeX lst = diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 5299efa37..9dd66c8a3 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Writers.LaTeX.Table ) where import Control.Monad.State.Strict import Data.List (intersperse) +import Data.List.NonEmpty (NonEmpty ((:|))) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -23,102 +24,196 @@ import Text.Pandoc.Definition import Text.DocLayout ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest , text, vcat, ($$) ) -import Text.Pandoc.Shared (splitBy) +import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow) import Text.Pandoc.Walk (walk) -import Text.Pandoc.Writers.Shared (toLegacyTable) import Text.Pandoc.Writers.LaTeX.Caption (getCaption) import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) import Text.Pandoc.Writers.LaTeX.Types - ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stNotes, stTable) ) + ( LW, WriterState (stBeamer, stExternalNotes, stInMinipage, stMultiRow + , stNotes, stTable) ) import Text.Printf (printf) +import qualified Text.Pandoc.Builder as B +import qualified Text.Pandoc.Writers.AnnotatedTable as Ann tableToLaTeX :: PandocMonad m => ([Inline] -> LW m (Doc Text)) -> ([Block] -> LW m (Doc Text)) - -> Caption -> [ColSpec] -> TableHead -> [TableBody] -> TableFoot + -> Ann.Table -> LW m (Doc Text) -tableToLaTeX inlnsToLaTeX blksToLaTeX blkCapt specs thead tbody tfoot = do - let (caption, aligns, widths, heads, rows) = - toLegacyTable blkCapt specs thead tbody tfoot - -- simple tables have to have simple cells: - let isSimple = \case - [Plain _] -> True - [Para _] -> True - [] -> True - _ -> False - let widths' = if all (== 0) widths && not (all (all isSimple) rows) - then replicate (length aligns) - (1 / fromIntegral (length aligns)) - else widths - (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption - let toHeaders hs = do contents <- tableRowToLaTeX blksToLaTeX True aligns hs - return ("\\toprule" $$ contents $$ "\\midrule") +tableToLaTeX inlnsToLaTeX blksToLaTeX tbl = do + let (Ann.Table _attr caption _specs thead tbodies tfoot) = tbl + CaptionDocs capt captNotes <- captionToLaTeX inlnsToLaTeX caption let removeNote (Note _) = Span ("", [], []) [] removeNote x = x - firsthead <- if isEmpty captionText || all null heads - then return empty - else ($$ text "\\endfirsthead") <$> toHeaders heads - head' <- if all null heads - then return "\\toprule" - -- avoid duplicate notes in head and firsthead: - else toHeaders (if isEmpty firsthead - then heads - else walk removeNote heads) - let capt = if isEmpty captionText - then empty - else "\\caption" <> captForLof <> braces captionText - <> "\\tabularnewline" - rows' <- mapM (tableRowToLaTeX blksToLaTeX False aligns) rows - let colDescriptors = - (if all (== 0) widths' - then hcat . map literal - else (\xs -> cr <> nest 2 (vcat $ map literal xs))) $ - zipWith (toColDescriptor (length widths')) aligns widths' + firsthead <- if isEmpty capt || isEmptyHead thead + then return empty + else ($$ text "\\endfirsthead") <$> + headToLaTeX blksToLaTeX thead + head' <- if isEmptyHead thead + then return "\\toprule" + -- avoid duplicate notes in head and firsthead: + else headToLaTeX blksToLaTeX + (if isEmpty firsthead + then thead + else walk removeNote thead) + rows' <- mapM (rowToLaTeX blksToLaTeX BodyCell) $ + mconcat (map bodyRows tbodies) <> footRows tfoot modify $ \s -> s{ stTable = True } notes <- notesToLaTeX <$> gets stNotes - return $ "\\begin{longtable}[]" <> - braces ("@{}" <> colDescriptors <> "@{}") - -- the @{} removes extra space at beginning and end - $$ capt - $$ firsthead - $$ head' - $$ "\\endhead" - $$ vcat rows' - $$ "\\bottomrule" - $$ "\\end{longtable}" - $$ captNotes - $$ notes - -toColDescriptor :: Int -> Alignment -> Double -> Text -toColDescriptor _numcols align 0 = - case align of - AlignLeft -> "l" - AlignRight -> "r" - AlignCenter -> "c" - AlignDefault -> "l" -toColDescriptor numcols align width = - T.pack $ printf - ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" - align' - ((numcols - 1) * 2) - width - where - align' :: String - align' = case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" - -tableRowToLaTeX :: PandocMonad m - => ([Block] -> LW m (Doc Text)) - -> Bool - -> [Alignment] - -> [[Block]] - -> LW m (Doc Text) -tableRowToLaTeX blockListToLaTeX header aligns cols = do - cells <- mapM (tableCellToLaTeX blockListToLaTeX header) $ zip aligns cols - return $ hsep (intersperse "&" cells) <> " \\\\ \\addlinespace" + return + $ "\\begin{longtable}[]" <> + braces ("@{}" <> colDescriptors tbl <> "@{}") + -- the @{} removes extra space at beginning and end + $$ capt + $$ firsthead + $$ head' + $$ "\\endhead" + $$ vcat rows' + $$ "\\bottomrule" + $$ "\\end{longtable}" + $$ captNotes + $$ notes + +-- | Creates column descriptors for the table. +colDescriptors :: Ann.Table -> Doc Text +colDescriptors (Ann.Table _attr _caption specs thead tbodies tfoot) = + let (aligns, widths) = unzip specs + + defaultWidthsOnly = all (== ColWidthDefault) widths + isSimpleTable = all (all isSimpleCell) $ mconcat + [ headRows thead + , concatMap bodyRows tbodies + , footRows tfoot + ] + + relativeWidths = if defaultWidthsOnly + then replicate (length specs) + (1 / fromIntegral (length specs)) + else map toRelWidth widths + in if defaultWidthsOnly && isSimpleTable + then hcat $ map (literal . colAlign) aligns + else (cr <>) . nest 2 . vcat . map literal $ + zipWith (toColDescriptor (length specs)) + aligns + relativeWidths + where + toColDescriptor :: Int -> Alignment -> Double -> Text + toColDescriptor numcols align width = + T.pack $ printf + ">{%s\\arraybackslash}p{(\\columnwidth - %d\\tabcolsep) * \\real{%0.2f}}" + (T.unpack (alignCommand align)) + ((numcols - 1) * 2) + width + + isSimpleCell (Ann.Cell _ _ (Cell _attr _align _rowspan _colspan blocks)) = + case blocks of + [Para _] -> True + [Plain _] -> True + [] -> True + _ -> False + + toRelWidth ColWidthDefault = 0 + toRelWidth (ColWidth w) = w + +alignCommand :: Alignment -> Text +alignCommand = \case + AlignLeft -> "\\raggedright" + AlignRight -> "\\raggedleft" + AlignCenter -> "\\centering" + AlignDefault -> "\\raggedright" + +colAlign :: Alignment -> Text +colAlign = \case + AlignLeft -> "l" + AlignRight -> "r" + AlignCenter -> "c" + AlignDefault -> "l" + +data CaptionDocs = + CaptionDocs + { captionCommand :: Doc Text + , captionNotes :: Doc Text + } + +captionToLaTeX :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Caption + -> LW m CaptionDocs +captionToLaTeX inlnsToLaTeX (Caption _maybeShort longCaption) = do + let caption = blocksToInlines longCaption + (captionText, captForLof, captNotes) <- getCaption inlnsToLaTeX False caption + return $ CaptionDocs + { captionNotes = captNotes + , captionCommand = if isEmpty captionText + then empty + else "\\caption" <> captForLof <> + braces captionText <> "\\tabularnewline" + } + +type BlocksWriter m = [Block] -> LW m (Doc Text) + +headToLaTeX :: PandocMonad m + => BlocksWriter m + -> Ann.TableHead + -> LW m (Doc Text) +headToLaTeX blocksWriter (Ann.TableHead _attr headerRows) = do + rowsContents <- mapM (rowToLaTeX blocksWriter HeaderCell . headerRowCells) + headerRows + return ("\\toprule" $$ vcat rowsContents $$ "\\midrule") + +-- | Converts a row of table cells into a LaTeX row. +rowToLaTeX :: PandocMonad m + => BlocksWriter m + -> CellType + -> [Ann.Cell] + -> LW m (Doc Text) +rowToLaTeX blocksWriter celltype row = do + cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row) + return $ hsep (intersperse "&" cellsDocs) <> " \\\\ \\addlinespace" + +-- | Pads row with empty cells to adjust for rowspans above this row. +fillRow :: [Ann.Cell] -> [Ann.Cell] +fillRow = go 0 + where + go _ [] = [] + go n (acell@(Ann.Cell _spec (Ann.ColNumber colnum) cell):cells) = + let (Cell _ _ _ (ColSpan colspan) _) = cell + in map mkEmptyCell [n .. colnum - 1] ++ + acell : go (colnum + colspan) cells + + mkEmptyCell :: Int -> Ann.Cell + mkEmptyCell colnum = + Ann.Cell ((AlignDefault, ColWidthDefault):|[]) + (Ann.ColNumber colnum) + B.emptyCell + +isEmptyHead :: Ann.TableHead -> Bool +isEmptyHead (Ann.TableHead _attr []) = True +isEmptyHead (Ann.TableHead _attr rows) = all (null . headerRowCells) rows + +-- | Gets all cells in a header row. +headerRowCells :: Ann.HeaderRow -> [Ann.Cell] +headerRowCells (Ann.HeaderRow _attr _rownum cells) = cells + +-- | Gets all cells in a body row. +bodyRowCells :: Ann.BodyRow -> [Ann.Cell] +bodyRowCells (Ann.BodyRow _attr _rownum rowhead cells) = rowhead <> cells + +-- | Gets a list of rows of the table body, where a row is a simple +-- list of cells. +bodyRows :: Ann.TableBody -> [[Ann.Cell]] +bodyRows (Ann.TableBody _attr _rowheads headerRows rows) = + map headerRowCells headerRows <> map bodyRowCells rows + +-- | Gets a list of rows of the table head, where a row is a simple +-- list of cells. +headRows :: Ann.TableHead -> [[Ann.Cell]] +headRows (Ann.TableHead _attr rows) = map headerRowCells rows + +-- | Gets a list of rows from the foot, where a row is a simple list +-- of cells. +footRows :: Ann.TableFoot -> [[Ann.Cell]] +footRows (Ann.TableFoot _attr rows) = map headerRowCells rows -- For simple latex tables (without minipages or parboxes), -- we need to go to some lengths to get line breaks working: @@ -144,11 +239,14 @@ displayMathToInline :: Inline -> Inline displayMathToInline (Math DisplayMath x) = Math InlineMath x displayMathToInline x = x -tableCellToLaTeX :: PandocMonad m - => ([Block] -> LW m (Doc Text)) - -> Bool -> (Alignment, [Block]) - -> LW m (Doc Text) -tableCellToLaTeX blockListToLaTeX header (align, blocks) = do +cellToLaTeX :: PandocMonad m + => BlocksWriter m + -> CellType + -> Ann.Cell + -> LW m (Doc Text) +cellToLaTeX blockListToLaTeX celltype annotatedCell = do + let (Ann.Cell _specs _colnum cell) = annotatedCell + let (Cell _attr align rowspan colspan blocks) = cell beamer <- gets stBeamer externalNotes <- gets stExternalNotes inMinipage <- gets stInMinipage @@ -167,15 +265,30 @@ tableCellToLaTeX blockListToLaTeX header (align, blocks) = do modify $ \st -> st{ stInMinipage = True } cellContents <- blockListToLaTeX blocks modify $ \st -> st{ stInMinipage = inMinipage } - let valign = text $ if header then "[b]" else "[t]" - let halign = case align of - AlignLeft -> "\\raggedright" - AlignRight -> "\\raggedleft" - AlignCenter -> "\\centering" - AlignDefault -> "\\raggedright" + let valign = text $ case celltype of + HeaderCell -> "[b]" + BodyCell -> "[t]" + let halign = literal $ alignCommand align return $ "\\begin{minipage}" <> valign <> braces "\\linewidth" <> halign <> cr <> cellContents <> cr <> "\\end{minipage}" modify $ \st -> st{ stExternalNotes = externalNotes } - return result + when (rowspan /= RowSpan 1) $ + modify (\st -> st{ stMultiRow = True }) + let inMultiColumn x = case colspan of + (ColSpan 1) -> x + (ColSpan n) -> "\\multicolumn" + <> braces (literal (tshow n)) + <> braces (literal $ colAlign align) + <> braces x + let inMultiRow x = case rowspan of + (RowSpan 1) -> x + (RowSpan n) -> let nrows = literal (tshow n) + in "\\multirow" <> braces nrows + <> braces "*" <> braces x + return . inMultiColumn . inMultiRow $ result + +data CellType + = HeaderCell + | BodyCell diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs index a76388729..d598794ad 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs @@ -31,6 +31,7 @@ data WriterState = -- be parameter , stVerbInNote :: Bool -- ^ true if document has verbatim text in note , stTable :: Bool -- ^ true if document has a table + , stMultiRow :: Bool -- ^ true if document has multirow cells , stStrikeout :: Bool -- ^ true if document has strikeout , stUrl :: Bool -- ^ true if document has visible URL link , stGraphics :: Bool -- ^ true if document contains images @@ -61,6 +62,7 @@ startingState options = , stOptions = options , stVerbInNote = False , stTable = False + , stMultiRow = False , stStrikeout = False , stUrl = False , stGraphics = False diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index aca2d05d0..cf0396d0a 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -58,7 +58,7 @@ tests pandocPath = ] , testGroup "latex" [ testGroup "writer" - (writerTests' "latex" ++ lhsWriterTests' "latex") + (extWriterTests' "latex" ++ lhsWriterTests' "latex") , testGroup "reader" [ test' "basic" ["-r", "latex+raw_tex", "-w", "native", "-s"] "latex-reader.latex" "latex-reader.native" diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex new file mode 100644 index 000000000..d1a0b4aed --- /dev/null +++ b/test/tables/nordics.latex @@ -0,0 +1,26 @@ +\begin{longtable}[]{@{} + >{\centering\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.30}} + >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.30}} + >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}} + >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}@{}} +\caption{States belonging to the \emph{Nordics.}}\tabularnewline +\toprule +Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} & +\vtop{\hbox{\strut Area}\hbox{\strut (in +km\textsuperscript{2})}} \\ \addlinespace +\midrule +\endfirsthead +\toprule +Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} & +\vtop{\hbox{\strut Area}\hbox{\strut (in +km\textsuperscript{2})}} \\ \addlinespace +\midrule +\endhead +Denmark & Copenhagen & 5,809,502 & 43,094 \\ \addlinespace +Finland & Helsinki & 5,537,364 & 338,145 \\ \addlinespace +Iceland & Reykjavik & 343,518 & 103,000 \\ \addlinespace +Norway & Oslo & 5,372,191 & 323,802 \\ \addlinespace +Sweden & Stockholm & 10,313,447 & 450,295 \\ \addlinespace +Total & & 27,376,022 & 1,258,336 \\ \addlinespace +\bottomrule +\end{longtable} diff --git a/test/tables/planets.latex b/test/tables/planets.latex new file mode 100644 index 000000000..9457b6821 --- /dev/null +++ b/test/tables/planets.latex @@ -0,0 +1,36 @@ +\begin{longtable}[]{@{}cclrrrrrrrrl@{}} +\caption{Data about the planets of our solar system.}\tabularnewline +\toprule +\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density +(kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun +(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace +\midrule +\endfirsthead +\toprule +\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density +(kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun +(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace +\midrule +\endhead +\multicolumn{2}{l}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 & +4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the +Sun \\ \addlinespace +& & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0 +& \\ \addlinespace +& & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our +world \\ \addlinespace +& & Mars & 0.642 & 6,792 & 3933 & 3.7 & 24.7 & 227.9 & -65 & 2 & The red +planet \\ \addlinespace +\multirow{4}{*}{Jovian planets} & \multirow{2}{*}{Gas giants} & Jupiter & 1898 +& 142,984 & 1326 & 23.1 & 9.9 & 778.6 & -110 & 67 & The largest +planet \\ \addlinespace +& & Saturn & 568 & 120,536 & 687 & 9.0 & 10.7 & 1433.5 & -140 & 62 +& \\ \addlinespace +& \multirow{2}{*}{Ice giants} & Uranus & 86.8 & 51,118 & 1271 & 8.7 & 17.2 & +2872.5 & -195 & 27 & \\ \addlinespace +& & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14 +& \\ \addlinespace +\multicolumn{2}{l}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 & +153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\ \addlinespace +\bottomrule +\end{longtable} diff --git a/test/tables/students.latex b/test/tables/students.latex new file mode 100644 index 000000000..680a539d3 --- /dev/null +++ b/test/tables/students.latex @@ -0,0 +1,23 @@ +\begin{longtable}[]{@{} + >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}} + >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}@{}} +\caption{List of Students}\tabularnewline +\toprule +Student ID & Name \\ \addlinespace +\midrule +\endfirsthead +\toprule +Student ID & Name \\ \addlinespace +\midrule +\endhead +\multicolumn{2}{l}{Computer Science} \\ \addlinespace +3741255 & Jones, Martha \\ \addlinespace +4077830 & Pierce, Benjamin \\ \addlinespace +5151701 & Kirk, James \\ \addlinespace +\multicolumn{2}{l}{Russian Literature} \\ \addlinespace +3971244 & Nim, Victor \\ \addlinespace +\multicolumn{2}{l}{Astrophysics} \\ \addlinespace +4100332 & Petrov, Alexandra \\ \addlinespace +4100332 & Toyota, Hiroko \\ \addlinespace +\bottomrule +\end{longtable} -- cgit v1.2.3 From 9cbbf18fe18d740e12f564506079f3a2cb853be6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 20 Dec 2020 22:33:26 -0800 Subject: HTML writer: don't include p tags in CSL bibliography entries. Fixes a regression in 2.11.3. Closes #6966 --- src/Text/Pandoc/Writers/HTML.hs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 76f17f77a..9f9a1013c 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -778,12 +778,17 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do classes' = case slideVariant of NoSlides -> classes _ -> filter (\k -> k /= "incremental" && k /= "nonincremental") classes + let paraToPlain (Para ils) = Plain ils + paraToPlain x = x + let bs' = if "csl-entry" `elem` classes' + then walk paraToPlain bs + else bs contents <- if "columns" `elem` classes' then -- we don't use blockListToHtml because it inserts -- a newline between the column divs, which throws -- off widths! see #4028 - mconcat <$> mapM (blockToHtml opts) bs - else blockListToHtml opts' bs + mconcat <$> mapM (blockToHtml opts) bs' + else blockListToHtml opts' bs' let contents' = nl opts >> contents >> nl opts let (divtag, classes'') = if html5 && "section" `elem` classes' then (H5.section, filter (/= "section") classes') -- cgit v1.2.3 From 47f435276a62b3eddac496de6a7e365631d57197 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 24 Dec 2020 09:56:20 -0800 Subject: Citeproc: fix handling of empty URL variables (`DOI`, etc.). The `linkifyVariables` function was changing these to links which then got treated as non-empty by citeproc, leading to wrong results (e.g. ignoring nonempty URL when empty DOI is present). Addresses part 2 of jgm/citeproc#41. --- src/Text/Pandoc/Citeproc.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index ca1ab9f96..e87ddcbcd 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -507,7 +507,9 @@ linkifyVariables ref = x'' = if "://" `T.isInfixOf` x' then x' else pref <> x' - in FancyVal (B.link x'' "" (B.str x')) + in if T.null x' + then x + else FancyVal (B.link x'' "" (B.str x')) extractText :: Val Inlines -> Text extractText (TextVal x) = x -- cgit v1.2.3 From dcd89413f3d4412c7a17b0b2f1019ea592a35858 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 26 Dec 2020 22:51:22 +0100 Subject: Powerpoint writer: allow arbitrary OOXML in raw inline elements The raw text is now included verbatim in the output. Previously is was parsed into XML elements, which prevented the inclusion of partial XML snippets. --- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 49 +++++++++++++++------------- test/pptx/raw_ooxml.native | 2 +- 2 files changed, 28 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 603a84acc..8554db622 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -437,10 +437,10 @@ getContentShapeSize ns layout master getContentShapeSize _ _ _ = throwError $ PandocSomeError "Attempted to find content shape size in non-layout" -buildSpTree :: NameSpaces -> Element -> [Element] -> Element +buildSpTree :: NameSpaces -> Element -> [Content] -> Element buildSpTree ns spTreeElem newShapes = emptySpTreeElem { elContent = newContent } - where newContent = elContent emptySpTreeElem <> map Elem newShapes + where newContent = elContent emptySpTreeElem <> newShapes emptySpTreeElem = spTreeElem { elContent = filter fn (elContent spTreeElem) } fn :: Content -> Bool fn (Elem e) = isElem ns "p" "nvGrpSpPr" e || @@ -744,8 +744,8 @@ makePicElements layout picProps mInfo alt = do else return [picShape] -paraElemToElements :: PandocMonad m => ParaElem -> P m [Element] -paraElemToElements Break = return [mknode "a:br" [] ()] +paraElemToElements :: PandocMonad m => ParaElem -> P m [Content] +paraElemToElements Break = return [Elem $ mknode "a:br" [] ()] paraElemToElements (Run rpr s) = do sizeAttrs <- fontSizeAttributes rpr let attrs = sizeAttrs <> @@ -801,19 +801,20 @@ paraElemToElements (Run rpr s) = do let codeContents = [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr] let propContents = linkProps <> colorContents <> codeContents - return [mknode "a:r" [] [ mknode "a:rPr" attrs propContents - , mknode "a:t" [] $ T.unpack s - ]] + return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents + , mknode "a:t" [] $ T.unpack s + ]] paraElemToElements (MathElem mathType texStr) = do isInSpkrNotes <- asks envInSpeakerNotes if isInSpkrNotes then paraElemToElements $ Run def $ unTeXString texStr else do res <- convertMath writeOMML mathType (unTeXString texStr) case res of - Right r -> return [mknode "a14:m" [] $ addMathInfo r] + Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r] Left (Str s) -> paraElemToElements (Run def s) Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback" -paraElemToElements (RawOOXMLParaElem str) = return [ x | Elem x <- parseXML str ] +paraElemToElements (RawOOXMLParaElem str) = return + [Text (CData CDataRaw (T.unpack str) Nothing)] -- This is a bit of a kludge -- really requires adding an option to @@ -875,8 +876,9 @@ paragraphToElement par = do [mknode "a:buAutoNum" (autoNumAttrs attrs') ()] Nothing -> [mknode "a:buNone" [] ()] ) - paras <- concat <$> mapM paraElemToElements (paraElems par) - return $ mknode "a:p" [] $ [mknode "a:pPr" attrs props] <> paras + paras <- mapM paraElemToElements (paraElems par) + return $ mknode "a:p" [] $ + [Elem $ mknode "a:pPr" attrs props] <> concat paras shapeToElement :: PandocMonad m => Element -> Shape -> P m Element shapeToElement layout (TextBox paras) @@ -896,21 +898,22 @@ shapeToElement layout (TextBox paras) -- GraphicFrame and Pic should never reach this. shapeToElement _ _ = return $ mknode "p:sp" [] () -shapeToElements :: PandocMonad m => Element -> Shape -> P m [Element] +shapeToElements :: PandocMonad m => Element -> Shape -> P m [Content] shapeToElements layout (Pic picProps fp alt) = do mInfo <- registerMedia fp alt case mInfoExt mInfo of - Just _ -> + Just _ -> map Elem <$> makePicElements layout picProps mInfo alt Nothing -> shapeToElements layout $ TextBox [Paragraph def alt] -shapeToElements layout (GraphicFrame tbls cptn) = +shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$> graphicFrameToElements layout tbls cptn -shapeToElements _ (RawOOXMLShape str) = return [ x | Elem x <- parseXML str ] +shapeToElements _ (RawOOXMLShape str) = return + [Text (CData CDataRaw (T.unpack str) Nothing)] shapeToElements layout shp = do element <- shapeToElement layout shp - return [element] + return [Elem element] -shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Element] +shapesToElements :: PandocMonad m => Element -> [Shape] -> P m [Content] shapesToElements layout shps = concat <$> mapM (shapeToElements layout) shps @@ -1083,7 +1086,7 @@ contentToElement layout hdrShape shapes , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do element <- nonBodyTextToElement layout [PHType "title"] hdrShape - let hdrShapeElements = [element | not (null hdrShape)] + let hdrShapeElements = [Elem element | not (null hdrShape)] contentElements <- local (\env -> env {envContentType = NormalContent}) (shapesToElements layout shapes) @@ -1096,7 +1099,7 @@ twoColumnToElement layout hdrShape shapesL shapesR , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do element <- nonBodyTextToElement layout [PHType "title"] hdrShape - let hdrShapeElements = [element | not (null hdrShape)] + let hdrShapeElements = [Elem element | not (null hdrShape)] contentElementsL <- local (\env -> env {envContentType =TwoColumnLeftContent}) (shapesToElements layout shapesL) @@ -1105,7 +1108,8 @@ twoColumnToElement layout hdrShape shapesL shapesR (shapesToElements layout shapesR) -- let contentElementsL' = map (setIdx ns "1") contentElementsL -- contentElementsR' = map (setIdx ns "2") contentElementsR - return $ buildSpTree ns spTree (hdrShapeElements <> contentElementsL <> contentElementsR) + return $ buildSpTree ns spTree $ + hdrShapeElements <> contentElementsL <> contentElementsR twoColumnToElement _ _ _ _= return $ mknode "p:sp" [] () @@ -1115,7 +1119,7 @@ titleToElement layout titleElems , Just cSld <- findChild (elemName ns "p" "cSld") layout , Just spTree <- findChild (elemName ns "p" "spTree") cSld = do element <- nonBodyTextToElement layout [PHType "title", PHType "ctrTitle"] titleElems - let titleShapeElements = [element | not (null titleElems)] + let titleShapeElements = [Elem element | not (null titleElems)] return $ buildSpTree ns spTree titleShapeElements titleToElement _ _ = return $ mknode "p:sp" [] () @@ -1135,7 +1139,8 @@ metadataToElement layout titleElems subtitleElems authorsElems dateElems dateShapeElements <- if null dateElems then return [] else sequence [nonBodyTextToElement layout [PHType "dt"] dateElems] - return $ buildSpTree ns spTree (titleShapeElements <> subtitleShapeElements <> dateShapeElements) + return . buildSpTree ns spTree . map Elem $ + (titleShapeElements <> subtitleShapeElements <> dateShapeElements) metadataToElement _ _ _ _ _ = return $ mknode "p:sp" [] () slideToElement :: PandocMonad m => Slide -> P m Element diff --git a/test/pptx/raw_ooxml.native b/test/pptx/raw_ooxml.native index aa86ad076..ae5bdd140 100644 --- a/test/pptx/raw_ooxml.native +++ b/test/pptx/raw_ooxml.native @@ -1,3 +1,3 @@ [Para [Str "Here",Space,Str "is",Space,Str "some",Space,Str "text,",Space,Str "written",Space,Str "as",Space,Str "a",Space,Str "raw",Space,Str "inline:",Space,RawInline (Format "openxml") "<a:r><a:rPr /><a:t>Here are examples of </a:t></a:r><a:r><a:rPr i=\"1\" /><a:t>italics</a:t></a:r><a:r><a:rPr /><a:t>, </a:t></a:r><a:r><a:rPr b=\"1\" /><a:t>bold</a:t></a:r>"] ,HorizontalRule -,RawBlock (Format "openxml") " <p:sp>\n <p:nvSpPr>\n <p:cNvPr id=\"3\" name=\"Content Placeholder 2\"/>\n <p:cNvSpPr>\n <a:spLocks noGrp=\"1\"/>\n </p:cNvSpPr>\n <p:nvPr>\n <p:ph idx=\"1\"/>\n </p:nvPr>\n </p:nvSpPr>\n <p:spPr/>\n <p:txBody>\n <a:bodyPr/>\n <a:lstStyle/>\n <a:p>\n <a:pPr lvl=\"1\"/>\n <a:r>\n <a:rPr/>\n <a:t>Bulleted bulleted lists.</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"1\"/>\n <a:r>\n <a:rPr/>\n <a:t>And go to arbitrary depth.</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"2\"/>\n <a:r>\n <a:rPr/>\n <a:t>Like this</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"3\"/>\n <a:r>\n <a:rPr/>\n <a:t>Or this</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"2\"/>\n <a:r>\n <a:rPr/>\n <a:t>Back to here.</a:t>\n </a:r>\n </a:p>\n </p:txBody>\n </p:sp>"] +,RawBlock (Format "openxml") "<p:sp>\n <p:nvSpPr>\n <p:cNvPr id=\"3\" name=\"Content Placeholder 2\" />\n <p:cNvSpPr>\n <a:spLocks noGrp=\"1\" />\n </p:cNvSpPr>\n <p:nvPr>\n <p:ph idx=\"1\" />\n </p:nvPr>\n </p:nvSpPr>\n <p:spPr />\n <p:txBody>\n <a:bodyPr />\n <a:lstStyle />\n <a:p>\n <a:pPr lvl=\"1\" />\n <a:r>\n <a:rPr />\n <a:t>Bulleted bulleted lists.</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"1\" />\n <a:r>\n <a:rPr />\n <a:t>And go to arbitrary depth.</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"2\" />\n <a:r>\n <a:rPr />\n <a:t>Like this</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"3\" />\n <a:r>\n <a:rPr />\n <a:t>Or this</a:t>\n </a:r>\n </a:p>\n <a:p>\n <a:pPr lvl=\"2\" />\n <a:r>\n <a:rPr />\n <a:t>Back to here.</a:t>\n </a:r>\n </a:p>\n </p:txBody>\n </p:sp>"] -- cgit v1.2.3 From 668596cc89f8f6bddafd35fd031638bc0c416183 Mon Sep 17 00:00:00 2001 From: timo-a <timo-a@gmx.ch> Date: Mon, 28 Dec 2020 03:42:28 +0100 Subject: Add support for writing nested tables to asciidoc (#6972) Added field to WriterState that denotes the current nesting level for traversing tables. Depending on the value of that field nested tables are recognized and written. Asciidoc supports one level of nesting. If deeper tables are to be written, they are omitted and a warning is issued. --- src/Text/Pandoc/Writers/AsciiDoc.hs | 39 ++++++++++--- test/command/nested-table-to-asciidoc-6942.md | 82 +++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 7 deletions(-) create mode 100644 test/command/nested-table-to-asciidoc-6942.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index e742577b6..0a312d1d1 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -37,6 +37,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.Pandoc.Writers.Shared + data WriterState = WriterState { defListMarker :: Text , orderedListLevel :: Int , bulletListLevel :: Int @@ -45,6 +46,10 @@ data WriterState = WriterState { defListMarker :: Text , asciidoctorVariant :: Bool , inList :: Bool , hasMath :: Bool + -- |0 is no table + -- 1 is top level table + -- 2 is a table in a table + , tableNestingLevel :: Int } defaultWriterState :: WriterState @@ -56,6 +61,7 @@ defaultWriterState = WriterState { defListMarker = "::" , asciidoctorVariant = False , inList = False , hasMath = False + , tableNestingLevel = 0 } -- | Convert Pandoc to AsciiDoc. @@ -194,7 +200,7 @@ blockToAsciiDoc opts (BlockQuote blocks) = do else contents let bar = text "____" return $ bar $$ chomp contents' $$ bar <> blankline -blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do +blockToAsciiDoc opts block@(Table _ blkCapt specs thead tbody tfoot) = do let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot caption' <- inlineListToAsciiDoc opts caption @@ -236,23 +242,42 @@ blockToAsciiDoc opts (Table _ blkCapt specs thead tbody tfoot) = do $ zipWith colspec aligns widths') <> text "," <> headerspec <> text "]" + + -- construct cells and recurse in case of nested tables + parentTableLevel <- gets tableNestingLevel + let currentNestingLevel = parentTableLevel + 1 + + modify $ \st -> st{ tableNestingLevel = currentNestingLevel } + + let separator = text (if parentTableLevel == 0 + then "|" -- top level separator + else "!") -- nested separator + let makeCell [Plain x] = do d <- blockListToAsciiDoc opts [Plain x] - return $ text "|" <> chomp d + return $ separator <> chomp d makeCell [Para x] = makeCell [Plain x] - makeCell [] = return $ text "|" - makeCell bs = do d <- blockListToAsciiDoc opts bs - return $ text "a|" $$ d + makeCell [] = return separator + makeCell bs = if currentNestingLevel == 2 + then do + --asciidoc only supports nesting once + report $ BlockNotRendered block + return separator + else do + d <- blockListToAsciiDoc opts bs + return $ (text "a" <> separator) $$ d + let makeRow cells = hsep `fmap` mapM makeCell cells rows' <- mapM makeRow rows head' <- makeRow headers + modify $ \st -> st{ tableNestingLevel = parentTableLevel } let head'' = if all null headers then empty else head' let colwidth = if writerWrapText opts == WrapAuto then writerColumns opts else 100000 let maxwidth = maximum $ map offset (head':rows') let body = if maxwidth > colwidth then vsep rows' else vcat rows' - let border = text "|===" - return $ + let border = separator <> text "===" + return $ caption'' $$ tablespec $$ border $$ head'' $$ body $$ border $$ blankline blockToAsciiDoc opts (BulletList items) = do inlist <- gets inList diff --git a/test/command/nested-table-to-asciidoc-6942.md b/test/command/nested-table-to-asciidoc-6942.md new file mode 100644 index 000000000..baf11fdf7 --- /dev/null +++ b/test/command/nested-table-to-asciidoc-6942.md @@ -0,0 +1,82 @@ +A table within a table should be convertet into a table within table + +``` +% pandoc -f html -t asciidoc +<!doctype html> +<html> +<head> +<meta charset="utf-8"> +<title> NestedTables </title> +</head> +<body> +<table> + <tr> + <td > + <table> <tr> <td> a1 </td> <td> a2 </td> </tr> </table> + </td> + <td>b</td> + </tr> + <tr> + <td>c</td> <td>d </td> + </tr> +</table> +</body> +</html> +^D +[width="100%",cols="50%,50%",] +|=== +a| +[cols=",",] +!=== +!a1 !a2 +!=== + +|b +|c |d +|=== +``` + +A table within a table within a table cannot be converted because asciidoc only +supports two levels of tables. +The table on level 3 is thus converted to level 2 and a warning is produced +``` +% pandoc -f html -t asciidoc --verbose +<!doctype html> +<html> +<head> +<meta charset="utf-8"> +<title> NestedTables </title> +</head> +<body> +<table> + <tr> + <td> + <table> <tr> + <td> a1 </td> + <td> + <table> <tr> <td> 1 </td> <td> 2 </td> </tr> </table> + </td> + </tr> </table> + </td> + <td>b</td> + </tr> + <tr> + <td>c</td> <td>d </td> + </tr> +</table> +</body> +</html> +^D +[INFO] Not rendering Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidth 0.5),(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "a1"]],Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Table ("",[],[]) (Caption Nothing []) [(AlignDefault,ColWidthDefault),(AlignDefault,ColWidthDefault)] (TableHead ("",[],[]) []) [TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "1"]],Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "2"]]]]] (TableFoot ("",[],[]) [])]]]] (TableFoot ("",[],[]) []) +[width="100%",cols="50%,50%",] +|=== +a| +[width="100%",cols="50%,50%",] +!=== +!a1 ! +!=== + +|b +|c |d +|=== +``` -- cgit v1.2.3 From 99e1b67b74ddcbf7ffc8add640493f73471cf50a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 27 Dec 2020 23:19:14 -0800 Subject: Use meta-description instead of description in templates. Since this is an attribute value, we need to prepare it in the writer. --- data/templates/default.html4 | 4 ++-- data/templates/default.html5 | 4 ++-- src/Text/Pandoc/Writers/HTML.hs | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/data/templates/default.html4 b/data/templates/default.html4 index deba57a7b..d54c48ee4 100644 --- a/data/templates/default.html4 +++ b/data/templates/default.html4 @@ -13,8 +13,8 @@ $endif$ $if(keywords)$ <meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$" /> $endif$ -$if(description)$ - <meta name="description" content="$description$" /> +$if(description-meta)$ + <meta name="description" content="$description-meta$" /> $endif$ <title>$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$</title> <style type="text/css"> diff --git a/data/templates/default.html5 b/data/templates/default.html5 index dd7dc3212..9699b8504 100644 --- a/data/templates/default.html5 +++ b/data/templates/default.html5 @@ -13,8 +13,8 @@ $endif$ $if(keywords)$ <meta name="keywords" content="$for(keywords)$$keywords$$sep$, $endfor$" /> $endif$ -$if(description)$ - <meta name="description" content="$description$" /> +$if(description-meta)$ + <meta name="description" content="$description-meta$" /> $endif$ <title>$if(title-prefix)$$title-prefix$ – $endif$$pagetitle$</title> <style> diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 9f9a1013c..c93322953 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -254,6 +254,8 @@ pandocToHtml opts (Pandoc meta blocks) = do let stringifyHTML = escapeStringForXML . stringify let authsMeta = map stringifyHTML $ docAuthors meta let dateMeta = stringifyHTML $ docDate meta + let descriptionMeta = escapeStringForXML $ + lookupMetaString "description" meta slideVariant <- gets stSlideVariant let sects = adjustNumbers opts $ makeSections (writerNumberSections opts) Nothing $ @@ -352,6 +354,7 @@ pandocToHtml opts (Pandoc meta blocks) = do defField "author-meta" authsMeta . maybe id (defField "date-meta") (normalizeDate dateMeta) . + defField "description-meta" descriptionMeta . defField "pagetitle" (stringifyHTML . docTitle $ meta) . defField "idprefix" (writerIdentifierPrefix opts) . -- cgit v1.2.3 From e837ed772efe14a1c05880ec9fbe025862bb303a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 28 Dec 2020 14:48:55 +0100 Subject: HTML reader: use renderTags' from Text.Pandoc.Shared. The `renderTags'` function was duplicated when the reader used `Text` as its string type. The duplication is no longer necessary. A side effect of this change is that empty `<col>` elements are written as self-closing tags in raw HTML blocks. --- src/Text/Pandoc/Readers/HTML.hs | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f8a17bb78..8e94a0812 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -61,8 +61,9 @@ import Text.Pandoc.Options ( ReaderOptions (readerExtensions, readerStripComments), extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) -import Text.Pandoc.Shared (addMetaField, blocksToInlines', crFilter, escapeURI, - extractSpaces, htmlSpanLikeElements, safeRead, tshow) +import Text.Pandoc.Shared ( + addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces, + htmlSpanLikeElements, renderTags', safeRead, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) @@ -1045,26 +1046,3 @@ canonicalizeUrl url = do return $ case (parseURIReference (T.unpack url), mbBaseHref) of (Just rel, Just bs) -> tshow (rel `nonStrictRelativeTo` bs) _ -> url - --- For now we need a special version here; the one in Shared has String type -renderTags' :: [Tag Text] -> Text -renderTags' = renderTagsOptions - renderOptions{ optMinimize = matchTags ["hr", "br", "img", - "meta", "link"] - , optRawTag = matchTags ["script", "style"] } - where matchTags tags = flip elem tags . T.toLower - - --- EPUB Specific --- --- -{- - -types :: [(String, ([String], Int))] -types = -- Document divisions - map (\s -> (s, (["section", "body"], 0))) - ["volume", "part", "chapter", "division"] - <> -- Document section and components - [ - ("abstract", ([], 0))] --} -- cgit v1.2.3 From 55f9b59af181f52f20f3e9eb8f9df3046d3cb536 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 28 Dec 2020 14:41:28 -0800 Subject: Docx writer: fix nested tables with captions. Previously we got unreadable content, because docx seems to want a `<w:p>` element (even an empty one) at the end of every table cell. Closes #6983. --- src/Text/Pandoc/Writers/Docx.hs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 0174a8501..13c4edb3c 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1011,12 +1011,14 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do $ blockToOpenXML opts (Para caption) let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () -- Table cells require a <w:p> element, even an empty one! - -- Not in the spec but in Word 2007, 2010. See #4953. + -- Not in the spec but in Word 2007, 2010. See #4953. And + -- apparently the last element must be a <w:p>, see #6983. let cellToOpenXML (al, cell) = do es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell - return $ if any (\e -> qName (elName e) == "p") (onlyElems es) - then es - else es ++ [Elem $ mknode "w:p" [] ()] + return $ + case reverse (onlyElems es) of + e:_ | qName (elName e) == "p" -> es + _ -> es ++ [Elem $ mknode "w:p" [] ()] headers' <- mapM cellToOpenXML $ zip aligns headers rows' <- mapM (mapM cellToOpenXML . zip aligns) rows let borderProps = Elem $ mknode "w:tcPr" [] -- cgit v1.2.3 From 3cd21c5f6e1b9ddb1e6ff35d418f6df25c56cb63 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 29 Dec 2020 08:44:11 -0800 Subject: Improve fix to #6983. If we have a paragraph then a bookmarkEnd, we don't need to insert the empty paragraph (and in fact it alters the spacing). Closes #6983. --- src/Text/Pandoc/Writers/Docx.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 13c4edb3c..65946ec88 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1017,7 +1017,9 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell return $ case reverse (onlyElems es) of - e:_ | qName (elName e) == "p" -> es + b:e:_ | qName (elName b) == "bookmarkEnd" + , qName (elName e) == "p" -> es + e:_ | qName (elName e) == "p" -> es _ -> es ++ [Elem $ mknode "w:p" [] ()] headers' <- mapM cellToOpenXML $ zip aligns headers rows' <- mapM (mapM cellToOpenXML . zip aligns) rows -- cgit v1.2.3 From 49286a25df24bd3bf6d5c20b47e819d83d6d8207 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 30 Dec 2020 13:36:18 -0800 Subject: Ms writer: don't justify inside table cells. --- src/Text/Pandoc/Writers/Ms.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 96914d3c6..0fc333bc2 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -263,8 +263,10 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) = return $ makeRow cols) rows setFirstPara return $ literal ".PP" $$ caption' $$ + literal ".na" $$ -- we don't want justification in table cells literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$ - colheadings' $$ vcat body $$ literal ".TE" + colheadings' $$ vcat body $$ literal ".TE" $$ + literal ".ad" blockToMs opts (BulletList items) = do contents <- mapM (bulletListItemToMs opts) items -- cgit v1.2.3 From 419190213a7e6b07e2ac66f47d53e9f023af757d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 30 Dec 2020 15:38:48 -0800 Subject: Hlint fixes --- src/Text/Pandoc/Readers/Txt2Tags.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 474e4fac0..08083b177 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -564,7 +564,7 @@ getTarget = do _ -> "html" atStart :: T2T () -atStart = (sourceColumn <$> getPosition) >>= guard . (== 1) +atStart = getPosition >>= guard . (== 1) . sourceColumn ignoreSpacesCap :: T2T Text -> T2T Text ignoreSpacesCap p = T.toLower <$> (spaces *> p <* spaces) diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 65946ec88..1818ed81d 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -28,6 +28,7 @@ import Data.Char (isSpace, isLetter) import Data.List (intercalate, isPrefixOf, isSuffixOf) import Data.String (fromString) import qualified Data.Map as M +import Data.Either (fromRight) import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T @@ -1402,7 +1403,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do generateImgElt (ident, _, _, img) = let (xpt,ypt) = desiredSizeInPoints opts attr - (either (const def) id (imageSize opts img)) + (fromRight def (imageSize opts img)) -- 12700 emu = 1 pt (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) -- cgit v1.2.3 From 0782d5882c8c04fbdffcb7026457b3cc79150692 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 30 Dec 2020 16:04:09 -0800 Subject: Undo the "Use fromRight" hlint hint. --- .hlint.yaml | 4 ++++ src/Text/Pandoc/Writers/Docx.hs | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/.hlint.yaml b/.hlint.yaml index 09fd9baf7..6b74014d4 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -104,5 +104,9 @@ - Text.Pandoc.Readers.Markdown - Text.Pandoc.Readers.RST +# fromRight is only in base >= 4.10 +- ignore: + name: "Use fromRight" + # Define some custom infix operators # - fixity: infixr 3 ~^#^~ diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 1818ed81d..65946ec88 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -28,7 +28,6 @@ import Data.Char (isSpace, isLetter) import Data.List (intercalate, isPrefixOf, isSuffixOf) import Data.String (fromString) import qualified Data.Map as M -import Data.Either (fromRight) import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T @@ -1403,7 +1402,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do generateImgElt (ident, _, _, img) = let (xpt,ypt) = desiredSizeInPoints opts attr - (fromRight def (imageSize opts img)) + (either (const def) id (imageSize opts img)) -- 12700 emu = 1 pt (xemu,yemu) = fitToPage (xpt * 12700, ypt * 12700) (pageWidth * 12700) -- cgit v1.2.3 From 23f964b9076f9795b00e740d20feebeb1d6d91db Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 30 Dec 2020 21:31:28 -0800 Subject: Mediawiki reader: allow space around storng/emph delimiters. Closes #6993. --- src/Text/Pandoc/Readers/MediaWiki.hs | 10 ++++------ test/command/6993.md | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 test/command/6993.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index e8985ab2c..d712b1120 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -678,19 +678,17 @@ url = do -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (PandocMonad m, Show b) => MWParser m a -> MWParser m b -> MWParser m Inlines inlinesBetween start end = - trimInlines . mconcat <$> try (start >> many1Till inner end) - where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) - innerSpace = try $ whitespace <* notFollowedBy' end + trimInlines . mconcat <$> try (start >> many1Till inline end) emph :: PandocMonad m => MWParser m Inlines emph = B.emph <$> nested (inlinesBetween start end) - where start = sym "''" >> lookAhead nonspaceChar + where start = sym "''" end = try $ notFollowedBy' (() <$ strong) >> sym "''" strong :: PandocMonad m => MWParser m Inlines strong = B.strong <$> nested (inlinesBetween start end) - where start = sym "'''" >> lookAhead nonspaceChar - end = try $ sym "'''" + where start = sym "'''" + end = sym "'''" doubleQuotes :: PandocMonad m => MWParser m Inlines doubleQuotes = do diff --git a/test/command/6993.md b/test/command/6993.md new file mode 100644 index 000000000..b7f7f5384 --- /dev/null +++ b/test/command/6993.md @@ -0,0 +1,21 @@ +``` +% pandoc -f mediawiki -t native +'''Should be bold ''' +^D +[Para [Strong [Str "Should",Space,Str "be",Space,Str "bold"]]] +``` + +``` +% pandoc -f mediawiki -t native +''' Should be bold''' +^D +[Para [Strong [Str "Should",Space,Str "be",Space,Str "bold"]]] +``` + +``` +% pandoc -f mediawiki -t native +'' Should be emph '' +^D +[Para [Emph [Str "Should",Space,Str "be",Space,Str "emph"]]] +``` + -- cgit v1.2.3 From 17e3efc785fa8b0680ec6d4ebaac1ea6bdb57e1a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 1 Jan 2021 10:50:58 +0100 Subject: Org reader: restructure output of captioned code blocks The Div wrapper of code blocks with captions now has the class "captioned-content". The caption itself is added as a Plain block inside a Div of class "caption". This makes it easier to write filters which match on captioned code blocks. Existing filters will need to be updated. Closes: #6977 --- src/Text/Pandoc/Readers/Org/Blocks.hs | 26 ++++++++++++-------------- test/Tests/Readers/Org/Block/CodeBlock.hs | 6 +++--- 2 files changed, 15 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index f2e8b1ab6..17e3ff986 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -294,24 +294,22 @@ verseBlock blockType = try $ do codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces - (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) - content <- rawBlockContent blockType - resultsContent <- option mempty babelResultsBlock - let id' = fromMaybe mempty $ blockAttrName blockAttrs - let codeBlck = B.codeBlockWith ( id', classes, kv ) content - let labelledBlck = maybe (pure codeBlck) - (labelDiv codeBlck) - (blockAttrCaption blockAttrs) + (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) + content <- rawBlockContent blockType + resultsContent <- option mempty babelResultsBlock + let identifier = fromMaybe mempty $ blockAttrName blockAttrs + let codeBlk = B.codeBlockWith (identifier, classes, kv) content + let wrap = maybe pure addCaption (blockAttrCaption blockAttrs) return $ - (if exportsCode kv then labelledBlck else mempty) <> + (if exportsCode kv then wrap codeBlk else mempty) <> (if exportsResults kv then resultsContent else mempty) where - labelDiv :: Blocks -> F Inlines -> F Blocks - labelDiv blk value = - B.divWith nullAttr <$> (mappend <$> labelledBlock value <*> pure blk) + addCaption :: F Inlines -> Blocks -> F Blocks + addCaption caption blk = B.divWith ("", ["captioned-content"], []) + <$> (mkCaptionBlock caption <> pure blk) - labelledBlock :: F Inlines -> F Blocks - labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) + mkCaptionBlock :: F Inlines -> F Blocks + mkCaptionBlock = fmap (B.divWith ("", ["caption"], []) . B.plain) exportsResults :: [(Text, Text)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs index adf6661ca..2648a6e1f 100644 --- a/test/Tests/Readers/Org/Block/CodeBlock.hs +++ b/test/Tests/Readers/Org/Block/CodeBlock.hs @@ -185,10 +185,10 @@ tests = , "#+end_src" ] =?> divWith - nullAttr + ("", ["captioned-content"], [] ) (mappend - (plain $ spanWith ("", ["label"], []) - (spcSep [ "Functor", "laws", "in", "Haskell" ])) + (divWith ("", ["caption"], []) $ + plain (spcSep [ "Functor", "laws", "in", "Haskell" ])) (codeBlockWith ("functor-laws", ["haskell"], []) (T.unlines [ "fmap id = id" , "fmap (p . q) = (fmap p) . (fmap q)" -- cgit v1.2.3 From 9a18cf4b591364e7f0dc9b976628e91f8b31a6cf Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 2 Jan 2021 07:56:07 -0800 Subject: LaTeX writer: revert table line height increase in 2.11.3. In 2.11.3 we started adding `\addlinespace`, which produced less dense tables. This wasn't an intentional change; I misunderstood a comment in the discussion leading up to the change. This commit restores the earlier default table appearance. Note that if you want a less dense table, you can use something like `\def\arraystretch{1.5}` in your header. Closes #6996. --- src/Text/Pandoc/Writers/LaTeX/Table.hs | 2 +- test/command/2378.md | 6 ++-- test/command/5367.md | 6 ++-- test/tables.latex | 61 +++++++++++++++------------------- test/tables/nordics.latex | 18 +++++----- test/tables/planets.latex | 28 ++++++---------- test/tables/students.latex | 22 ++++++------ 7 files changed, 63 insertions(+), 80 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 9dd66c8a3..8411d9f80 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -169,7 +169,7 @@ rowToLaTeX :: PandocMonad m -> LW m (Doc Text) rowToLaTeX blocksWriter celltype row = do cellsDocs <- mapM (cellToLaTeX blocksWriter celltype) (fillRow row) - return $ hsep (intersperse "&" cellsDocs) <> " \\\\ \\addlinespace" + return $ hsep (intersperse "&" cellsDocs) <> " \\\\" -- | Pads row with empty cells to adjust for rowspans above this row. fillRow :: [Ann.Cell] -> [Ann.Cell] diff --git a/test/command/2378.md b/test/command/2378.md index e81855367..59359c653 100644 --- a/test/command/2378.md +++ b/test/command/2378.md @@ -14,14 +14,14 @@ is used. \begin{longtable}[]{@{}ll@{}} \caption{a table}\tabularnewline \toprule -x & y\footnote{a footnote} \\ \addlinespace +x & y\footnote{a footnote} \\ \midrule \endfirsthead \toprule -x & y{} \\ \addlinespace +x & y{} \\ \midrule \endhead -1 & 2 \\ \addlinespace +1 & 2 \\ \bottomrule \end{longtable} ``` diff --git a/test/command/5367.md b/test/command/5367.md index 4dbcca771..2d3a5e52e 100644 --- a/test/command/5367.md +++ b/test/command/5367.md @@ -24,14 +24,14 @@ hello\footnote{doc footnote} >{\centering\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{0.17}}@{}} \caption[Sample table.]{Sample table.\footnote{caption footnote}}\tabularnewline \toprule -Fruit\footnote{header footnote} \\ \addlinespace +Fruit\footnote{header footnote} \\ \midrule \endfirsthead \toprule -Fruit{} \\ \addlinespace +Fruit{} \\ \midrule \endhead -Bans\footnote{table cell footnote} \\ \addlinespace +Bans\footnote{table cell footnote} \\ \bottomrule \end{longtable} diff --git a/test/tables.latex b/test/tables.latex index 38c571ba6..afa14d845 100644 --- a/test/tables.latex +++ b/test/tables.latex @@ -3,16 +3,16 @@ Simple table with caption: \begin{longtable}[]{@{}rlcl@{}} \caption{Demonstration of simple table syntax.}\tabularnewline \toprule -Right & Left & Center & Default \\ \addlinespace +Right & Left & Center & Default \\ \midrule \endfirsthead \toprule -Right & Left & Center & Default \\ \addlinespace +Right & Left & Center & Default \\ \midrule \endhead -12 & 12 & 12 & 12 \\ \addlinespace -123 & 123 & 123 & 123 \\ \addlinespace -1 & 1 & 1 & 1 \\ \addlinespace +12 & 12 & 12 & 12 \\ +123 & 123 & 123 & 123 \\ +1 & 1 & 1 & 1 \\ \bottomrule \end{longtable} @@ -20,12 +20,12 @@ Simple table without caption: \begin{longtable}[]{@{}rlcl@{}} \toprule -Right & Left & Center & Default \\ \addlinespace +Right & Left & Center & Default \\ \midrule \endhead -12 & 12 & 12 & 12 \\ \addlinespace -123 & 123 & 123 & 123 \\ \addlinespace -1 & 1 & 1 & 1 \\ \addlinespace +12 & 12 & 12 & 12 \\ +123 & 123 & 123 & 123 \\ +1 & 1 & 1 & 1 \\ \bottomrule \end{longtable} @@ -34,16 +34,16 @@ Simple table indented two spaces: \begin{longtable}[]{@{}rlcl@{}} \caption{Demonstration of simple table syntax.}\tabularnewline \toprule -Right & Left & Center & Default \\ \addlinespace +Right & Left & Center & Default \\ \midrule \endfirsthead \toprule -Right & Left & Center & Default \\ \addlinespace +Right & Left & Center & Default \\ \midrule \endhead -12 & 12 & 12 & 12 \\ \addlinespace -123 & 123 & 123 & 123 \\ \addlinespace -1 & 1 & 1 & 1 \\ \addlinespace +12 & 12 & 12 & 12 \\ +123 & 123 & 123 & 123 \\ +1 & 1 & 1 & 1 \\ \bottomrule \end{longtable} @@ -56,19 +56,15 @@ Multiline table with caption: >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}} \caption{Here's the caption. It may span multiple lines.}\tabularnewline \toprule -Centered Header & Left Aligned & Right Aligned & Default -aligned \\ \addlinespace +Centered Header & Left Aligned & Right Aligned & Default aligned \\ \midrule \endfirsthead \toprule -Centered Header & Left Aligned & Right Aligned & Default -aligned \\ \addlinespace +Centered Header & Left Aligned & Right Aligned & Default aligned \\ \midrule \endhead -First & row & 12.0 & Example of a row that spans multiple -lines. \\ \addlinespace -Second & row & 5.0 & Here's another one. Note the blank line between -rows. \\ \addlinespace +First & row & 12.0 & Example of a row that spans multiple lines. \\ +Second & row & 5.0 & Here's another one. Note the blank line between rows. \\ \bottomrule \end{longtable} @@ -80,14 +76,11 @@ Multiline table without caption: >{\raggedleft\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.16}} >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}} \toprule -Centered Header & Left Aligned & Right Aligned & Default -aligned \\ \addlinespace +Centered Header & Left Aligned & Right Aligned & Default aligned \\ \midrule \endhead -First & row & 12.0 & Example of a row that spans multiple -lines. \\ \addlinespace -Second & row & 5.0 & Here's another one. Note the blank line between -rows. \\ \addlinespace +First & row & 12.0 & Example of a row that spans multiple lines. \\ +Second & row & 5.0 & Here's another one. Note the blank line between rows. \\ \bottomrule \end{longtable} @@ -96,9 +89,9 @@ Table without column headers: \begin{longtable}[]{@{}rlcr@{}} \toprule \endhead -12 & 12 & 12 & 12 \\ \addlinespace -123 & 123 & 123 & 123 \\ \addlinespace -1 & 1 & 1 & 1 \\ \addlinespace +12 & 12 & 12 & 12 \\ +123 & 123 & 123 & 123 \\ +1 & 1 & 1 & 1 \\ \bottomrule \end{longtable} @@ -111,9 +104,7 @@ Multiline table without column headers: >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}} \toprule \endhead -First & row & 12.0 & Example of a row that spans multiple -lines. \\ \addlinespace -Second & row & 5.0 & Here's another one. Note the blank line between -rows. \\ \addlinespace +First & row & 12.0 & Example of a row that spans multiple lines. \\ +Second & row & 5.0 & Here's another one. Note the blank line between rows. \\ \bottomrule \end{longtable} diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex index d1a0b4aed..1b5929bad 100644 --- a/test/tables/nordics.latex +++ b/test/tables/nordics.latex @@ -6,21 +6,19 @@ \caption{States belonging to the \emph{Nordics.}}\tabularnewline \toprule Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} & -\vtop{\hbox{\strut Area}\hbox{\strut (in -km\textsuperscript{2})}} \\ \addlinespace +\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\ \midrule \endfirsthead \toprule Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} & -\vtop{\hbox{\strut Area}\hbox{\strut (in -km\textsuperscript{2})}} \\ \addlinespace +\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\ \midrule \endhead -Denmark & Copenhagen & 5,809,502 & 43,094 \\ \addlinespace -Finland & Helsinki & 5,537,364 & 338,145 \\ \addlinespace -Iceland & Reykjavik & 343,518 & 103,000 \\ \addlinespace -Norway & Oslo & 5,372,191 & 323,802 \\ \addlinespace -Sweden & Stockholm & 10,313,447 & 450,295 \\ \addlinespace -Total & & 27,376,022 & 1,258,336 \\ \addlinespace +Denmark & Copenhagen & 5,809,502 & 43,094 \\ +Finland & Helsinki & 5,537,364 & 338,145 \\ +Iceland & Reykjavik & 343,518 & 103,000 \\ +Norway & Oslo & 5,372,191 & 323,802 \\ +Sweden & Stockholm & 10,313,447 & 450,295 \\ +Total & & 27,376,022 & 1,258,336 \\ \bottomrule \end{longtable} diff --git a/test/tables/planets.latex b/test/tables/planets.latex index 9457b6821..8238c43f3 100644 --- a/test/tables/planets.latex +++ b/test/tables/planets.latex @@ -3,34 +3,28 @@ \toprule \multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun -(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace +(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \midrule \endfirsthead \toprule \multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun -(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \addlinespace +(10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \midrule \endhead \multicolumn{2}{l}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 & -4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the -Sun \\ \addlinespace -& & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0 -& \\ \addlinespace -& & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our -world \\ \addlinespace +4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the Sun \\ +& & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0 & \\ +& & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our world \\ & & Mars & 0.642 & 6,792 & 3933 & 3.7 & 24.7 & 227.9 & -65 & 2 & The red -planet \\ \addlinespace +planet \\ \multirow{4}{*}{Jovian planets} & \multirow{2}{*}{Gas giants} & Jupiter & 1898 -& 142,984 & 1326 & 23.1 & 9.9 & 778.6 & -110 & 67 & The largest -planet \\ \addlinespace -& & Saturn & 568 & 120,536 & 687 & 9.0 & 10.7 & 1433.5 & -140 & 62 -& \\ \addlinespace +& 142,984 & 1326 & 23.1 & 9.9 & 778.6 & -110 & 67 & The largest planet \\ +& & Saturn & 568 & 120,536 & 687 & 9.0 & 10.7 & 1433.5 & -140 & 62 & \\ & \multirow{2}{*}{Ice giants} & Uranus & 86.8 & 51,118 & 1271 & 8.7 & 17.2 & -2872.5 & -195 & 27 & \\ \addlinespace -& & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14 -& \\ \addlinespace +2872.5 & -195 & 27 & \\ +& & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14 & \\ \multicolumn{2}{l}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 & -153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\ \addlinespace +153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\ \bottomrule \end{longtable} diff --git a/test/tables/students.latex b/test/tables/students.latex index 680a539d3..87efb0851 100644 --- a/test/tables/students.latex +++ b/test/tables/students.latex @@ -3,21 +3,21 @@ >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}@{}} \caption{List of Students}\tabularnewline \toprule -Student ID & Name \\ \addlinespace +Student ID & Name \\ \midrule \endfirsthead \toprule -Student ID & Name \\ \addlinespace +Student ID & Name \\ \midrule \endhead -\multicolumn{2}{l}{Computer Science} \\ \addlinespace -3741255 & Jones, Martha \\ \addlinespace -4077830 & Pierce, Benjamin \\ \addlinespace -5151701 & Kirk, James \\ \addlinespace -\multicolumn{2}{l}{Russian Literature} \\ \addlinespace -3971244 & Nim, Victor \\ \addlinespace -\multicolumn{2}{l}{Astrophysics} \\ \addlinespace -4100332 & Petrov, Alexandra \\ \addlinespace -4100332 & Toyota, Hiroko \\ \addlinespace +\multicolumn{2}{l}{Computer Science} \\ +3741255 & Jones, Martha \\ +4077830 & Pierce, Benjamin \\ +5151701 & Kirk, James \\ +\multicolumn{2}{l}{Russian Literature} \\ +3971244 & Nim, Victor \\ +\multicolumn{2}{l}{Astrophysics} \\ +4100332 & Petrov, Alexandra \\ +4100332 & Toyota, Hiroko \\ \bottomrule \end{longtable} -- cgit v1.2.3 From 260aaaacc640cc458259e292c42e15c3d2f34b3f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 2 Jan 2021 08:19:00 -0800 Subject: LaTeX reader: put contents of unknown environments in a Div... when `raw_tex` is not enabled. (When `raw_tex` is enabled, the whole environment is parsed as a raw block.) The class name is the name of the environment. Previously, we just included the contents without the surrounding Div, but having a record of the environment's boundaries and name can be useful. Closes #6997. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index afe960454..d9fe0f502 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1835,7 +1835,7 @@ rawEnv name = do report $ SkippedContent beginCommand pos1 pos2 <- getPosition report $ SkippedContent ("\\end{" <> name <> "}") pos2 - return bs + return $ divWith ("",[name],[]) bs rawVerbEnv :: PandocMonad m => Text -> LP m Blocks rawVerbEnv name = do -- cgit v1.2.3 From 57b10941521d9db808e3892ea9893878065b7c50 Mon Sep 17 00:00:00 2001 From: Dimitri Sabadie <dimitri.sabadie@gmail.com> Date: Sun, 3 Jan 2021 08:57:47 +0100 Subject: Org reader: mark verbatim code with class "verbatim". (#6998) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Replace org-mode’s verbatim from code to codeWith. This adds the `"verbatim"` class so that exporters can apply a specific style on it. For instance, it will be possible for HTML to add a CSS rule for code + verbatim class. * Alter test for org-mode’s verbatim change. See previous commit for further detail on the new implementation. --- src/Text/Pandoc/Readers/Org/Inlines.hs | 2 +- test/Tests/Readers/Org/Inline.hs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 0330cf55f..b819a34a3 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -572,7 +572,7 @@ underline :: PandocMonad m => OrgParser m (F Inlines) underline = fmap B.underline <$> emphasisBetween '_' verbatim :: PandocMonad m => OrgParser m (F Inlines) -verbatim = return . B.code <$> verbatimBetween '=' +verbatim = return . B.codeWith ("", ["verbatim"], []) <$> verbatimBetween '=' code :: PandocMonad m => OrgParser m (F Inlines) code = return . B.code <$> verbatimBetween '~' diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs index 9edd328c3..b5d0caa26 100644 --- a/test/Tests/Readers/Org/Inline.hs +++ b/test/Tests/Readers/Org/Inline.hs @@ -56,7 +56,7 @@ tests = , "Verbatim" =: "=Robot.rock()=" =?> - para (code "Robot.rock()") + para (codeWith ("", ["verbatim"], []) "Robot.rock()") , "Code" =: "~word for word~" =?> @@ -190,7 +190,7 @@ tests = ]) , "Verbatim text can contain equal signes (=)" =: "=is_subst = True=" =?> - para (code "is_subst = True") + para (codeWith ("", ["verbatim"], []) "is_subst = True") , testGroup "Images" [ "Image" =: -- cgit v1.2.3 From 21ee2d80c25d6e7fe3be00b58de553b638c90abb Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 3 Jan 2021 11:27:01 -0800 Subject: EPUB writer: adjust internal links to images, links, and tables... after splitting into chapters. Previously we only did this for Div and Span and Header elements. See #7000. --- src/Text/Pandoc/Writers/EPUB.hs | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 5867b7c80..a24a872ef 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -557,6 +557,10 @@ pandocToEPUB version opts doc = do let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] extractLinkURL' num (Span (ident, _, _) _) | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + extractLinkURL' num (Link (ident, _, _) _ _) + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + extractLinkURL' num (Image (ident, _, _) _ _) + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL' _ _ = [] let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] @@ -564,6 +568,8 @@ pandocToEPUB version opts doc = do | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL num (Header _ (ident, _, _) _) | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + extractLinkURL num (Table (ident,_,_) _ _ _ _ _) + | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL num b = query (extractLinkURL' num) b let reftable = concat $ zipWith (\(Chapter bs) num -> -- cgit v1.2.3 From f04e02d8d5ff893475c129aaf69ee3f175bf8d8f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 3 Jan 2021 11:35:36 -0800 Subject: EPUB writer: recognize `Format "html4"`, `Format "html5"` as raw HTML. --- src/Text/Pandoc/Writers/EPUB.hs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index a24a872ef..6222b0690 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1059,11 +1059,17 @@ getMediaNextNewName ext = do let nextName = "file" ++ show nextId ++ ext (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName) +isHtmlFormat :: Format -> Bool +isHtmlFormat (Format "html") = True +isHtmlFormat (Format "html4") = True +isHtmlFormat (Format "html5") = True +isHtmlFormat _ = False + transformBlock :: PandocMonad m => Block -> E m Block transformBlock (RawBlock fmt raw) - | fmt == Format "html" = do + | isHtmlFormat fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawBlock fmt (renderTags' tags') @@ -1083,7 +1089,7 @@ transformInline opts x@(Math t m) return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] transformInline _opts (RawInline fmt raw) - | fmt == Format "html" = do + | isHtmlFormat fmt = do let tags = parseTags raw tags' <- mapM transformTag tags return $ RawInline fmt (renderTags' tags') -- cgit v1.2.3 From 1ce7db1fa67b6a9f1d354d377e3bfa548d854ee9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 4 Jan 2021 11:38:18 -0800 Subject: EPUB writer: adjust internal links to identifiers... defined in raw HTML sections after splitting into chapters. Closes #7000. --- src/Text/Pandoc/Writers/EPUB.hs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 6222b0690..c8f5ff60d 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -561,6 +561,16 @@ pandocToEPUB version opts doc = do | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL' num (Image (ident, _, _) _ _) | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + extractLinkURL' num (RawInline fmt raw) + | isHtmlFormat fmt + = foldr (\tag -> + case tag of + TagOpen{} -> + case fromAttrib "id" tag of + "" -> id + x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + _ -> id) + [] (parseTags raw) extractLinkURL' _ _ = [] let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] @@ -570,6 +580,16 @@ pandocToEPUB version opts doc = do | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] extractLinkURL num (Table (ident,_,_) _ _ _ _ _) | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + extractLinkURL num (RawBlock fmt raw) + | isHtmlFormat fmt + = foldr (\tag -> + case tag of + TagOpen{} -> + case fromAttrib "id" tag of + "" -> id + x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + _ -> id) + [] (parseTags raw) extractLinkURL num b = query (extractLinkURL' num) b let reftable = concat $ zipWith (\(Chapter bs) num -> -- cgit v1.2.3 From ea479bf28a4031f408af12ea92d3e19f9a838820 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 4 Jan 2021 14:05:03 -0800 Subject: LaTeX reader: handle filecontents environment. Closes #7003. --- src/Text/Pandoc/Readers/LaTeX.hs | 32 +++++++++++++++++++++------ src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 2 ++ test/command/7003.md | 37 ++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 test/command/7003.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index d9fe0f502..14a41a911 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1219,6 +1219,7 @@ preamble :: PandocMonad m => LP m Blocks preamble = mconcat <$> many preambleBlock where preambleBlock = (mempty <$ spaces1) <|> macroDef (rawBlock "latex") + <|> filecontents <|> (mempty <$ blockCommand) <|> (mempty <$ braced) <|> (do notFollowedBy (begin_ "document") @@ -1272,6 +1273,16 @@ include name = do mapM_ (insertIncluded defaultExt) fs return mempty +readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text) +readFileFromTexinputs fp = do + fileContentsMap <- sFileContents <$> getState + case M.lookup (T.pack fp) fileContentsMap of + Just t -> return (Just t) + Nothing -> do + dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." + <$> lookupEnv "TEXINPUTS" + readFileFromDirs dirs fp + insertIncluded :: PandocMonad m => FilePath -> FilePath @@ -1281,13 +1292,12 @@ insertIncluded defaultExtension f' = do ".tex" -> f' ".sty" -> f' _ -> addExtension f' defaultExtension - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS" pos <- getPosition containers <- getIncludeFiles <$> getState when (T.pack f `elem` containers) $ throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos updateState $ addIncludeFile $ T.pack f - mbcontents <- readFileFromDirs dirs f + mbcontents <- readFileFromTexinputs f contents <- case mbcontents of Just s -> return s Nothing -> do @@ -1695,6 +1705,18 @@ environments = M.fromList , ("iftoggle", try $ ifToggle >> block) ] +filecontents :: PandocMonad m => LP m Blocks +filecontents = try $ do + controlSeq "begin" + name <- untokenize <$> braced + guard $ name == "filecontents" || name == "filecontents*" + skipopts + fp <- untokenize <$> braced + txt <- verbEnv name + updateState $ \st -> + st{ sFileContents = M.insert fp txt (sFileContents st) } + return mempty + theoremstyle :: PandocMonad m => LP m Blocks theoremstyle = do stylename <- untokenize <$> braced @@ -1894,8 +1916,7 @@ inputMinted = do pos <- getPosition attr <- mintedAttr f <- T.filter (/='"') . untokenize <$> braced - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs (T.unpack f) + mbCode <- readFileFromTexinputs (T.unpack f) rawcode <- case mbCode of Just s -> return s Nothing -> do @@ -1981,8 +2002,7 @@ inputListing = do pos <- getPosition options <- option [] keyvals f <- T.filter (/='"') . untokenize <$> braced - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." <$> lookupEnv "TEXINPUTS" - mbCode <- readFileFromDirs dirs (T.unpack f) + mbCode <- readFileFromTexinputs (T.unpack f) codeLines <- case mbCode of Just s -> return $ T.lines s Nothing -> do diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 563d32883..e92ed387c 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -152,6 +152,7 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sHasChapters :: Bool , sToggles :: M.Map Text Bool , sExpanded :: Bool + , sFileContents :: M.Map Text Text } deriving Show @@ -177,6 +178,7 @@ defaultLaTeXState = LaTeXState{ sOptions = def , sHasChapters = False , sToggles = M.empty , sExpanded = False + , sFileContents = M.empty } instance PandocMonad m => HasQuoteContext LaTeXState m where diff --git a/test/command/7003.md b/test/command/7003.md new file mode 100644 index 000000000..1f6275943 --- /dev/null +++ b/test/command/7003.md @@ -0,0 +1,37 @@ +``` +% pandoc -f latex -t native +\documentclass{article} +\usepackage{listings} + +\lstset{basicstyle=\ttfamily} + +\begin{filecontents*}[overwrite]{example.tex} +\documentclass{article} +\begin{document} +\section{Bar} +This a Bar section +\end{document} +\end{filecontents*} + +\begin{document} + +\section{With lstlisting environment} + +\begin{lstlisting} +\documentclass{article} +\begin{document} +\section{Foo} +This a Foo section +\end{document} +\end{lstlisting} + +\section{With lstinputlisting command} + +\lstinputlisting{example.tex} +\end{document} +^D +[Header 1 ("with-lstlisting-environment",[],[]) [Str "With",Space,Str "lstlisting",Space,Str "environment"] +,CodeBlock ("",[],[]) "\\documentclass{article}\n\\begin{document}\n\\section{Foo}\nThis a Foo section\n\\end{document}" +,Header 1 ("with-lstinputlisting-command",[],[]) [Str "With",Space,Str "lstinputlisting",Space,Str "command"] +,CodeBlock ("",["latex"],[]) "\\documentclass{article}\n\\begin{document}\n\\section{Bar}\nThis a Bar section\n\\end{document}"] +``` -- cgit v1.2.3 From 385b6a3b215124fd2dfa044b8847d69a6cf14a73 Mon Sep 17 00:00:00 2001 From: David Martschenko <62178322+davidmrt98@users.noreply.github.com> Date: Tue, 5 Jan 2021 19:15:59 +0100 Subject: Implement defaults file inheritance (#6924) Allow defaults files to inherit options from other defaults files by specifying them with the following syntax: `defaults: [list of defaults files or single defaults file]`. --- MANUAL.txt | 10 +++ pandoc.cabal | 7 ++ src/Text/Pandoc/App/CommandLineOptions.hs | 36 +++----- src/Text/Pandoc/App/Opt.hs | 136 ++++++++++++++++++++++++++++-- test/command/defaults-inheritance-1.md | 6 ++ test/command/defaults-inheritance-2.md | 5 ++ test/command/defaults-inheritance-3.md | 6 ++ test/command/defaults3.yaml | 4 + test/command/defaults4.yaml | 3 + test/command/defaults5.yaml | 2 + test/command/defaults6.yaml | 2 + test/command/defaults7.yaml | 2 + test/command/defaults8.yaml | 2 + test/command/defaults9.yaml | 1 + 14 files changed, 189 insertions(+), 33 deletions(-) create mode 100644 test/command/defaults-inheritance-1.md create mode 100644 test/command/defaults-inheritance-2.md create mode 100644 test/command/defaults-inheritance-3.md create mode 100644 test/command/defaults3.yaml create mode 100644 test/command/defaults4.yaml create mode 100644 test/command/defaults5.yaml create mode 100644 test/command/defaults6.yaml create mode 100644 test/command/defaults7.yaml create mode 100644 test/command/defaults8.yaml create mode 100644 test/command/defaults9.yaml (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 53cd54e5f..831ffb0b0 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1507,6 +1507,16 @@ input-files: - content.md # or you may use input-file: with a single value +# Include options from the specified defaults files. +# The files will be searched for first in the working directory +# and then in the defaults subdirectory of the user data directory. +# The files are included in the same order in which they appear in +# the list. Options specified in this defaults file always have +# priority over the included ones. +defaults: +- defsA +- defsB + template: letter standalone: true self-contained: false diff --git a/pandoc.cabal b/pandoc.cabal index 794eef91d..cce3c1a58 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -216,6 +216,13 @@ extra-source-files: test/command/01.csv test/command/defaults1.yaml test/command/defaults2.yaml + test/command/defaults3.yaml + test/command/defaults4.yaml + test/command/defaults5.yaml + test/command/defaults6.yaml + test/command/defaults7.yaml + test/command/defaults8.yaml + test/command/defaults9.yaml test/command/3533-rst-csv-tables.csv test/command/3880.txt test/command/5182.txt diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index 906fcc4c0..21ee47b7b 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -25,6 +25,7 @@ module Text.Pandoc.App.CommandLineOptions ( import Control.Monad import Control.Monad.Trans import Control.Monad.Except (throwError) +import Control.Monad.State.Strict import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder, defConfig, Indent(..), NumberFormat(..)) import Data.Bifunctor (second) @@ -46,10 +47,12 @@ import System.FilePath import System.IO (stdout) import Text.DocTemplates (Context (..), ToContext (toVal), Val (..)) import Text.Pandoc -import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..), addMeta) +import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..), + DefaultsState (..), addMeta, applyDefaults, + fullDefaultsPath) import Text.Pandoc.Filter (Filter (..)) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs, findM) +import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs) import Text.Printf #ifdef EMBED_DATA_FILES @@ -64,7 +67,6 @@ import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import qualified Data.Text as T -import qualified Data.YAML as Y import qualified Text.Pandoc.UTF8 as UTF8 parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt @@ -166,7 +168,11 @@ options = , Option "d" ["defaults"] (ReqArg - (\arg opt -> applyDefaults opt arg + (\arg opt -> runIOorExplode $ do + let defsState = DefaultsState { curDefaults = Nothing, + inheritanceGraph = [] } + fp <- fullDefaultsPath (optDataDir opt) arg + evalStateT (applyDefaults opt fp) defsState ) "FILE") "" @@ -1012,28 +1018,6 @@ writersNames = sort splitField :: String -> (String, String) splitField = second (tailDef "true") . break (`elemText` ":=") --- | Apply defaults from --defaults file. -applyDefaults :: Opt -> FilePath -> IO Opt -applyDefaults opt file = runIOorExplode $ do - let fp = if null (takeExtension file) - then addExtension file "yaml" - else file - setVerbosity $ optVerbosity opt - dataDirs <- liftIO defaultUserDataDirs - let fps = fp : case optDataDir opt of - Nothing -> map (</> ("defaults" </> fp)) - dataDirs - Just dd -> [dd </> "defaults" </> fp] - fp' <- fromMaybe fp <$> findM fileExists fps - inp <- readFileLazy fp' - case Y.decode1 inp of - Right (f :: Opt -> Opt) -> return $ f opt - Left (errpos, errmsg) -> throwError $ - PandocParseError $ T.pack $ - "Error parsing " ++ fp' ++ " line " ++ - show (Y.posLine errpos) ++ " column " ++ - show (Y.posColumn errpos) ++ ":\n" ++ errmsg - lookupHighlightStyle :: PandocMonad m => String -> m Style lookupHighlightStyle s | takeExtension s == ".theme" = -- attempt to load KDE theme diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 00b4b5523..6dd19758e 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -20,10 +20,17 @@ module Text.Pandoc.App.Opt ( Opt(..) , LineEnding (..) , IpynbOutput (..) + , DefaultsState (..) , defaultOpts , addMeta + , applyDefaults + , fullDefaultsPath ) where +import Control.Monad.Except (MonadIO, liftIO, throwError, (>=>), foldM) +import Control.Monad.State.Strict (StateT, modify, gets) +import System.FilePath ( addExtension, (</>), takeExtension ) import Data.Char (isLower, toLower) +import Data.Maybe (fromMaybe) import GHC.Generics hiding (Meta) import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Filter (Filter (..)) @@ -34,7 +41,9 @@ import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault), ReferenceLocation (EndOfDocument), ObfuscationMethod (NoObfuscation), CiteMethod (Citeproc)) -import Text.Pandoc.Shared (camelCaseStrToHyphenated) +import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, PandocMonad) +import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError)) +import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDirs, findM, ordNub) import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Readers.Metadata (yamlMap) import Text.Pandoc.Class.PandocPure @@ -150,16 +159,77 @@ data Opt = Opt } deriving (Generic, Show) instance FromYAML (Opt -> Opt) where - parseYAML (Mapping _ _ m) = - foldr (.) id <$> mapM doOpt (M.toList m) + parseYAML (Mapping _ _ m) = chain doOpt (M.toList m) parseYAML n = failAtNode n "Expected a mapping" +data DefaultsState = DefaultsState + { + curDefaults :: Maybe FilePath -- currently parsed file + , inheritanceGraph :: [[FilePath]] -- defaults file inheritance graph + } deriving (Show) + +instance (PandocMonad m, MonadIO m) + => FromYAML (Opt -> StateT DefaultsState m Opt) where + parseYAML (Mapping _ _ m) = do + let opts = M.mapKeys toText m + dataDir <- case M.lookup "data-dir" opts of + Nothing -> return Nothing + Just v -> Just . unpack <$> parseYAML v + f <- parseOptions $ M.toList m + case M.lookup "defaults" opts of + Just v -> do + g <- parseDefaults v dataDir + return $ g >=> f + Nothing -> return f + where + toText (Scalar _ (SStr s)) = s + toText _ = "" + parseYAML n = failAtNode n "Expected a mapping" + +parseDefaults :: (PandocMonad m, MonadIO m) + => Node Pos + -> Maybe FilePath + -> Parser (Opt -> StateT DefaultsState m Opt) +parseDefaults n dataDir = parseDefsNames n >>= \ds -> return $ \o -> do + -- get parent defaults: + defsParent <- gets $ fromMaybe "" . curDefaults + -- get child defaults: + defsChildren <- mapM (fullDefaultsPath dataDir) ds + -- expand parent in defaults inheritance graph by children: + defsGraph <- gets inheritanceGraph + let defsGraphExp = expand defsGraph defsChildren defsParent + modify $ \defsState -> defsState{ inheritanceGraph = defsGraphExp } + -- check for cyclic inheritance: + if cyclic defsGraphExp + then throwError $ + PandocSomeError $ T.pack $ + "Error: Circular defaults file reference in " ++ + "'" ++ defsParent ++ "'" + else foldM applyDefaults o defsChildren + where parseDefsNames x = (parseYAML x >>= \xs -> return $ map unpack xs) + <|> (parseYAML x >>= \x' -> return [unpack x']) + +parseOptions :: Monad m + => [(Node Pos, Node Pos)] + -> Parser (Opt -> StateT DefaultsState m Opt) +parseOptions ns = do + f <- chain doOpt' ns + return $ return . f + +chain :: Monad m => (a -> m (b -> b)) -> [a] -> m (b -> b) +chain f = foldM g id + where g o n = f n >>= \o' -> return $ o' . o + +doOpt' :: (Node Pos, Node Pos) -> Parser (Opt -> Opt) +doOpt' (k',v) = do + k <- parseStringKey k' + case k of + "defaults" -> return id + _ -> doOpt (k',v) + doOpt :: (Node Pos, Node Pos) -> Parser (Opt -> Opt) doOpt (k',v) = do - k <- case k' of - Scalar _ (SStr t) -> return t - Scalar _ _ -> failAtNode k' "Non-string key" - _ -> failAtNode k' "Non-scalar key" + k <- parseStringKey k' case k of "tab-stop" -> parseYAML v >>= \x -> return (\o -> o{ optTabStop = x }) @@ -494,6 +564,12 @@ defaultOpts = Opt , optStripComments = False } +parseStringKey :: Node Pos -> Parser Text +parseStringKey k = case k of + Scalar _ (SStr t) -> return t + Scalar _ _ -> failAtNode k "Non-string key" + _ -> failAtNode k "Non-scalar key" + yamlToMeta :: Node Pos -> Parser Meta yamlToMeta (Mapping _ _ m) = either (fail . show) return $ runEverything (yamlMap pMetaString m) @@ -524,6 +600,52 @@ readMetaValue s | s == "FALSE" = MetaBool False | otherwise = MetaString $ T.pack s +-- | Apply defaults from --defaults file. +applyDefaults :: (PandocMonad m, MonadIO m) + => Opt + -> FilePath + -> StateT DefaultsState m Opt +applyDefaults opt file = do + setVerbosity $ optVerbosity opt + modify $ \defsState -> defsState{ curDefaults = Just file } + inp <- readFileLazy file + case decode1 inp of + Right f -> f opt + Left (errpos, errmsg) -> throwError $ + PandocParseError $ T.pack $ + "Error parsing " ++ file ++ " line " ++ + show (posLine errpos) ++ " column " ++ + show (posColumn errpos) ++ ":\n" ++ errmsg + +fullDefaultsPath :: (PandocMonad m, MonadIO m) + => Maybe FilePath + -> FilePath + -> m FilePath +fullDefaultsPath dataDir file = do + let fp = if null (takeExtension file) + then addExtension file "yaml" + else file + dataDirs <- liftIO defaultUserDataDirs + let fps = fp : case dataDir of + Nothing -> map (</> ("defaults" </> fp)) + dataDirs + Just dd -> [dd </> "defaults" </> fp] + fromMaybe fp <$> findM fileExists fps + +-- | In a list of lists, append another list in front of every list which +-- starts with specific element. +expand :: Ord a => [[a]] -> [a] -> a -> [[a]] +expand [] ns n = fmap (\x -> x : [n]) ns +expand ps ns n = concatMap (ext n ns) ps + where + ext x xs p = case p of + (l : _) | x == l -> fmap (: p) xs + _ -> [p] + +cyclic :: Ord a => [[a]] -> Bool +cyclic = any hasDuplicate + where + hasDuplicate xs = length (ordNub xs) /= length xs -- see https://github.com/jgm/pandoc/pull/4083 -- using generic deriving caused long compilation times diff --git a/test/command/defaults-inheritance-1.md b/test/command/defaults-inheritance-1.md new file mode 100644 index 000000000..0760e2ec0 --- /dev/null +++ b/test/command/defaults-inheritance-1.md @@ -0,0 +1,6 @@ +``` +% pandoc -d command/defaults3 +# Header +^D +# Header +``` diff --git a/test/command/defaults-inheritance-2.md b/test/command/defaults-inheritance-2.md new file mode 100644 index 000000000..8b26a2613 --- /dev/null +++ b/test/command/defaults-inheritance-2.md @@ -0,0 +1,5 @@ +``` +% pandoc -d command/defaults6 +^D +Error: Circular defaults file reference in 'command/defaults7.yaml' +``` diff --git a/test/command/defaults-inheritance-3.md b/test/command/defaults-inheritance-3.md new file mode 100644 index 000000000..81cac7baa --- /dev/null +++ b/test/command/defaults-inheritance-3.md @@ -0,0 +1,6 @@ +``` +% pandoc -d command/defaults8 +<h1>Header</h1> +^D +# Header +``` diff --git a/test/command/defaults3.yaml b/test/command/defaults3.yaml new file mode 100644 index 000000000..d8b6f9144 --- /dev/null +++ b/test/command/defaults3.yaml @@ -0,0 +1,4 @@ +defaults: + - command/defaults4 + - command/defaults5 +to: markdown diff --git a/test/command/defaults4.yaml b/test/command/defaults4.yaml new file mode 100644 index 000000000..a6caa985b --- /dev/null +++ b/test/command/defaults4.yaml @@ -0,0 +1,3 @@ +from: html +defaults: + - command/defaults5 diff --git a/test/command/defaults5.yaml b/test/command/defaults5.yaml new file mode 100644 index 000000000..bb48b9708 --- /dev/null +++ b/test/command/defaults5.yaml @@ -0,0 +1,2 @@ +from: markdown +to: html diff --git a/test/command/defaults6.yaml b/test/command/defaults6.yaml new file mode 100644 index 000000000..ac4a819e5 --- /dev/null +++ b/test/command/defaults6.yaml @@ -0,0 +1,2 @@ +defaults: + - command/defaults7 diff --git a/test/command/defaults7.yaml b/test/command/defaults7.yaml new file mode 100644 index 000000000..19ca8f09e --- /dev/null +++ b/test/command/defaults7.yaml @@ -0,0 +1,2 @@ +defaults: + - command/defaults6 diff --git a/test/command/defaults8.yaml b/test/command/defaults8.yaml new file mode 100644 index 000000000..e418d33b2 --- /dev/null +++ b/test/command/defaults8.yaml @@ -0,0 +1,2 @@ +from: html +defaults: command/defaults9 diff --git a/test/command/defaults9.yaml b/test/command/defaults9.yaml new file mode 100644 index 000000000..d732eb066 --- /dev/null +++ b/test/command/defaults9.yaml @@ -0,0 +1 @@ +to: markdown -- cgit v1.2.3 From 15ba184e6e7825898ebae5a59e2c3a60e0992da0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 5 Jan 2021 12:07:02 -0800 Subject: HTML writer: fix implicit_figure at end of footnotes. Closes #7006. --- src/Text/Pandoc/Writers/HTML.hs | 10 +++++++--- test/command/7006.md | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 test/command/7006.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c93322953..0b051c8f0 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1455,11 +1455,15 @@ blockListToNote opts ref blocks = do else let lastBlock = last blocks otherBlocks = init blocks in case lastBlock of - (Para lst) -> otherBlocks ++ + Para [Image _ _ (_,tit)] + | "fig:" `T.isPrefixOf` tit + -> otherBlocks ++ [lastBlock, + Plain backlink] + Para lst -> otherBlocks ++ [Para (lst ++ backlink)] - (Plain lst) -> otherBlocks ++ + Plain lst -> otherBlocks ++ [Plain (lst ++ backlink)] - _ -> otherBlocks ++ [lastBlock, + _ -> otherBlocks ++ [lastBlock, Plain backlink] contents <- blockListToHtml opts blocks' let noteItem = H.li ! prefixedId opts ("fn" <> ref) $ contents diff --git a/test/command/7006.md b/test/command/7006.md new file mode 100644 index 000000000..e7951fb1a --- /dev/null +++ b/test/command/7006.md @@ -0,0 +1,20 @@ +``` +% pandoc -t html +Test.[^fn] + +[^fn]: Foo: + +  +^D +<p>Test.<a href="#fn1" class="footnote-ref" id="fnref1" role="doc-noteref"><sup>1</sup></a></p> +<section class="footnotes" role="doc-endnotes"> +<hr /> +<ol> +<li id="fn1" role="doc-endnote"><p>Foo:</p> +<figure> +<img src="/image.jpg" alt="Caption." /><figcaption aria-hidden="true">Caption.</figcaption> +</figure> +<a href="#fnref1" class="footnote-back" role="doc-backlink">↩︎</a></li> +</ol> +</section> +``` -- cgit v1.2.3 From c0d8b186d142eb5e9f845de0a4ebcadd04c32dce Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 7 Jan 2021 11:06:25 -0800 Subject: T.P.Parsing: modify gridTableWith' for headerless tables. If the table lacks a header, the header row should be an empty list. Previously we got a list of empty cells, which caused an empty header to be emitted instead of no header. In LaTeX/PDF output that meant we got a double top line with space between. @tarleb @despres - please let me know if this is problematic for some reason I'm not grasping. --- src/Text/Pandoc/Parsing.hs | 22 +++++++++++----------- test/command/3516.md | 6 +----- test/command/5708.md | 6 +----- test/markdown-reader-more.native | 38 +++++--------------------------------- test/rst-reader.native | 24 +++--------------------- test/tables-rstsubset.native | 10 +--------- 6 files changed, 22 insertions(+), 84 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 979344f63..fd14341ad 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1069,24 +1069,24 @@ gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st => Bool -- ^ Headerless table -> ParserT s st m (mf Blocks) -> ParserT s st m (mf [Blocks], [Alignment], [Int]) -gridTableHeader headless blocks = try $ do +gridTableHeader True _ = do optional blanklines dashes <- gridDashedLines '-' - rawContent <- if headless - then return $ repeat "" - else many1 - (notFollowedBy (gridTableSep '=') >> char '|' >> + let aligns = map snd dashes + let lines' = map (snd . fst) dashes + let indices = scanl (+) 0 lines' + return (return [], aligns, indices) +gridTableHeader False blocks = try $ do + optional blanklines + dashes <- gridDashedLines '-' + rawContent <- many1 (notFollowedBy (gridTableSep '=') >> char '|' >> T.pack <$> many1Till anyChar newline) - underDashes <- if headless - then return dashes - else gridDashedLines '=' + underDashes <- gridDashedLines '=' guard $ length dashes == length underDashes let lines' = map (snd . fst) underDashes let indices = scanl (+) 0 lines' let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (T.unlines . map trim) $ transpose + let rawHeads = map (T.unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads return (heads, aligns, indices) diff --git a/test/command/3516.md b/test/command/3516.md index 1cb805afe..dc8770bad 100644 --- a/test/command/3516.md +++ b/test/command/3516.md @@ -29,11 +29,7 @@ on Windows builds. [(AlignDefault,ColWidth 5.555555555555555e-2) ,(AlignDefault,ColWidth 5.555555555555555e-2)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) diff --git a/test/command/5708.md b/test/command/5708.md index e4b99e4db..e5ba72c69 100644 --- a/test/command/5708.md +++ b/test/command/5708.md @@ -9,11 +9,7 @@ [(AlignDefault,ColWidth 0.125) ,(AlignDefault,ColWidth 0.2361111111111111)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) diff --git a/test/markdown-reader-more.native b/test/markdown-reader-more.native index 5ac4facb8..b4a16f7cf 100644 --- a/test/markdown-reader-more.native +++ b/test/markdown-reader-more.native @@ -134,13 +134,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,(AlignDefault,ColWidth 0.16666666666666666) ,(AlignDefault,ColWidth 0.18055555555555555)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) @@ -198,13 +192,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,(AlignLeft,ColWidth 0.16666666666666666) ,(AlignCenter,ColWidth 0.18055555555555555)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) @@ -230,13 +218,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,(AlignDefault,ColWidth 0.16666666666666666) ,(AlignDefault,ColWidth 0.18055555555555555)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) @@ -262,13 +244,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S ,(AlignDefault,ColWidth 0.16666666666666666) ,(AlignDefault,ColWidth 0.18055555555555555)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) @@ -300,11 +276,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S [(AlignDefault,ColWidth 5.555555555555555e-2) ,(AlignDefault,ColWidth 5.555555555555555e-2)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) diff --git a/test/rst-reader.native b/test/rst-reader.native index 93855cc00..9f8d5ab3e 100644 --- a/test/rst-reader.native +++ b/test/rst-reader.native @@ -346,13 +346,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,(AlignDefault,ColWidth 0.15) ,(AlignDefault,ColWidth 0.1625)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) @@ -378,13 +372,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,(AlignDefault,ColWidth 0.15) ,(AlignDefault,ColWidth 0.1625)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) @@ -410,13 +398,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa ,(AlignDefault,ColWidth 0.15) ,(AlignDefault,ColWidth 0.1625)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) diff --git a/test/tables-rstsubset.native b/test/tables-rstsubset.native index b584bb7ea..2536ff954 100644 --- a/test/tables-rstsubset.native +++ b/test/tables-rstsubset.native @@ -276,15 +276,7 @@ ,(AlignDefault,ColWidth 0.15) ,(AlignDefault,ColWidth 0.3375)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) -- cgit v1.2.3 From 327e1428c5cdcb62beef6d2e00ab6f1d699256f0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 7 Jan 2021 16:41:25 -0800 Subject: gfm/commonmark writer: implement start number on ordered lists. Previously they always started at 1, but according to the spec the start number is respected. Closes #7009. --- src/Text/Pandoc/Writers/Markdown.hs | 5 ++++- test/command/7009.md | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 test/command/7009.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index c349fd713..1b5c00468 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -674,7 +674,10 @@ blockToMarkdown' opts (BulletList items) = do contents <- inList $ mapM (bulletListItemToMarkdown opts) items return $ (if isTightList items then vcat else vsep) contents <> blankline blockToMarkdown' opts (OrderedList (start,sty,delim) items) = do - let start' = if isEnabled Ext_startnum opts then start else 1 + variant <- asks envVariant + let start' = if variant == Commonmark || isEnabled Ext_startnum opts + then start + else 1 let sty' = if isEnabled Ext_fancy_lists opts then sty else DefaultStyle let delim' = if isEnabled Ext_fancy_lists opts then delim else DefaultDelim let attribs = (start', sty', delim') diff --git a/test/command/7009.md b/test/command/7009.md new file mode 100644 index 000000000..a060c6076 --- /dev/null +++ b/test/command/7009.md @@ -0,0 +1,8 @@ +``` +% pandoc -t gfm +3. a +4. b +^D +3. a +4. b +``` -- cgit v1.2.3 From 4f3434586743afb69f00ca91fe6ec9b68b39ae7e Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 8 Jan 2021 18:38:20 +0100 Subject: Update copyright notices for 2021 (#7012) --- COPYRIGHT | 26 ++++++++++++------------- MANUAL.txt | 2 +- README.md | 2 +- README.template | 2 +- app/pandoc.hs | 2 +- benchmark/benchmark-pandoc.hs | 2 +- benchmark/weigh-pandoc.hs | 2 +- pandoc.cabal | 2 +- src/Text/Pandoc.hs | 2 +- src/Text/Pandoc/App.hs | 2 +- src/Text/Pandoc/App/CommandLineOptions.hs | 4 ++-- src/Text/Pandoc/App/FormatHeuristics.hs | 2 +- src/Text/Pandoc/App/Opt.hs | 2 +- src/Text/Pandoc/App/OutputSettings.hs | 2 +- src/Text/Pandoc/Asciify.hs | 2 +- src/Text/Pandoc/BCP47.hs | 2 +- src/Text/Pandoc/CSS.hs | 2 +- src/Text/Pandoc/CSV.hs | 2 +- src/Text/Pandoc/Data.hs | 2 +- src/Text/Pandoc/Error.hs | 2 +- src/Text/Pandoc/Extensions.hs | 2 +- src/Text/Pandoc/Filter.hs | 2 +- src/Text/Pandoc/Filter/JSON.hs | 2 +- src/Text/Pandoc/Filter/Lua.hs | 2 +- src/Text/Pandoc/Filter/Path.hs | 2 +- src/Text/Pandoc/Highlighting.hs | 2 +- src/Text/Pandoc/Image.hs | 2 +- src/Text/Pandoc/ImageSize.hs | 2 +- src/Text/Pandoc/Logging.hs | 2 +- src/Text/Pandoc/Lua.hs | 2 +- src/Text/Pandoc/Lua/ErrorConversion.hs | 2 +- src/Text/Pandoc/Lua/Filter.hs | 4 ++-- src/Text/Pandoc/Lua/Global.hs | 2 +- src/Text/Pandoc/Lua/Init.hs | 2 +- src/Text/Pandoc/Lua/Marshaling.hs | 4 ++-- src/Text/Pandoc/Lua/Marshaling/AST.hs | 4 ++-- src/Text/Pandoc/Lua/Marshaling/AnyValue.hs | 2 +- src/Text/Pandoc/Lua/Marshaling/CommonState.hs | 4 ++-- src/Text/Pandoc/Lua/Marshaling/Context.hs | 4 ++-- src/Text/Pandoc/Lua/Marshaling/List.hs | 4 ++-- src/Text/Pandoc/Lua/Marshaling/MediaBag.hs | 4 ++-- src/Text/Pandoc/Lua/Marshaling/PandocError.hs | 2 +- src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs | 4 ++-- src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs | 2 +- src/Text/Pandoc/Lua/Marshaling/Version.hs | 2 +- src/Text/Pandoc/Lua/Module/MediaBag.hs | 2 +- src/Text/Pandoc/Lua/Module/Pandoc.hs | 2 +- src/Text/Pandoc/Lua/Module/System.hs | 2 +- src/Text/Pandoc/Lua/Module/Types.hs | 2 +- src/Text/Pandoc/Lua/Module/Utils.hs | 2 +- src/Text/Pandoc/Lua/Packages.hs | 2 +- src/Text/Pandoc/Lua/PandocLua.hs | 2 +- src/Text/Pandoc/Lua/Util.hs | 4 ++-- src/Text/Pandoc/Lua/Walk.hs | 4 ++-- src/Text/Pandoc/MIME.hs | 2 +- src/Text/Pandoc/MediaBag.hs | 2 +- src/Text/Pandoc/Options.hs | 2 +- src/Text/Pandoc/PDF.hs | 2 +- src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/Process.hs | 2 +- src/Text/Pandoc/Readers.hs | 2 +- src/Text/Pandoc/Readers/BibTeX.hs | 2 +- src/Text/Pandoc/Readers/CSV.hs | 2 +- src/Text/Pandoc/Readers/CommonMark.hs | 2 +- src/Text/Pandoc/Readers/CslJson.hs | 2 +- src/Text/Pandoc/Readers/DocBook.hs | 2 +- src/Text/Pandoc/Readers/Docx/Combine.hs | 2 +- src/Text/Pandoc/Readers/Docx/Util.hs | 2 +- src/Text/Pandoc/Readers/HTML.hs | 2 +- src/Text/Pandoc/Readers/HTML/Parsing.hs | 2 +- src/Text/Pandoc/Readers/HTML/Table.hs | 4 ++-- src/Text/Pandoc/Readers/HTML/TagCategories.hs | 2 +- src/Text/Pandoc/Readers/HTML/Types.hs | 2 +- src/Text/Pandoc/Readers/Ipynb.hs | 2 +- src/Text/Pandoc/Readers/Jira.hs | 2 +- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Types.hs | 2 +- src/Text/Pandoc/Readers/Markdown.hs | 2 +- src/Text/Pandoc/Readers/MediaWiki.hs | 2 +- src/Text/Pandoc/Readers/Metadata.hs | 2 +- src/Text/Pandoc/Readers/Native.hs | 2 +- src/Text/Pandoc/Readers/OPML.hs | 2 +- src/Text/Pandoc/Readers/Org.hs | 2 +- src/Text/Pandoc/Readers/Org/BlockStarts.hs | 2 +- src/Text/Pandoc/Readers/Org/Blocks.hs | 2 +- src/Text/Pandoc/Readers/Org/DocumentTree.hs | 2 +- src/Text/Pandoc/Readers/Org/ExportSettings.hs | 2 +- src/Text/Pandoc/Readers/Org/Inlines.hs | 2 +- src/Text/Pandoc/Readers/Org/Meta.hs | 2 +- src/Text/Pandoc/Readers/Org/ParserState.hs | 2 +- src/Text/Pandoc/Readers/Org/Parsing.hs | 2 +- src/Text/Pandoc/Readers/Org/Shared.hs | 2 +- src/Text/Pandoc/Readers/RST.hs | 2 +- src/Text/Pandoc/Readers/Textile.hs | 2 +- src/Text/Pandoc/RoffChar.hs | 2 +- src/Text/Pandoc/SelfContained.hs | 2 +- src/Text/Pandoc/Shared.hs | 2 +- src/Text/Pandoc/Slides.hs | 2 +- src/Text/Pandoc/Templates.hs | 2 +- src/Text/Pandoc/Translations.hs | 2 +- src/Text/Pandoc/UTF8.hs | 2 +- src/Text/Pandoc/UUID.hs | 2 +- src/Text/Pandoc/Writers.hs | 2 +- src/Text/Pandoc/Writers/AsciiDoc.hs | 2 +- src/Text/Pandoc/Writers/CommonMark.hs | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 2 +- src/Text/Pandoc/Writers/CslJson.hs | 2 +- src/Text/Pandoc/Writers/Custom.hs | 2 +- src/Text/Pandoc/Writers/Docbook.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/Docx/StyleMap.hs | 2 +- src/Text/Pandoc/Writers/DokuWiki.hs | 2 +- src/Text/Pandoc/Writers/EPUB.hs | 2 +- src/Text/Pandoc/Writers/FB2.hs | 2 +- src/Text/Pandoc/Writers/HTML.hs | 2 +- src/Text/Pandoc/Writers/Ipynb.hs | 2 +- src/Text/Pandoc/Writers/JATS.hs | 2 +- src/Text/Pandoc/Writers/JATS/Table.hs | 2 +- src/Text/Pandoc/Writers/JATS/Types.hs | 2 +- src/Text/Pandoc/Writers/Jira.hs | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- src/Text/Pandoc/Writers/LaTeX/Caption.hs | 2 +- src/Text/Pandoc/Writers/LaTeX/Notes.hs | 2 +- src/Text/Pandoc/Writers/LaTeX/Table.hs | 2 +- src/Text/Pandoc/Writers/Man.hs | 2 +- src/Text/Pandoc/Writers/Markdown.hs | 2 +- src/Text/Pandoc/Writers/MediaWiki.hs | 2 +- src/Text/Pandoc/Writers/Ms.hs | 2 +- src/Text/Pandoc/Writers/Native.hs | 2 +- src/Text/Pandoc/Writers/ODT.hs | 2 +- src/Text/Pandoc/Writers/OOXML.hs | 2 +- src/Text/Pandoc/Writers/OPML.hs | 2 +- src/Text/Pandoc/Writers/Org.hs | 4 ++-- src/Text/Pandoc/Writers/RST.hs | 2 +- src/Text/Pandoc/Writers/RTF.hs | 2 +- src/Text/Pandoc/Writers/Roff.hs | 2 +- src/Text/Pandoc/Writers/Shared.hs | 2 +- src/Text/Pandoc/Writers/TEI.hs | 2 +- src/Text/Pandoc/Writers/Texinfo.hs | 2 +- src/Text/Pandoc/Writers/Textile.hs | 2 +- src/Text/Pandoc/Writers/ZimWiki.hs | 2 +- src/Text/Pandoc/XML.hs | 2 +- test/Tests/Command.hs | 2 +- test/Tests/Helpers.hs | 2 +- test/Tests/Lua.hs | 2 +- test/Tests/Lua/Module.hs | 2 +- test/Tests/Old.hs | 2 +- test/Tests/Readers/Creole.hs | 2 +- test/Tests/Readers/EPUB.hs | 2 +- test/Tests/Readers/HTML.hs | 2 +- test/Tests/Readers/Jira.hs | 2 +- test/Tests/Readers/LaTeX.hs | 2 +- test/Tests/Readers/Man.hs | 2 +- test/Tests/Readers/Markdown.hs | 2 +- test/Tests/Readers/Odt.hs | 2 +- test/Tests/Readers/Org.hs | 2 +- test/Tests/Readers/Org/Block.hs | 2 +- test/Tests/Readers/Org/Block/CodeBlock.hs | 2 +- test/Tests/Readers/Org/Block/Figure.hs | 2 +- test/Tests/Readers/Org/Block/Header.hs | 2 +- test/Tests/Readers/Org/Block/List.hs | 2 +- test/Tests/Readers/Org/Block/Table.hs | 2 +- test/Tests/Readers/Org/Directive.hs | 2 +- test/Tests/Readers/Org/Inline.hs | 2 +- test/Tests/Readers/Org/Inline/Citation.hs | 2 +- test/Tests/Readers/Org/Inline/Note.hs | 2 +- test/Tests/Readers/Org/Inline/Smart.hs | 2 +- test/Tests/Readers/Org/Meta.hs | 2 +- test/Tests/Readers/Org/Shared.hs | 2 +- test/Tests/Readers/RST.hs | 2 +- test/Tests/Readers/Txt2Tags.hs | 2 +- test/Tests/Shared.hs | 2 +- trypandoc/trypandoc.hs | 2 +- 175 files changed, 200 insertions(+), 200 deletions(-) (limited to 'src') diff --git a/COPYRIGHT b/COPYRIGHT index eb916cc54..9992e5680 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,5 +1,5 @@ Pandoc -Copyright (C) 2006-2020 John MacFarlane <jgm at berkeley dot edu> +Copyright (C) 2006-2021 John MacFarlane <jgm at berkeley dot edu> With the exceptions noted below, this code is released under the [GPL], version 2 or later: @@ -37,7 +37,7 @@ The modules in the `pandoc-types` repository (Text.Pandoc.Definition, Text.Pandoc.Builder, Text.Pandoc.Generics, Text.Pandoc.JSON, Text.Pandoc.Walk) are licensed under the BSD 3-clause license: -Copyright (c) 2006-2020, John MacFarlane +Copyright (c) 2006-2021, John MacFarlane All rights reserved. @@ -73,7 +73,7 @@ Pandoc's templates (in `data/templates`) are dual-licensed as either GPL (v2 or higher, same as pandoc) or (at your option) the BSD 3-clause license. -Copyright (c) 2014--2020, John MacFarlane +Copyright (c) 2014--2021, John MacFarlane ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Muse.hs @@ -83,19 +83,19 @@ Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Texinfo.hs -Copyright (C) 2008-2020 John MacFarlane and Peter Wang +Copyright (C) 2008-2021 John MacFarlane and Peter Wang Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/OpenDocument.hs -Copyright (C) 2008-2020 Andrea Rossato and John MacFarlane +Copyright (C) 2008-2021 Andrea Rossato and John MacFarlane Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Writers/Org.hs -Copyright (C) 2010-2020 Puneeth Chaganti, John MacFarlane, and +Copyright (C) 2010-2021 Puneeth Chaganti, John MacFarlane, and Albert Krewinkel Released under the GNU General Public License version 2 or later. @@ -115,7 +115,7 @@ Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Readers/Textile.hs -Copyright (C) 2010-2020 Paul Rivier and John MacFarlane +Copyright (C) 2010-2021 Paul Rivier and John MacFarlane Released under the GNU General Public License version 2 or later. @@ -133,7 +133,7 @@ Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- src/Text/Pandoc/Readers/EPUB.hs -Copyright (C) 2014-2020 Matthew Pickering and John MacFarlane +Copyright (C) 2014-2021 Matthew Pickering and John MacFarlane Released under the GNU General Public License version 2 or later. @@ -141,7 +141,7 @@ Released under the GNU General Public License version 2 or later. src/Text/Pandoc/Readers/Org.hs src/Text/Pandoc/Readers/Org/* test/Tests/Readers/Org/* -Copyright (C) 2014-2020 Albert Krewinkel +Copyright (C) 2014-2021 Albert Krewinkel Released under the GNU General Public License version 2 or later. @@ -149,7 +149,7 @@ Released under the GNU General Public License version 2 or later. src/Text/Pandoc/Lua.hs src/Text/Pandoc/Lua/* test/lua/* -Copyright (C) 2017--2020 Albert Krewinkel and John MacFarlane +Copyright (C) 2017--2021 Albert Krewinkel and John MacFarlane Released under the GNU General Public License version 2 or later. @@ -157,7 +157,7 @@ Released under the GNU General Public License version 2 or later. src/Text/Pandoc/Readers/Jira.hs src/Text/Pandoc/Writers/Jira.hs test/Tests/Readers/Jira.hs -Copyright (C) 2019--2020 Albert Krewinkel +Copyright (C) 2019--2021 Albert Krewinkel Released under the GNU General Public License version 2 or later. @@ -169,7 +169,7 @@ Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- data/pandoc.lua -Copyright (C) 2017-2020 Albert Krewinkel +Copyright (C) 2017-2021 Albert Krewinkel Released under the GNU General Public License version 2 or later. @@ -183,7 +183,7 @@ Released under the Do What the Fuck You Want To Public License. ------------------------------------------------------------------------ Pandoc embeds a lua interpreter (via hslua). -Copyright © 1994–2019 Lua.org, PUC-Rio. +Copyright © 1994–2020 Lua.org, PUC-Rio. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/MANUAL.txt b/MANUAL.txt index 831ffb0b0..aa75a32a9 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -6329,7 +6329,7 @@ application, here are some things to keep in mind: # Authors -Copyright 2006--2020 John MacFarlane (jgm@berkeley.edu). Released +Copyright 2006--2021 John MacFarlane (jgm@berkeley.edu). Released under the [GPL], version 2 or greater. This software carries no warranty of any kind. (See COPYRIGHT for full copyright and warranty notices.) For a full list of contributors, see the file diff --git a/README.md b/README.md index aea55b0c4..9317438fc 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,7 @@ opening a new issue. ## License -© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the +© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the [GPL](http://www.gnu.org/copyleft/gpl.html "GNU General Public License"), version 2 or greater. This software carries no warranty of any kind. (See COPYRIGHT for full copyright and warranty notices.) diff --git a/README.template b/README.template index 5eeafb9e3..53d14b584 100644 --- a/README.template +++ b/README.template @@ -81,7 +81,7 @@ new issue. License ------- -© 2006-2020 John MacFarlane (jgm@berkeley.edu). Released under the +© 2006-2021 John MacFarlane (jgm@berkeley.edu). Released under the [GPL], version 2 or greater. This software carries no warranty of any kind. (See COPYRIGHT for full copyright and warranty notices.) diff --git a/app/pandoc.hs b/app/pandoc.hs index 9ed3b9e9f..162570f18 100644 --- a/app/pandoc.hs +++ b/app/pandoc.hs @@ -1,7 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {- | Module : Main - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 3e7b663b4..b3e67bb14 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,7 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- -Copyright (C) 2012-2019 John MacFarlane <jgm@berkeley.edu> +Copyright (C) 2012-2021 John MacFarlane <jgm@berkeley.edu> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs index 4ba6feb03..ad4c83ad7 100644 --- a/benchmark/weigh-pandoc.hs +++ b/benchmark/weigh-pandoc.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Main - Copyright : © 2016-2019 John MacFarlane <jgm@berkeley.edu> + Copyright : © 2016-2021 John MacFarlane <jgm@berkeley.edu> License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/pandoc.cabal b/pandoc.cabal index cce3c1a58..bb68ea402 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -4,7 +4,7 @@ version: 2.11.3.2 build-type: Simple license: GPL-2.0-or-later license-file: COPYING.md -copyright: (c) 2006-2020 John MacFarlane +copyright: (c) 2006-2021 John MacFarlane author: John MacFarlane <jgm@berkeley.edu> maintainer: John MacFarlane <jgm@berkeley.edu> bug-reports: https://github.com/jgm/pandoc/issues diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 549aeddfb..f09dfd8c7 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index e6d5c93d4..725c76424 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.App - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index 21ee47b7b..307f28b5c 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -6,7 +6,7 @@ {-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Pandoc.App.CommandLineOptions - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> @@ -968,7 +968,7 @@ usageMessage programName = usageInfo (programName ++ " [OPTIONS] [FILES]") copyrightMessage :: String copyrightMessage = intercalate "\n" [ - "Copyright (C) 2006-2020 John MacFarlane. Web: https://pandoc.org", + "Copyright (C) 2006-2021 John MacFarlane. Web: https://pandoc.org", "This is free software; see the source for copying conditions. There is no", "warranty, not even for merchantability or fitness for a particular purpose." ] diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs index 155b7e586..17ed30fe9 100644 --- a/src/Text/Pandoc/App/FormatHeuristics.hs +++ b/src/Text/Pandoc/App/FormatHeuristics.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.App.FormatHeuristics - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 6dd19758e..0d96ab67c 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -7,7 +7,7 @@ {-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Pandoc.App.Opt - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 139b408cb..53c7d82ef 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -5,7 +5,7 @@ {-# LANGUAGE TupleSections #-} {- | Module : Text.Pandoc.App - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 9e9cc8d9b..01a7b624a 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Asciify - Copyright : Copyright (C) 2013-2020 John MacFarlane + Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index b41e93125..69824aa57 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.BCP47 - Copyright : Copyright (C) 2017–2020 John MacFarlane + Copyright : Copyright (C) 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/CSS.hs b/src/Text/Pandoc/CSS.hs index d98c85147..625feadbb 100644 --- a/src/Text/Pandoc/CSS.hs +++ b/src/Text/Pandoc/CSS.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.CSS -Copyright : © 2006-2020 John MacFarlane <jgm@berkeley.edu>, +Copyright : © 2006-2021 John MacFarlane <jgm@berkeley.edu>, 2015-2016 Mauro Bieg, 2015 Ophir Lifshitz <hangfromthefloor@gmail.com> License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs index 10812644f..ec212fa9a 100644 --- a/src/Text/Pandoc/CSV.hs +++ b/src/Text/Pandoc/CSV.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.CSV - Copyright : Copyright (C) 2017–2020 John MacFarlane <jgm@berkeley.edu> + Copyright : Copyright (C) 2017-2021 John MacFarlane <jgm@berkeley.edu> License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha diff --git a/src/Text/Pandoc/Data.hs b/src/Text/Pandoc/Data.hs index 38682b9f9..55ed3f5bf 100644 --- a/src/Text/Pandoc/Data.hs +++ b/src/Text/Pandoc/Data.hs @@ -1,7 +1,7 @@ {-# LANGUAGE TemplateHaskell #-} {- | Module : Text.Pandoc.Data -Copyright : Copyright (C) 2013-2020 John MacFarlane +Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 2c311bb49..204cf15ca 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Error - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 9865f897b..39c2a0489 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -6,7 +6,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Extensions - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Filter.hs b/src/Text/Pandoc/Filter.hs index 6d4846f98..1209ceeb7 100644 --- a/src/Text/Pandoc/Filter.hs +++ b/src/Text/Pandoc/Filter.hs @@ -4,7 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Filter - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/Filter/JSON.hs b/src/Text/Pandoc/Filter/JSON.hs index 83ec9a97c..d2323fac4 100644 --- a/src/Text/Pandoc/Filter/JSON.hs +++ b/src/Text/Pandoc/Filter/JSON.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Filter - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/Filter/Lua.hs b/src/Text/Pandoc/Filter/Lua.hs index a76c8da2f..c238e53d9 100644 --- a/src/Text/Pandoc/Filter/Lua.hs +++ b/src/Text/Pandoc/Filter/Lua.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Filter.Lua - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/Filter/Path.hs b/src/Text/Pandoc/Filter/Path.hs index 9e5e8fa77..1d16c53b9 100644 --- a/src/Text/Pandoc/Filter/Path.hs +++ b/src/Text/Pandoc/Filter/Path.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Filter.Path - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index ce8880f84..0bb6ed319 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Highlighting - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Image.hs b/src/Text/Pandoc/Image.hs index e37de4e00..e0c938938 100644 --- a/src/Text/Pandoc/Image.hs +++ b/src/Text/Pandoc/Image.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, CPP #-} {- | Module : Text.Pandoc.Image -Copyright : Copyright (C) 2020 John MacFarlane +Copyright : Copyright (C) 2020-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 9ce5c668d..098c16721 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -3,7 +3,7 @@ {-# OPTIONS_GHC -fno-warn-type-defaults #-} {- | Module : Text.Pandoc.ImageSize -Copyright : Copyright (C) 2011-2020 John MacFarlane +Copyright : Copyright (C) 2011-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 825fdaadb..3a3acedc7 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -4,7 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Logging - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Lua.hs b/src/Text/Pandoc/Lua.hs index 39db0074a..f0e9e076b 100644 --- a/src/Text/Pandoc/Lua.hs +++ b/src/Text/Pandoc/Lua.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua - Copyright : Copyright © 2017–2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/ErrorConversion.hs b/src/Text/Pandoc/Lua/ErrorConversion.hs index 59c962723..4e6880722 100644 --- a/src/Text/Pandoc/Lua/ErrorConversion.hs +++ b/src/Text/Pandoc/Lua/ErrorConversion.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.ErrorConversion - Copyright : © 2020 Albert Krewinkel + Copyright : © 2020-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 94d7adeb2..bffe01a34 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -1,8 +1,8 @@ {-# LANGUAGE FlexibleContexts #-} {- | Module : Text.Pandoc.Lua.Filter -Copyright : © 2012–2020 John MacFarlane, - © 2017-2020 Albert Krewinkel +Copyright : © 2012-2021 John MacFarlane, + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha diff --git a/src/Text/Pandoc/Lua/Global.hs b/src/Text/Pandoc/Lua/Global.hs index 4285be662..29b788f04 100644 --- a/src/Text/Pandoc/Lua/Global.hs +++ b/src/Text/Pandoc/Lua/Global.hs @@ -1,7 +1,7 @@ {-# LANGUAGE DeriveDataTypeable #-} {- | Module : Text.Pandoc.Lua - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index e89e9d6e0..0a5ce85cb 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling.hs b/src/Text/Pandoc/Lua/Marshaling.hs index 1254402b6..f517c7c27 100644 --- a/src/Text/Pandoc/Lua/Marshaling.hs +++ b/src/Text/Pandoc/Lua/Marshaling.hs @@ -1,7 +1,7 @@ {- | Module : Text.Pandoc.Lua.Marshaling - Copyright : © 2012-2020 John MacFarlane - © 2017-2020 Albert Krewinkel + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs index c889618c4..6485da661 100644 --- a/src/Text/Pandoc/Lua/Marshaling/AST.hs +++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs @@ -2,8 +2,8 @@ {-# LANGUAGE LambdaCase #-} {- | Module : Text.Pandoc.Lua.Marshaling.AST - Copyright : © 2012-2020 John MacFarlane - © 2017-2020 Albert Krewinkel + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs index c4720aedf..82e26b963 100644 --- a/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs +++ b/src/Text/Pandoc/Lua/Marshaling/AnyValue.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua.Marshaling.AnyValue - Copyright : © 2017-2020 Albert Krewinkel + Copyright : © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs index 636650af3..147197c5d 100644 --- a/src/Text/Pandoc/Lua/Marshaling/CommonState.hs +++ b/src/Text/Pandoc/Lua/Marshaling/CommonState.hs @@ -3,8 +3,8 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Marshaling.CommonState - Copyright : © 2012-2020 John MacFarlane - © 2017-2020 Albert Krewinkel + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha diff --git a/src/Text/Pandoc/Lua/Marshaling/Context.hs b/src/Text/Pandoc/Lua/Marshaling/Context.hs index c0e7aef60..606bdcfb2 100644 --- a/src/Text/Pandoc/Lua/Marshaling/Context.hs +++ b/src/Text/Pandoc/Lua/Marshaling/Context.hs @@ -1,8 +1,8 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {- | Module : Text.Pandoc.Lua.Marshaling.Context - Copyright : © 2012-2020 John MacFarlane - © 2017-2020 Albert Krewinkel + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling/List.hs b/src/Text/Pandoc/Lua/Marshaling/List.hs index e6614400d..0446302a1 100644 --- a/src/Text/Pandoc/Lua/Marshaling/List.hs +++ b/src/Text/Pandoc/Lua/Marshaling/List.hs @@ -4,8 +4,8 @@ {-# LANGUAGE UndecidableInstances #-} {- | Module : Text.Pandoc.Lua.Marshaling.List -Copyright : © 2012-2020 John MacFarlane - © 2017-2020 Albert Krewinkel +Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha diff --git a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs index 2cf5b8893..70bd010a0 100644 --- a/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Marshaling/MediaBag.hs @@ -1,7 +1,7 @@ {- | Module : Text.Pandoc.Lua.Marshaling.MediaBag - Copyright : © 2012-2020 John MacFarlane - © 2017-2020 Albert Krewinkel + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha diff --git a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs index 74537a1dd..f698704e0 100644 --- a/src/Text/Pandoc/Lua/Marshaling/PandocError.hs +++ b/src/Text/Pandoc/Lua/Marshaling/PandocError.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Marshaling.PandocError - Copyright : © 2020 Albert Krewinkel + Copyright : © 2020-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs index 2e45affe4..dd7bf2e61 100644 --- a/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs +++ b/src/Text/Pandoc/Lua/Marshaling/ReaderOptions.hs @@ -4,8 +4,8 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {- | Module : Text.Pandoc.Lua.Marshaling.ReaderOptions - Copyright : © 2012-2020 John MacFarlane - © 2017-2020 Albert Krewinkel + Copyright : © 2012-2021 John MacFarlane + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs index 98fa1efa4..6d43039fa 100644 --- a/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs +++ b/src/Text/Pandoc/Lua/Marshaling/SimpleTable.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua.Marshaling.SimpleTable - Copyright : © 2020 Albert Krewinkel + Copyright : © 2020-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Marshaling/Version.hs b/src/Text/Pandoc/Lua/Marshaling/Version.hs index 9adb1b763..4f4ffac51 100644 --- a/src/Text/Pandoc/Lua/Marshaling/Version.hs +++ b/src/Text/Pandoc/Lua/Marshaling/Version.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Marshaling.Version - Copyright : © 2019-2020 Albert Krewinkel + Copyright : © 2019-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index e5a10217a..715e53885 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.MediaBag - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 3886568b7..a9ce3866d 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Module.Pandoc - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Module/System.hs b/src/Text/Pandoc/Lua/Module/System.hs index 04508e461..bd35babaf 100644 --- a/src/Text/Pandoc/Lua/Module/System.hs +++ b/src/Text/Pandoc/Lua/Module/System.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua.Module.System - Copyright : © 2019-2020 Albert Krewinkel + Copyright : © 2019-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Module/Types.hs b/src/Text/Pandoc/Lua/Module/Types.hs index 999f2e588..bb4f02c3c 100644 --- a/src/Text/Pandoc/Lua/Module/Types.hs +++ b/src/Text/Pandoc/Lua/Module/Types.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Lua.Module.Types - Copyright : © 2019-2020 Albert Krewinkel + Copyright : © 2019-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 7595b9c0f..1b04021a7 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Module.Utils - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs index 4c3b9d79d..d62fb725d 100644 --- a/src/Text/Pandoc/Lua/Packages.hs +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Packages - Copyright : Copyright © 2017-2020 Albert Krewinkel + Copyright : Copyright © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs index 6c3b410dd..4beac22b7 100644 --- a/src/Text/Pandoc/Lua/PandocLua.hs +++ b/src/Text/Pandoc/Lua/PandocLua.hs @@ -9,7 +9,7 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} {- | Module : Text.Pandoc.Lua.PandocLua - Copyright : Copyright © 2020 Albert Krewinkel + Copyright : Copyright © 2020-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Util.hs b/src/Text/Pandoc/Lua/Util.hs index fbd013801..70a8a6d47 100644 --- a/src/Text/Pandoc/Lua/Util.hs +++ b/src/Text/Pandoc/Lua/Util.hs @@ -3,8 +3,8 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Lua.Util - Copyright : © 2012–2020 John MacFarlane, - © 2017-2020 Albert Krewinkel + Copyright : © 2012-2021 John MacFarlane, + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Lua/Walk.hs b/src/Text/Pandoc/Lua/Walk.hs index 695c7b44e..d6d973496 100644 --- a/src/Text/Pandoc/Lua/Walk.hs +++ b/src/Text/Pandoc/Lua/Walk.hs @@ -4,8 +4,8 @@ {-# LANGUAGE MultiParamTypeClasses #-} {- | Module : Text.Pandoc.Lua.Walk -Copyright : © 2012–2020 John MacFarlane, - © 2017-2020 Albert Krewinkel +Copyright : © 2012-2021 John MacFarlane, + © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> Stability : alpha diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 4fe25ebe1..53c5cd018 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.MIME - Copyright : Copyright (C) 2011-2020 John MacFarlane + Copyright : Copyright (C) 2011-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 26f44cef0..3249bcdeb 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -3,7 +3,7 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | Module : Text.Pandoc.MediaBag - Copyright : Copyright (C) 2014-2015, 2017–2020 John MacFarlane + Copyright : Copyright (C) 2014-2015, 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index ecd65a54d..92bda36b2 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -6,7 +6,7 @@ {-# LANGUAGE TemplateHaskell #-} {- | Module : Text.Pandoc.Options - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index c4080a227..7c0082c29 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.PDF - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index fd14341ad..4c4dd531d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -9,7 +9,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Process.hs b/src/Text/Pandoc/Process.hs index 866972e3f..b896feb7e 100644 --- a/src/Text/Pandoc/Process.hs +++ b/src/Text/Pandoc/Process.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Process - Copyright : Copyright (C) 2013-2020 John MacFarlane + Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 9a069f7d0..ac70f7d4c 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index 6c96ab30a..956b9f1f7 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.BibTeX - Copyright : Copyright (C) 2020 John MacFarlane + Copyright : Copyright (C) 2020-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs index 45f4d88d4..2958d6180 100644 --- a/src/Text/Pandoc/Readers/CSV.hs +++ b/src/Text/Pandoc/Readers/CSV.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.CSV - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 9eef498e1..150a837e4 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.CommonMark - Copyright : Copyright (C) 2015-2020 John MacFarlane + Copyright : Copyright (C) 2015-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs index 377186b1e..30bb19483 100644 --- a/src/Text/Pandoc/Readers/CslJson.hs +++ b/src/Text/Pandoc/Readers/CslJson.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.CslJson - Copyright : Copyright (C) 2020 John MacFarlane + Copyright : Copyright (C) 2020-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index b0846e345..d3b2dd4d3 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.DocBook - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index 46112af19..bcf26c4a3 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -2,7 +2,7 @@ {- | Module : Text.Pandoc.Readers.Docx.Combine Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, - 2014-2020 John MacFarlane <jgm@berkeley.edu>, + 2014-2021 John MacFarlane <jgm@berkeley.edu>, 2020 Nikolay Yakimov <root@livid.pp.ru> License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index a573344ff..f9c9a8e26 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -1,7 +1,7 @@ {- | Module : Text.Pandoc.Readers.Docx.StyleMaps Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, - 2014-2020 John MacFarlane <jgm@berkeley.edu>, + 2014-2021 John MacFarlane <jgm@berkeley.edu>, 2015 Nikolay Yakimov <root@livid.pp.ru> License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 8e94a0812..afc7a3e25 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -5,7 +5,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.HTML - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index e28ebe77b..4c069e2c3 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.HTML.Parsing - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 91639fa4c..6179ea8e7 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -3,8 +3,8 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.HTML.Table - Copyright : © 2006-2020 John MacFarlane, - 2020 Albert Krewinkel + Copyright : © 2006-2021 John MacFarlane, + 2020-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs index 4f82a1831..b7bd40fee 100644 --- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs +++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.HTML.TagCategories - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/HTML/Types.hs b/src/Text/Pandoc/Readers/HTML/Types.hs index a94eeb828..12c519ad6 100644 --- a/src/Text/Pandoc/Readers/HTML/Types.hs +++ b/src/Text/Pandoc/Readers/HTML/Types.hs @@ -2,7 +2,7 @@ {-# LANGUAGE MultiParamTypeClasses #-} {- | Module : Text.Pandoc.Readers.HTML.Types - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index a866e6ec3..70296bb6b 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.Ipynb - Copyright : Copyright (C) 2019-2020 John MacFarlane + Copyright : Copyright (C) 2019-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index 9266ce10d..5a5d0ee1e 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org - Copyright : © 2019-2020 Albert Krewinkel + Copyright : © 2019-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 14a41a911..f49323996 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -8,7 +8,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.LaTeX - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 814b2fe79..5f634818e 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.LaTeX.Lang - Copyright : Copyright (C) 2018-2020 John MacFarlane + Copyright : Copyright (C) 2018-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index e92ed387c..12a3ba2f6 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -5,7 +5,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.LaTeX.Parsing - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index a017a2afb..f8c214318 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Readers.LaTeX.Types - Copyright : Copyright (C) 2017-2020 John MacFarlane + Copyright : Copyright (C) 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5888bf095..5c3a21bb7 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.Markdown - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index d712b1120..cdb746c67 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.MediaWiki - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index a64b130e5..927291776 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Readers.Metadata - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 42843381a..9c8bc0374 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Native - Copyright : Copyright (C) 2011-2020 John MacFarlane + Copyright : Copyright (C) 2011-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 903cdf4a1..5b8996025 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.OPML - Copyright : Copyright (C) 2013-2020 John MacFarlane + Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index 851aec103..afeb27a87 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index 8f7cac6ea..14233569c 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.BlockStarts - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 17e3ff986..6bd046e04 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Blocks - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index 3b363270c..2dcbecb1d 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -3,7 +3,7 @@ {-# LANGUAGE TupleSections #-} {- | Module : Text.Pandoc.Readers.Org.DocumentTree - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index 9399ebd54..401e1bd8f 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ExportSettings - Copyright : © 2016–2020 Albert Krewinkel + Copyright : © 2016-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index b819a34a3..68c2ba5e0 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Inlines - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 4864d9478..6621822a2 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Meta - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index 1e4799e7b..abe8a9ebf 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ParserState - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index bce71c24d..d33920d47 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Readers.Org.Parsing - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 7f72077a4..ad7c65060 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Shared - Copyright : Copyright (C) 2014-2020 Albert Krewinkel + Copyright : Copyright (C) 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index eeb3d1389..707af905f 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -4,7 +4,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.RST - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 6691d8381..4991c6308 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -3,7 +3,7 @@ {- | Module : Text.Pandoc.Readers.Textile Copyright : Copyright (C) 2010-2012 Paul Rivier - 2010-2020 John MacFarlane + 2010-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Paul Rivier <paul*rivier#demotera*com> diff --git a/src/Text/Pandoc/RoffChar.hs b/src/Text/Pandoc/RoffChar.hs index 67e8b9cd5..d1c38204f 100644 --- a/src/Text/Pandoc/RoffChar.hs +++ b/src/Text/Pandoc/RoffChar.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.RoffChar - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 061361aba..c9e20cad0 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -2,7 +2,7 @@ {-# LANGUAGE TupleSections #-} {- | Module : Text.Pandoc.SelfContained - Copyright : Copyright (C) 2011-2020 John MacFarlane + Copyright : Copyright (C) 2011-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 4853621c8..b908a0172 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -8,7 +8,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Shared - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Slides.hs b/src/Text/Pandoc/Slides.hs index 9ea0389c9..a3e550b1f 100644 --- a/src/Text/Pandoc/Slides.hs +++ b/src/Text/Pandoc/Slides.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Slides - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 0c10b258d..e83f26329 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -4,7 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Templates - Copyright : Copyright (C) 2009-2020 John MacFarlane + Copyright : Copyright (C) 2009-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs index 200d756f6..0c7d7ab23 100644 --- a/src/Text/Pandoc/Translations.hs +++ b/src/Text/Pandoc/Translations.hs @@ -4,7 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Translations - Copyright : Copyright (C) 2017-2020 John MacFarlane + Copyright : Copyright (C) 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 4621e1765..b583dbbdb 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.UTF8 - Copyright : Copyright (C) 2010-2020 John MacFarlane + Copyright : Copyright (C) 2010-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/UUID.hs b/src/Text/Pandoc/UUID.hs index ca0df2d0b..12579be90 100644 --- a/src/Text/Pandoc/UUID.hs +++ b/src/Text/Pandoc/UUID.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.UUID - Copyright : Copyright (C) 2010-2020 John MacFarlane + Copyright : Copyright (C) 2010-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index 0654c2d85..49531d924 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -4,7 +4,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 0a312d1d1..b4ef7c8b9 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.AsciiDoc - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 66ded218f..8733b7149 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Writers.CommonMark - Copyright : Copyright (C) 2015-2020 John MacFarlane + Copyright : Copyright (C) 2015-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 0a6313513..4d44842e2 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ConTeXt - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs index 08310de65..13b95586b 100644 --- a/src/Text/Pandoc/Writers/CslJson.hs +++ b/src/Text/Pandoc/Writers/CslJson.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.CslJson - Copyright : Copyright (C) 2020 John MacFarlane + Copyright : Copyright (C) 2020-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Custom.hs b/src/Text/Pandoc/Writers/Custom.hs index 8da611b61..58c4bb5be 100644 --- a/src/Text/Pandoc/Writers/Custom.hs +++ b/src/Text/Pandoc/Writers/Custom.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Custom - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index affa0de04..a6776608d 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 65946ec88..a99e13a85 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -7,7 +7,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Docx - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Docx/StyleMap.hs b/src/Text/Pandoc/Writers/Docx/StyleMap.hs index c3c54c7e5..04868eaad 100644 --- a/src/Text/Pandoc/Writers/Docx/StyleMap.hs +++ b/src/Text/Pandoc/Writers/Docx/StyleMap.hs @@ -2,7 +2,7 @@ {- | Module : Text.Pandoc.Writers.Docx.StyleMap Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, - 2014-2020 John MacFarlane <jgm@berkeley.edu>, + 2014-2021 John MacFarlane <jgm@berkeley.edu>, 2015-2019 Nikolay Yakimov <root@livid.pp.ru> License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 90ec6824f..7df47c912 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.DokuWiki - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Clare Macrae <clare.macrae@googlemail.com> diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index c8f5ff60d..3e3fd8fd6 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -5,7 +5,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Writers.EPUB - Copyright : Copyright (C) 2010-2020 John MacFarlane + Copyright : Copyright (C) 2010-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 701ff3d9b..25b1f28d1 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -3,7 +3,7 @@ {- | Module : Text.Pandoc.Writers.FB2 Copyright : Copyright (C) 2011-2012 Sergey Astanin - 2012-2020 John MacFarlane + 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 0b051c8f0..26df0325e 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -6,7 +6,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.HTML - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Ipynb.hs b/src/Text/Pandoc/Writers/Ipynb.hs index d01d5a7e5..2613851c5 100644 --- a/src/Text/Pandoc/Writers/Ipynb.hs +++ b/src/Text/Pandoc/Writers/Ipynb.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Writers.Ipynb - Copyright : Copyright (C) 2019-2020 John MacFarlane + Copyright : Copyright (C) 2019-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 7058a4557..e8d93b8d5 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.JATS - Copyright : Copyright (C) 2017-2020 John MacFarlane + Copyright : Copyright (C) 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs index a4d42832d..465480f59 100644 --- a/src/Text/Pandoc/Writers/JATS/Table.hs +++ b/src/Text/Pandoc/Writers/JATS/Table.hs @@ -3,7 +3,7 @@ {-# LANGUAGE TupleSections #-} {- | Module : Text.Pandoc.Writers.JATS.Table - Copyright : © 2020 Albert Krewinkel + Copyright : © 2020-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb@zeitkraut.de> diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs index 8162f3bc0..54ed4a8bd 100644 --- a/src/Text/Pandoc/Writers/JATS/Types.hs +++ b/src/Text/Pandoc/Writers/JATS/Types.hs @@ -1,6 +1,6 @@ {- | Module : Text.Pandoc.Writers.JATS.Types - Copyright : Copyright (C) 2017-2020 John MacFarlane + Copyright : Copyright (C) 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index 6bc048a61..c21085a4f 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -3,7 +3,7 @@ {-# LANGUAGE PatternGuards #-} {- | Module : Text.Pandoc.Writers.Jira - Copyright : © 2010-2020 Albert Krewinkel, John MacFarlane + Copyright : © 2010-2021 Albert Krewinkel, John MacFarlane License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 2281290c0..df922e17b 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -5,7 +5,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.LaTeX - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs index 61ca41fb1..7b9ce186f 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Caption.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.LaTeX.Caption - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/LaTeX/Notes.hs b/src/Text/Pandoc/Writers/LaTeX/Notes.hs index 216a7bfc3..f225ef0c5 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Notes.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Notes.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.LaTeX.Notes - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 8411d9f80..6f8386937 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.LaTeX.Table - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 4eb0db042..edb70f53e 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Man - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 1b5c00468..d62727d90 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -5,7 +5,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Markdown - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/MediaWiki.hs b/src/Text/Pandoc/Writers/MediaWiki.hs index d1912caa6..5029be69f 100644 --- a/src/Text/Pandoc/Writers/MediaWiki.hs +++ b/src/Text/Pandoc/Writers/MediaWiki.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.MediaWiki - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 0fc333bc2..48395c420 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Ms - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Native.hs b/src/Text/Pandoc/Writers/Native.hs index 4d4dfca15..9c2ce805d 100644 --- a/src/Text/Pandoc/Writers/Native.hs +++ b/src/Text/Pandoc/Writers/Native.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Native - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index e41fb7176..05dfad5eb 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.ODT - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index ac991b594..3ac007f4e 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.OOXML - Copyright : Copyright (C) 2012-2020 John MacFarlane + Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 810a94775..8c9229fc0 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -2,7 +2,7 @@ {-# LANGUAGE CPP #-} {- | Module : Text.Pandoc.Writers.OPML - Copyright : Copyright (C) 2013-2020 John MacFarlane + Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 2af93017d..43ebf1807 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -3,8 +3,8 @@ {- | Module : Text.Pandoc.Writers.Org Copyright : © 2010-2015 Puneeth Chaganti <punchagan@gmail.com> - 2010-2020 John MacFarlane <jgm@berkeley.edu> - 2016-2020 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> + 2010-2021 John MacFarlane <jgm@berkeley.edu> + 2016-2021 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 8beeef46a..a3be1d723 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.RST - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index e3966ed07..cf27011c2 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.RTF - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Roff.hs b/src/Text/Pandoc/Writers/Roff.hs index 00b027cc9..6af56242f 100644 --- a/src/Text/Pandoc/Writers/Roff.hs +++ b/src/Text/Pandoc/Writers/Roff.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Roff - Copyright : Copyright (C) 2007-2020 John MacFarlane + Copyright : Copyright (C) 2007-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 129e45e9d..fc3f8ff3a 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -3,7 +3,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Shared - Copyright : Copyright (C) 2013-2020 John MacFarlane + Copyright : Copyright (C) 2013-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index a9ee5eece..b926c48a1 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -2,7 +2,7 @@ {-# LANGUAGE PatternGuards #-} {- | Module : Text.Pandoc.Writers.Docbook - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index c6debd9ce..53da70f84 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.Texinfo - Copyright : Copyright (C) 2008-2020 John MacFarlane + Copyright : Copyright (C) 2008-2021 John MacFarlane 2012 Peter Wang License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/Writers/Textile.hs b/src/Text/Pandoc/Writers/Textile.hs index 61ddb7497..03d030477 100644 --- a/src/Text/Pandoc/Writers/Textile.hs +++ b/src/Text/Pandoc/Writers/Textile.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.Textile - Copyright : Copyright (C) 2010-2020 John MacFarlane + Copyright : Copyright (C) 2010-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 902b093d3..9e45f0417 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -2,7 +2,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.ZimWiki - Copyright : © 2008-2020 John MacFarlane, + Copyright : © 2008-2021 John MacFarlane, 2017-2019 Alex Ivkin License : GNU GPL, version 2 or above diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 4b71d7b69..c4e3ed1e7 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.XML - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs index 522c4b3a1..b3e2a0509 100644 --- a/test/Tests/Command.hs +++ b/test/Tests/Command.hs @@ -1,7 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {- | Module : Tests.Command - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs index c9ee6d206..21898d10e 100644 --- a/test/Tests/Helpers.hs +++ b/test/Tests/Helpers.hs @@ -3,7 +3,7 @@ {-# LANGUAGE FlexibleInstances #-} {- | Module : Tests.Helpers - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs index 853375327..1dfbbd053 100644 --- a/test/Tests/Lua.hs +++ b/test/Tests/Lua.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Tests.Lua - Copyright : © 2017-2020 Albert Krewinkel + Copyright : © 2017-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Lua/Module.hs b/test/Tests/Lua/Module.hs index e2570e87f..d88633cf8 100644 --- a/test/Tests/Lua/Module.hs +++ b/test/Tests/Lua/Module.hs @@ -1,6 +1,6 @@ {- | Module : Tests.Lua.Module -Copyright : © 2019-2020 Albert Krewinkel +Copyright : © 2019-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index cf0396d0a..638620a36 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -1,7 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {- | Module : Tests.Old - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/test/Tests/Readers/Creole.hs b/test/Tests/Readers/Creole.hs index 15b826460..1fc0e62d7 100644 --- a/test/Tests/Readers/Creole.hs +++ b/test/Tests/Readers/Creole.hs @@ -3,7 +3,7 @@ {- | Module : Tests.Readers.Creole Copyright : © 2017 Sascha Wilde - 2017-2020 John MacFarlane + 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : Sascha Wilde <wilde@sha-bang.de> diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs index 700d6723d..3c75dd08d 100644 --- a/test/Tests/Readers/EPUB.hs +++ b/test/Tests/Readers/EPUB.hs @@ -1,7 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} {- | Module : Tests.Readers.EPUB - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.eu> diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index e4c681421..578c76860 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.HTML - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs index 32b8ecb7c..bf78fe1fe 100644 --- a/test/Tests/Readers/Jira.hs +++ b/test/Tests/Readers/Jira.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Tests.Readers.Jira - Copyright : © 2019-2020 Albert Krewinel + Copyright : © 2019-2021 Albert Krewinel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <tarleb@zeitkraut.de> diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index c50c91ca1..2a52ffd18 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.LaTeX - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index f591aa00d..4f3ab5a28 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -2,7 +2,7 @@ {- | Module : Tests.Readers.Man Copyright : © 2018-2019 Yan Pas <yanp.bugz@gmail.com>, - 2018-2020 John MacFarlane + 2018-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 3af5e2a94..18f909583 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Markdown - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs index 14062c884..f5e427ba2 100644 --- a/test/Tests/Readers/Odt.hs +++ b/test/Tests/Readers/Odt.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Odt - Copyright : © 2015-2020 John MacFarlane + Copyright : © 2015-2021 John MacFarlane 2015 Martin Linnemann License : GNU GPL, version 2 or above diff --git a/test/Tests/Readers/Org.hs b/test/Tests/Readers/Org.hs index d4f7bb6dc..290bb603e 100644 --- a/test/Tests/Readers/Org.hs +++ b/test/Tests/Readers/Org.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Shared - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs index 995bd0316..2ce07c4bb 100644 --- a/test/Tests/Readers/Org/Block.hs +++ b/test/Tests/Readers/Org/Block.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs index 2648a6e1f..d40c3bc1d 100644 --- a/test/Tests/Readers/Org/Block/CodeBlock.hs +++ b/test/Tests/Readers/Org/Block/CodeBlock.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.CodeBlock - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Block/Figure.hs b/test/Tests/Readers/Org/Block/Figure.hs index 56ddde9d8..8822f5b03 100644 --- a/test/Tests/Readers/Org/Block/Figure.hs +++ b/test/Tests/Readers/Org/Block/Figure.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Figure - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Block/Header.hs b/test/Tests/Readers/Org/Block/Header.hs index d38d26efb..887055451 100644 --- a/test/Tests/Readers/Org/Block/Header.hs +++ b/test/Tests/Readers/Org/Block/Header.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Header - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs index 15373b3b3..ac03c583b 100644 --- a/test/Tests/Readers/Org/Block/List.hs +++ b/test/Tests/Readers/Org/Block/List.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Header - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Block/Table.hs b/test/Tests/Readers/Org/Block/Table.hs index 31c994d3f..cb38fcc12 100644 --- a/test/Tests/Readers/Org/Block/Table.hs +++ b/test/Tests/Readers/Org/Block/Table.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Table - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Directive.hs b/test/Tests/Readers/Org/Directive.hs index 727a29658..00cb9762b 100644 --- a/test/Tests/Readers/Org/Directive.hs +++ b/test/Tests/Readers/Org/Directive.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Directive - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs index b5d0caa26..13e9fef21 100644 --- a/test/Tests/Readers/Org/Inline.hs +++ b/test/Tests/Readers/Org/Inline.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs index 792e4559c..87bb3ca75 100644 --- a/test/Tests/Readers/Org/Inline/Citation.hs +++ b/test/Tests/Readers/Org/Inline/Citation.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Citation - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Inline/Note.hs b/test/Tests/Readers/Org/Inline/Note.hs index 5924e69cc..20157d2ae 100644 --- a/test/Tests/Readers/Org/Inline/Note.hs +++ b/test/Tests/Readers/Org/Inline/Note.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Note - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs index e9309108e..7fde380af 100644 --- a/test/Tests/Readers/Org/Inline/Smart.hs +++ b/test/Tests/Readers/Org/Inline/Smart.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Smart - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs index bc167f2a5..b30b8949a 100644 --- a/test/Tests/Readers/Org/Meta.hs +++ b/test/Tests/Readers/Org/Meta.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Meta - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs index 184adee44..4d0848575 100644 --- a/test/Tests/Readers/Org/Shared.hs +++ b/test/Tests/Readers/Org/Shared.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Shared - Copyright : © 2014-2020 Albert Krewinkel + Copyright : © 2014-2021 Albert Krewinkel License : GNU GPL, version 2 or above Maintainer : Albert Krewinkel <albert@zeitkraut.de> diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs index bd9897ebc..68241b7f9 100644 --- a/test/Tests/Readers/RST.hs +++ b/test/Tests/Readers/RST.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ScopedTypeVariables #-} {- | Module : Tests.Readers.RST - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> diff --git a/test/Tests/Readers/Txt2Tags.hs b/test/Tests/Readers/Txt2Tags.hs index 989b7f673..62f336690 100644 --- a/test/Tests/Readers/Txt2Tags.hs +++ b/test/Tests/Readers/Txt2Tags.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Txt2Tags - Copyright : © 2014-2020 John MacFarlane, + Copyright : © 2014-2021 John MacFarlane, © 2014 Matthew Pickering License : GNU GPL, version 2 or above diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs index a23edf452..72a59fec0 100644 --- a/test/Tests/Shared.hs +++ b/test/Tests/Shared.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Shared - Copyright : © 2006-2020 John MacFarlane + Copyright : © 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley@edu> diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs index 9383c92ce..9a53aa18c 100644 --- a/trypandoc/trypandoc.hs +++ b/trypandoc/trypandoc.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Main - Copyright : © 2014-2020 John MacFarlane <jgm@berkeley.edu> + Copyright : © 2014-2021 John MacFarlane <jgm@berkeley.edu> License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> -- cgit v1.2.3 From fe1378227b24fa6a8661b2e0d377b808eb270c52 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 9 Jan 2021 13:35:07 +0100 Subject: Org reader: allow multiple pipe chars in todo sequences Additional pipe chars, used to separate "action" state from "no further action" states, are ignored. E.g., for the following sequence, both `DONE` and `FINISHED` are states with no further action required. #+TODO: UNFINISHED | DONE | FINISHED Previously, parsing of the todo sequence failed if multiple pipe chars were included. Closes: #7014 --- src/Text/Pandoc/Readers/Org/Meta.hs | 14 ++++++++++---- test/Tests/Readers/Org/Meta.hs | 10 ++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 6621822a2..a1b21046a 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -239,7 +239,7 @@ lineOfInlines = do todoSequence :: Monad m => OrgParser m TodoSequence todoSequence = try $ do todoKws <- todoKeywords - doneKws <- optionMaybe $ todoDoneSep *> todoKeywords + doneKws <- optionMaybe $ todoDoneSep *> doneKeywords newline -- There must be at least one DONE keyword. The last TODO keyword is -- taken if necessary. @@ -250,11 +250,17 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where + todoKeyword :: Monad m => OrgParser m Text + todoKeyword = many1Char nonspaceChar <* skipSpaces + todoKeywords :: Monad m => OrgParser m [Text] todoKeywords = try $ - let keyword = many1Char nonspaceChar <* skipSpaces - endOfKeywords = todoDoneSep <|> void newline - in manyTill keyword (lookAhead endOfKeywords) + let endOfKeywords = todoDoneSep <|> void newline + in manyTill todoKeyword (lookAhead endOfKeywords) + + doneKeywords :: Monad m => OrgParser m [Text] + doneKeywords = try $ + manyTill (todoKeyword <* optional todoDoneSep) (lookAhead newline) todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs index b30b8949a..3c50f891b 100644 --- a/test/Tests/Readers/Org/Meta.hs +++ b/test/Tests/Readers/Org/Meta.hs @@ -116,6 +116,16 @@ tests = "#+LANGUAGE: de-DE" =?> Pandoc (setMeta "lang" (MetaString "de-DE") nullMeta) mempty + , testGroup "Todo sequences" + [ "not included in document" =: + "#+todo: WAITING | FINISHED" =?> + Pandoc mempty mempty + + , "can contain multiple pipe characters" =: + "#+todo: UNFINISHED | RESEARCH | NOTES | CHART\n" =?> + Pandoc mempty mempty + ] + , testGroup "LaTeX" [ "LATEX_HEADER" =: "#+latex_header: \\usepackage{tikz}" =?> -- cgit v1.2.3 From 15e33b33b4df67f989062a356e009ffc596bbc32 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 10 Jan 2021 10:15:30 -0800 Subject: T.P.Citeproc: refactor and export `getReferences`. See #7016. --- src/Text/Pandoc/Citeproc.hs | 79 +++++++++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index e87ddcbcd..bd54ca2bf 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -5,7 +5,9 @@ {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Citeproc - ( processCitations ) + ( processCitations, + getReferences + ) where import Citeproc @@ -46,7 +48,7 @@ import Safe (lastMay, initSafe) -- import Debug.Trace as Trace (trace, traceShowId) -processCitations :: PandocMonad m => Pandoc -> m Pandoc +processCitations :: PandocMonad m => Pandoc -> m Pandoc processCitations (Pandoc meta bs) = do let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta) >>= metaValueToText @@ -96,31 +98,9 @@ processCitations (Pandoc meta bs) = do ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= metaValueToText) let locale = Citeproc.mergeLocales mblang style - let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs - getCiteId _ = mempty - let metanocites = lookupMeta "nocite" meta - let meta' = deleteMeta "nocite" meta - let nocites = maybe mempty (query getCiteId) metanocites - let citeIds = query getCiteId (Pandoc meta bs) - let idpred = if "*" `Set.member` nocites - then const True - else (`Set.member` citeIds) - let inlineRefs = case lookupMeta "references" meta of - Just (MetaList rs) -> mapMaybe metaValueToReference rs - _ -> [] - externalRefs <- case lookupMeta "bibliography" meta of - Just (MetaList xs) -> - mconcat <$> - mapM (getRefsFromBib locale idpred) - (mapMaybe metaValueToText xs) - Just x -> - case metaValueToText x of - Just fp -> getRefsFromBib locale idpred fp - Nothing -> return [] - Nothing -> return [] - let refs = map (linkifyVariables . legacyDateRanges) - (externalRefs ++ inlineRefs) - -- note that inlineRefs can override externalRefs + + refs <- getReferences (Just locale) (Pandoc meta bs) + let otherIdsMap = foldr (\ref m -> case T.words . extractText <$> M.lookup "other-ids" @@ -130,8 +110,10 @@ processCitations (Pandoc meta bs) = do (\id' -> M.insert id' (referenceId ref)) m ids) M.empty refs - -- TODO: issue warning if no refs defined + let meta' = deleteMeta "nocite" meta let citations = getCitations locale otherIdsMap $ Pandoc meta' bs + + let linkCites = maybe False truish $ lookupMeta "link-citations" meta let opts = defaultCiteprocOptions{ linkCitations = linkCites } let result = Citeproc.citeproc opts style (localeLanguage locale) @@ -161,6 +143,7 @@ processCitations (Pandoc meta bs) = do B.toList . movePunctuationInsideQuotes . B.fromList _ -> id + let metanocites = lookupMeta "nocite" meta let Pandoc meta'' bs' = maybe id (setMeta "nocite") metanocites . walk (map capitalizeNoteCitation . @@ -172,6 +155,46 @@ processCitations (Pandoc meta bs) = do $ insertRefs refkvs classes meta'' (walk fixLinks $ B.toList bibs) bs' +-- | Get references defined inline in the metadata and via an external +-- bibliography. Only references that are actually cited in the document +-- (either with a genuine citation or with `nocite`) are returned. +-- URL variables are converted to links. +getReferences :: PandocMonad m + => Maybe Locale -> Pandoc -> m [Reference Inlines] +getReferences mblocale (Pandoc meta bs) = do + let lang = maybe (Lang "en" (Just "US")) (parseLang . stringify) $ + lookupMeta "lang" meta + let locale = case mblocale of + Just l -> l + Nothing -> either mempty id $ getLocale lang + + let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs + getCiteId _ = mempty + let metanocites = lookupMeta "nocite" meta + let nocites = maybe mempty (query getCiteId) metanocites + let citeIds = query getCiteId (Pandoc meta bs) + let idpred = if "*" `Set.member` nocites + then const True + else (`Set.member` citeIds) + let inlineRefs = case lookupMeta "references" meta of + Just (MetaList rs) -> mapMaybe metaValueToReference rs + _ -> [] + externalRefs <- case lookupMeta "bibliography" meta of + Just (MetaList xs) -> + mconcat <$> + mapM (getRefsFromBib locale idpred) + (mapMaybe metaValueToText xs) + Just x -> + case metaValueToText x of + Just fp -> getRefsFromBib locale idpred fp + Nothing -> return [] + Nothing -> return [] + return $ map (linkifyVariables . legacyDateRanges) + (externalRefs ++ inlineRefs) + -- note that inlineRefs can override externalRefs + + + -- If we have a span.csl-left-margin followed by span.csl-right-inline, -- we insert a space. This ensures that they will be separated by a space, -- even in formats that don't have special handling for the display spans. -- cgit v1.2.3 From 402d984bc53773e1876000d0d9857b053c002904 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 10 Jan 2021 10:25:58 -0800 Subject: T.P.Citeproc: factor out getLang. --- src/Text/Pandoc/Citeproc.hs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index bd54ca2bf..e81f93cdd 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -94,9 +94,8 @@ processCitations (Pandoc meta bs) = do case styleRes of Left err -> throwError $ PandocAppError $ prettyCiteprocError err Right style -> return style{ styleAbbreviations = mbAbbrevs } - mblang <- maybe (return Nothing) bcp47LangToIETF - ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= - metaValueToText) + + mblang <- getLang meta let locale = Citeproc.mergeLocales mblang style refs <- getReferences (Just locale) (Pandoc meta bs) @@ -155,6 +154,12 @@ processCitations (Pandoc meta bs) = do $ insertRefs refkvs classes meta'' (walk fixLinks $ B.toList bibs) bs' +-- Retrieve citeproc lang based on metadata. +getLang :: PandocMonad m => Meta -> m (Maybe Lang) +getLang meta = maybe (return Nothing) bcp47LangToIETF + ((lookupMeta "lang" meta <|> lookupMeta "locale" meta) >>= + metaValueToText) + -- | Get references defined inline in the metadata and via an external -- bibliography. Only references that are actually cited in the document -- (either with a genuine citation or with `nocite`) are returned. @@ -162,11 +167,13 @@ processCitations (Pandoc meta bs) = do getReferences :: PandocMonad m => Maybe Locale -> Pandoc -> m [Reference Inlines] getReferences mblocale (Pandoc meta bs) = do - let lang = maybe (Lang "en" (Just "US")) (parseLang . stringify) $ - lookupMeta "lang" meta - let locale = case mblocale of - Just l -> l - Nothing -> either mempty id $ getLocale lang + locale <- case mblocale of + Just l -> return l + Nothing -> do + mblang <- getLang meta + case mblang of + Just lang -> return $ either mempty id $ getLocale lang + Nothing -> return mempty let getCiteId (Cite cs _) = Set.fromList $ map B.citationId cs getCiteId _ = mempty -- cgit v1.2.3 From d98ec4feb80ae0a9bf192c9b14aaef033ba7fd6e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 10 Jan 2021 11:48:53 -0800 Subject: T.P.Citeproc: factor out and export `getStyle`. --- src/Text/Pandoc/Citeproc.hs | 100 ++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index e81f93cdd..9649c6971 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -6,7 +6,8 @@ {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Citeproc ( processCitations, - getReferences + getReferences, + getStyle ) where @@ -50,50 +51,7 @@ import Safe (lastMay, initSafe) processCitations :: PandocMonad m => Pandoc -> m Pandoc processCitations (Pandoc meta bs) = do - let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta) - >>= metaValueToText - - let getFile defaultExtension fp = do - oldRp <- getResourcePath - mbUdd <- getUserDataDir - setResourcePath $ oldRp ++ maybe [] - (\u -> [u <> "/csl", - u <> "/csl/dependent"]) mbUdd - let fp' = if T.any (=='.') fp || "data:" `T.isPrefixOf` fp - then fp - else fp <> defaultExtension - (result, _) <- fetchItem fp' - setResourcePath oldRp - return result - - let getCslDefault = readDataFile "default.csl" - - cslContents <- UTF8.toText <$> maybe getCslDefault (getFile ".csl") cslfile - - let abbrevFile = lookupMeta "citation-abbreviations" meta >>= metaValueToText - - mbAbbrevs <- case abbrevFile of - Nothing -> return Nothing - Just fp -> do - rawAbbr <- getFile ".json" fp - case eitherDecode (L.fromStrict rawAbbr) of - Left err -> throwError $ PandocCiteprocError $ - CiteprocParseError $ - "Could not parse abbreviations file " <> fp - <> "\n" <> T.pack err - Right abbr -> return $ Just abbr - - let getParentStyle url = do - -- first, try to retrieve the style locally, then use HTTP. - let basename = T.takeWhileEnd (/='/') url - UTF8.toText <$> - catchError (getFile ".csl" basename) (\_ -> fst <$> fetchItem url) - - styleRes <- Citeproc.parseStyle getParentStyle cslContents - style <- - case styleRes of - Left err -> throwError $ PandocAppError $ prettyCiteprocError err - Right style -> return style{ styleAbbreviations = mbAbbrevs } + style <- getStyle (Pandoc meta bs) mblang <- getLang meta let locale = Citeproc.mergeLocales mblang style @@ -154,6 +112,58 @@ processCitations (Pandoc meta bs) = do $ insertRefs refkvs classes meta'' (walk fixLinks $ B.toList bibs) bs' +-- | Retrieve the CSL style specified by the csl or citation-style +-- metadata field in a pandoc document, or the default CSL style +-- if none is specified. Retrieve the parent style +-- if the style is a dependent style. Add abbreviations defined +-- in an abbreviation file if one has been specified. +getStyle :: PandocMonad m => Pandoc -> m (Style Inlines) +getStyle (Pandoc meta _) = do + let cslfile = (lookupMeta "csl" meta <|> lookupMeta "citation-style" meta) + >>= metaValueToText + + let getFile defaultExtension fp = do + oldRp <- getResourcePath + mbUdd <- getUserDataDir + setResourcePath $ oldRp ++ maybe [] + (\u -> [u <> "/csl", + u <> "/csl/dependent"]) mbUdd + let fp' = if T.any (=='.') fp || "data:" `T.isPrefixOf` fp + then fp + else fp <> defaultExtension + (result, _) <- fetchItem fp' + setResourcePath oldRp + return result + + let getCslDefault = readDataFile "default.csl" + + cslContents <- UTF8.toText <$> maybe getCslDefault (getFile ".csl") cslfile + + let abbrevFile = lookupMeta "citation-abbreviations" meta >>= metaValueToText + + mbAbbrevs <- case abbrevFile of + Nothing -> return Nothing + Just fp -> do + rawAbbr <- getFile ".json" fp + case eitherDecode (L.fromStrict rawAbbr) of + Left err -> throwError $ PandocCiteprocError $ + CiteprocParseError $ + "Could not parse abbreviations file " <> fp + <> "\n" <> T.pack err + Right abbr -> return $ Just abbr + + let getParentStyle url = do + -- first, try to retrieve the style locally, then use HTTP. + let basename = T.takeWhileEnd (/='/') url + UTF8.toText <$> + catchError (getFile ".csl" basename) (\_ -> fst <$> fetchItem url) + + styleRes <- Citeproc.parseStyle getParentStyle cslContents + case styleRes of + Left err -> throwError $ PandocAppError $ prettyCiteprocError err + Right style -> return style{ styleAbbreviations = mbAbbrevs } + + -- Retrieve citeproc lang based on metadata. getLang :: PandocMonad m => Meta -> m (Maybe Lang) getLang meta = maybe (return Nothing) bcp47LangToIETF -- cgit v1.2.3 From e741c7f5533d60971e98c6ced477d998335ef0b5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 10 Jan 2021 12:49:53 -0800 Subject: Fix infinite HTTP requests when writing epubs from URL source. Due to a bug in code added to avoid overwriting the cover image if it had the form `fileX.YYY`, pandoc made an endless sequence of HTTP requests when writing epub with input from a URL. Closes #7013. --- src/Text/Pandoc/Writers/EPUB.hs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 3e3fd8fd6..783f190f5 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -465,7 +465,12 @@ pandocToEPUB version opts doc = do case epubCoverImage metadata of Nothing -> return ([],[]) Just img -> do - let coverImage = takeFileName img + let fp = takeFileName img + mediaPaths <- gets (map (fst . snd) . stMediaPaths) + coverImageName <- -- see #4206 + if ("media/" <> fp) `elem` mediaPaths + then getMediaNextNewName (takeExtension fp) + else return fp imgContent <- lift $ P.readFileLazy img (coverImageWidth, coverImageHeight) <- case imageSize opts' (B.toStrict imgContent) of @@ -478,7 +483,7 @@ pandocToEPUB version opts doc = do ("coverpage", toVal' "true"), ("pagetitle", toVal $ escapeStringForXML $ TS.pack plainTitle), - ("cover-image", toVal' coverImage), + ("cover-image", toVal' coverImageName), ("cover-image-width", toVal' $ show coverImageWidth), ("cover-image-height", toVal' $ @@ -486,7 +491,7 @@ pandocToEPUB version opts doc = do cssvars True <> vars } (Pandoc meta []) coverEntry <- mkEntry "text/cover.xhtml" cpContent - coverImageEntry <- mkEntry ("media/" ++ coverImage) + coverImageEntry <- mkEntry ("media/" ++ coverImageName) imgContent return ( [ coverEntry ] , [ coverImageEntry ] ) @@ -1076,8 +1081,7 @@ getMediaNextNewName :: PandocMonad m => String -> E m String getMediaNextNewName ext = do nextId <- gets stMediaNextId modify $ \st -> st { stMediaNextId = nextId + 1 } - let nextName = "file" ++ show nextId ++ ext - (P.fetchItem (TS.pack nextName) >> getMediaNextNewName ext) `catchError` const (return nextName) + return $ "file" ++ show nextId ++ ext isHtmlFormat :: Format -> Bool isHtmlFormat (Format "html") = True -- cgit v1.2.3 From 68fa43799963fff11a980d5d3959184c3d34a723 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 11 Jan 2021 00:35:48 +0100 Subject: JATS writer: fix citations (#7018) * JATS writer: keep code lines at 80 chars or below * JATS writer: fix citations --- COPYRIGHT | 11 +-- data/jats.csl | 203 ---------------------------------------- pandoc.cabal | 2 - src/Text/Pandoc/App.hs | 16 +--- src/Text/Pandoc/Writers/JATS.hs | 33 ++++--- test/command/7016.md | 48 ++++++++++ 6 files changed, 72 insertions(+), 241 deletions(-) delete mode 100644 data/jats.csl create mode 100644 test/command/7016.md (limited to 'src') diff --git a/COPYRIGHT b/COPYRIGHT index 9992e5680..a6e3a897c 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -181,7 +181,7 @@ http://github.com/paulrouget/dzslides Released under the Do What the Fuck You Want To Public License. ------------------------------------------------------------------------ -Pandoc embeds a lua interpreter (via hslua). +Pandoc embeds a Lua interpreter (via hslua). Copyright © 1994–2020 Lua.org, PUC-Rio. @@ -203,12 +203,3 @@ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ------------------------------------------------------------------------- -The template pandoc.jats is Copyright 2013--2015 Martin Fenner, -released under GPL version 2 or later. - -The file data/jats.csl is derived from a csl file by Martin Fenner, -revised by Martin Paul Eve and then John MacFarlane. -"This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 -License. Originally by Martin Fenner." diff --git a/data/jats.csl b/data/jats.csl deleted file mode 100644 index 6972cb3f8..000000000 --- a/data/jats.csl +++ /dev/null @@ -1,203 +0,0 @@ -<?xml version="1.0" encoding="utf-8"?> -<style xmlns="http://purl.org/net/xbiblio/csl" class="in-text" version="1.0" default-locale="en-US"> - <info> - <title>Journal Article Tag Suite</title> - <title-short>JATS</title-short> - <id>http://www.zotero.org/styles/journal-article-tag-suite</id> - <link href="https://github.com/MartinPaulEve/JATS-CSL/blob/master/jats.csl" rel="self"/> - <link rel="documentation" href="http://jats.nlm.nih.gov/archiving/tag-library/1.0/index.html"/> - <author> - <name>Martin Paul Eve</name> - <email>martin@martineve.com</email> - </author> - <category citation-format="numeric"/> - <category field="medicine"/> - <category field="biology"/> - <summary>Use this style to generate bibliographic data in Journal Article Tagging Suite (JATS) 1.0 XML format</summary> - <updated>2014-06-21T17:41:26+00:00</updated> - <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License. Originally by Martin Fenner.</rights> - </info> - <locale xml:lang="en"> - <terms> - <term name="et-al">{{jats}}<etal/>{{/jats}}</term> - </terms> - </locale> - <macro name="citation-number"> - <text variable="citation-number" prefix="{{jats}}id="ref-{{/jats}}" suffix="{{jats}}">{{/jats}}"/> - </macro> - <macro name="author"> - <names variable="author" delimiter=" "> - <name prefix="{{jats}}<name>{{/jats}}" suffix="{{jats}}</name>{{/jats}}" name-as-sort-order="all" sort-separator=" "> - <name-part name="family" text-case="capitalize-first" prefix="{{jats}}<surname>{{/jats}}" suffix="{{jats}}</surname>{{/jats}}"/> - <name-part name="given" text-case="capitalize-first" prefix="{{jats}}<given-names>{{/jats}}" suffix="{{jats}}</given-names>{{/jats}}"/> - </name> - <substitute> - <names variable="editor"/> - </substitute> - </names> - </macro> - - <macro name="editor" delimiter=" "> - <names variable="editor" prefix="{{jats}}<person-group person-group-type="editor">{{/jats}}" suffix="{{jats}}</person-group>{{/jats}}"> - <name prefix="{{jats}}<name>{{/jats}}" suffix="{{jats}}</name>{{/jats}}" name-as-sort-order="all" sort-separator=" "> - <name-part name="family" text-case="capitalize-first" prefix="{{jats}}<surname>{{/jats}}" suffix="{{jats}}</surname>{{/jats}}"/> - <name-part name="given" text-case="capitalize-first" prefix="{{jats}}<given-names>{{/jats}}" suffix="{{jats}}</given-names>{{/jats}}"/> - </name> - <substitute> - <names variable="editor"/> - </substitute> - </names> - </macro> - - <macro name="editor"> - <group delimiter=": "> - <names variable="editor"> - <name prefix="{{jats}}<name>{{/jats}}" suffix="{{jats}}</name>{{/jats}}" name-as-sort-order="all" sort-separator=""> - <name-part name="family" text-case="capitalize-first" prefix="{{jats}}<surname>{{/jats}}" suffix="{{jats}}</surname>{{/jats}}"/> - <name-part name="given" text-case="capitalize-first" prefix="{{jats}}<given-names>{{/jats}}" suffix="{{jats}}<given-names>{{/jats}}"/> - </name> - </names> - </group> - </macro> - <macro name="title"> - <choose> - <if type="book" match="any"> - <group prefix="{{jats}}<source>{{/jats}}" suffix="{{jats}}</source>{{/jats}}"> - <text variable="title"/> - </group> - </if> - <else> - <group prefix="{{jats}}<article-title>{{/jats}}" suffix="{{jats}}</article-title>{{/jats}}"> - <text variable="title"/> - </group> - </else> - </choose> - </macro> - <macro name="container-title"> - <text variable="container-title" form="short" prefix="{{jats}}<source>{{/jats}}" suffix="{{jats}}</source>{{/jats}}"/> - </macro> - <macro name="publisher"> - <text variable="publisher" prefix="{{jats}}<publisher-name>{{/jats}}" suffix="{{jats}}</publisher-name>{{/jats}}"/> - <text variable="publisher-place" prefix="{{jats}}<publisher-loc>{{/jats}}" suffix="{{jats}}</publisher-loc>{{/jats}}"/> - </macro> - <macro name="link"> - <choose> - <if match="any" variable="DOI"> - <group prefix="{{jats}}<pub-id pub-id-type="doi">{{/jats}}" suffix="{{jats}}</pub-id>{{/jats}}"> - <text variable="DOI"/> - </group> - </if> - </choose> - <choose> - <if match="any" variable="PMID"> - <group prefix="{{jats}}<pub-id pub-id-type="pmid">{{/jats}}" suffix="{{jats}}</pub-id>{{/jats}}"> - <text variable="PMID"/> - </group> - </if> - </choose> - <choose> - <if variable="URL" match="any"> - <text variable="URL" /> - </if> - </choose> - </macro> - <macro name="date"> - <choose> - <if type="article-journal article-magazine article-newspaper report patent book" match="any"> - <group prefix="{{jats}}<date>{{/jats}}" suffix="{{jats}}</date>{{/jats}}"> - <date variable="issued"> - <date-part name="day" form="numeric-leading-zeros" prefix="{{jats}}<day>{{/jats}}" suffix="{{jats}}</day>{{/jats}}"/> - <date-part name="month" form="numeric-leading-zeros" prefix="{{jats}}<month>{{/jats}}" suffix="{{jats}}</month>{{/jats}}"/> - <date-part name="year" prefix="{{jats}}<year>{{/jats}}" suffix="{{jats}}</year>{{/jats}}"/> - </date> - </group> - </if> - <else> - <group prefix="{{jats}}<date-in-citation content-type="access-date"{{/jats}}" suffix="{{jats}}</date-in-citation>{{/jats}}"> - <date variable="accessed" prefix="{{jats}} iso-8601-date="{{/jats}}" suffix="{{jats}}">{{/jats}}"> - <date-part name="year"/> - <date-part name="month" form="numeric-leading-zeros" prefix="{{jats}}-{{/jats}}"/> - <date-part name="day" form="numeric-leading-zeros" prefix="{{jats}}-{{/jats}}"/> - </date> - <date variable="accessed"> - <date-part name="day" prefix="{{jats}}<day>{{/jats}}" suffix="{{jats}}</day>{{/jats}}"/> - <date-part name="month" form="numeric-leading-zeros" prefix="{{jats}}<month>{{/jats}}" suffix="{{jats}}</month>{{/jats}}"/> - <date-part name="year" prefix="{{jats}}<year>{{/jats}}" suffix="{{jats}}</year>{{/jats}}"/> - </date> - </group> - </else> - </choose> - </macro> - <macro name="location"> - <choose> - <if type="article-journal article-magazine" match="any"> - <text variable="volume" prefix="{{jats}}<volume>{{/jats}}" suffix="{{jats}}</volume>{{/jats}}"/> - <text variable="issue" prefix="{{jats}}<issue>{{/jats}}" suffix="{{jats}}</issue>{{/jats}}"/> - </if> - </choose> - <choose> - <if type="article-journal article-magazine article-newspaper chapter" match="any"> - <text variable="page-first" prefix="{{jats}}<fpage>{{/jats}}" suffix="{{jats}}</fpage>{{/jats}}"/> - </if> - </choose> - </macro> - <macro name="publication-type"> - <group prefix="{{jats}} publication-type="{{/jats}}" suffix="{{jats}}">{{/jats}}"> - <choose> - <if type="article-journal article-magazine article-newspaper" match="any"> - <text value="journal"/> - </if> - <else-if type="book" match="any"> - <text value="book"/> - </else-if> - <else-if type="chapter" match="any"> - <text value="bookchapter"/> - </else-if> - <else-if type="dataset" match="any"> - <text value="dataset"/> - </else-if> - <else-if type="patent" match="any"> - <text value="patent"/> - </else-if> - <else-if type="report" match="any"> - <text value="report"/> - </else-if> - <else-if type="review" match="any"> - <text value="review"/> - </else-if> - <else> - <text value="standard"/> - </else> - </choose> - </group> - </macro> - <citation collapse="citation-number"> - <sort> - <key variable="citation-number"/> - </sort> - <layout delimiter=","> - <group prefix="{{jats}}<xref ref-type="bibr" rid="{{/jats}}" suffix="{{jats}}</xref>{{/jats}}"> - <text variable="citation-number" prefix="{{jats}}ref-{{/jats}}" suffix="{{jats}}">{{/jats}}"/> - <text variable="citation-number"/> - </group> - </layout> - </citation> - <bibliography sort-separator=""> - <layout> - <group prefix="{{jats}}<ref {{/jats}}" suffix="{{jats}}</ref>{{/jats}}"> - <text macro="citation-number"/> - <group prefix="{{jats}}<element-citation{{/jats}}" suffix="{{jats}}</element-citation>{{/jats}}"> - <text macro="publication-type"/> - <text macro="author" prefix="{{jats}}<person-group person-group-type="author">{{/jats}}" suffix="{{jats}}</person-group>{{/jats}}"/> - <text macro="title" /> - <text macro="container-title"/> - <text macro="editor"/> - <text macro="publisher"/> - <text macro="date"/> - <text macro="location"/> - <text macro="link"/> - </group> - </group> - </layout> - </bibliography> -</style> diff --git a/pandoc.cabal b/pandoc.cabal index ede9af6f0..db8dab491 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -180,8 +180,6 @@ data-files: data/pandoc.List.lua -- bash completion template data/bash_completion.tpl - -- jats csl - data/jats.csl -- citeproc data/default.csl citeproc/biblatex-localization/*.lbx.strings diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 725c76424..437af3257 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -50,10 +50,9 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, import Text.Pandoc.App.CommandLineOptions (parseOptions, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) import Text.Pandoc.BCP47 (Lang (..), parseBCP47) -import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.PDF (makePDF) -import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) +import Text.Pandoc.SelfContained (makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput, defaultUserDataDirs, tshow, findM) @@ -190,17 +189,6 @@ convertWithOpts opts = do Nothing -> readDataFile "abbreviations" Just f -> readFileStrict f - metadata <- if format == "jats" && - isNothing (lookupMeta "csl" (optMetadata opts)) && - isNothing (lookupMeta "citation-style" - (optMetadata opts)) - then do - jatsCSL <- readDataFile "jats.csl" - let jatsEncoded = makeDataURI - ("application/xml", jatsCSL) - return $ setMeta "csl" jatsEncoded $ optMetadata opts - else return $ optMetadata opts - case lookupMetaString "lang" (optMetadata opts) of "" -> setTranslations $ Lang "en" "" "US" [] l -> case parseBCP47 l of @@ -286,7 +274,7 @@ convertWithOpts opts = do then fillMediaBag else return) >=> return . adjustMetadata (metadataFromFile <>) - >=> return . adjustMetadata (<> metadata) + >=> return . adjustMetadata (<> optMetadata opts) >=> applyTransforms transforms >=> applyFilters readerOpts filters [T.unpack format] >=> maybe return extractMedia (optExtractMedia opts) diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index e8d93b8d5..b2266d179 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -3,7 +3,7 @@ {-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.JATS - Copyright : Copyright (C) 2017-2021 John MacFarlane + Copyright : 2017-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -168,13 +168,15 @@ plainToPara x = x -- | Convert a list of pairs of terms and definitions into a list of -- JATS varlistentrys. deflistItemsToJATS :: PandocMonad m - => WriterOptions -> [([Inline],[[Block]])] -> JATS m (Doc Text) + => WriterOptions + -> [([Inline],[[Block]])] -> JATS m (Doc Text) deflistItemsToJATS opts items = vcat <$> mapM (uncurry (deflistItemToJATS opts)) items -- | Convert a term and a list of blocks into a JATS varlistentry. deflistItemToJATS :: PandocMonad m - => WriterOptions -> [Inline] -> [[Block]] -> JATS m (Doc Text) + => WriterOptions + -> [Inline] -> [[Block]] -> JATS m (Doc Text) deflistItemToJATS opts term defs = do term' <- inlinesToJATS opts term def' <- wrappedBlocksToJATS (not . isPara) @@ -186,7 +188,8 @@ deflistItemToJATS opts term defs = do -- | Convert a list of lists of blocks to a list of JATS list items. listItemsToJATS :: PandocMonad m - => WriterOptions -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text) + => WriterOptions + -> Maybe [Text] -> [[Block]] -> JATS m (Doc Text) listItemsToJATS opts markers items = case markers of Nothing -> vcat <$> mapM (listItemToJATS opts Nothing) items @@ -194,12 +197,13 @@ listItemsToJATS opts markers items = -- | Convert a list of blocks into a JATS list item. listItemToJATS :: PandocMonad m - => WriterOptions -> Maybe Text -> [Block] -> JATS m (Doc Text) + => WriterOptions + -> Maybe Text -> [Block] -> JATS m (Doc Text) listItemToJATS opts mbmarker item = do contents <- wrappedBlocksToJATS (not . isParaOrList) opts (walk demoteHeaderAndRefs item) return $ inTagsIndented "list-item" $ - maybe empty (\lbl -> inTagsSimple "label" (text $ T.unpack lbl)) mbmarker + maybe empty (inTagsSimple "label" . text . T.unpack) mbmarker $$ contents imageMimeType :: Text -> [(Text, Text)] -> (Text, Text) @@ -247,7 +251,9 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do return $ inTags True "sec" attribs $ inTagsSimple "title" title' $$ contents -- Bibliography reference: -blockToJATS opts (Div (T.stripPrefix "ref-" -> Just _,_,_) [Para lst]) = +blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident = + inTags True "ref" [("id", ident)] . + inTagsSimple "mixed-citation" <$> inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do contents <- blocksToJATS opts xs @@ -470,10 +476,13 @@ inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _)) | escapeURI t == email = return $ inTagsSimple "email" $ literal (escapeStringForXML email) inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do - let attr = [("id", ident) | not (T.null ident)] ++ - [("alt", stringify txt) | not (null txt)] ++ - [("rid", src)] ++ - [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] + let attr = mconcat + [ [("id", ident) | not (T.null ident)] + , [("alt", stringify txt) | not (null txt)] + , [("rid", src)] + , [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] + , [("ref-type", "bibr") | "ref-" `T.isPrefixOf` src] + ] if null txt then return $ selfClosingTag "xref" attr else do @@ -529,7 +538,7 @@ demoteHeaderAndRefs (Div ("refs",cls,kvs) bs) = demoteHeaderAndRefs x = x parseDate :: Text -> Maybe Day -parseDate s = msum (map (\fs -> parsetimeWith fs $ T.unpack s) formats) :: Maybe Day +parseDate s = msum (map (`parsetimeWith` T.unpack s) formats) where parsetimeWith = parseTimeM True defaultTimeLocale formats = ["%x","%m/%d/%Y", "%D","%F", "%d %b %Y", "%e %B %Y", "%b. %e, %Y", "%B %e, %Y", diff --git a/test/command/7016.md b/test/command/7016.md new file mode 100644 index 000000000..c2d791ce9 --- /dev/null +++ b/test/command/7016.md @@ -0,0 +1,48 @@ +``` +% pandoc --citeproc --to=jats_archiving --standalone +--- +csl: command/apa.csl +references: +- id: doe + type: article + author: + - family: Doe + given: Jane + container-title: Proceedings of the Academy of Test Inputs + doi: 10.x/nope + issued: 2021 + title: Another article +... +Blah [@doe]. +^D +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN" + "JATS-archivearticle1.dtd"> +<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other"> +<front> +<journal-meta> +<journal-id></journal-id> +<journal-title-group> +</journal-title-group> +<issn></issn> +<publisher> +<publisher-name></publisher-name> +</publisher> +</journal-meta> +<article-meta> +</article-meta> +</front> +<body> +<p>Blah (Doe, 2021).</p> +</body> +<back> +<ref-list> + <ref id="ref-doe"> + <mixed-citation>Doe, J. (2021). Another article. <italic>Proceedings + of the Academy of Test Inputs</italic>. + doi:<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.x/nope">10.x/nope</ext-link></mixed-citation> + </ref> +</ref-list> +</back> +</article> +``` -- cgit v1.2.3 From c451207b08edc36fa5c2f1af5556a8d211e023ed Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 12 Jan 2021 09:49:10 -0800 Subject: Docx writer: handle table header using styles. Instead of hard-coding the border and header cell vertical alignment, we now let this be determined by the Table style, making use of Word's "conditional formatting" for the table's first row. For headerless tables, we use the tblLook element to tell Word not to apply conditional first-row formatting. Closes #7008. --- data/docx/word/styles.xml | 15 +++++++++ src/Text/Pandoc/Writers/Docx.hs | 37 +++++++++++---------- test/docx/golden/block_quotes.docx | Bin 10001 -> 10092 bytes test/docx/golden/codeblock.docx | Bin 9853 -> 9944 bytes test/docx/golden/comments.docx | Bin 10188 -> 10279 bytes test/docx/golden/custom_style_no_reference.docx | Bin 9952 -> 10042 bytes test/docx/golden/custom_style_preserve.docx | Bin 10578 -> 10666 bytes test/docx/golden/definition_list.docx | Bin 9850 -> 9941 bytes .../golden/document-properties-short-desc.docx | Bin 9856 -> 9947 bytes test/docx/golden/document-properties.docx | Bin 10332 -> 10423 bytes test/docx/golden/headers.docx | Bin 9989 -> 10080 bytes test/docx/golden/image.docx | Bin 26667 -> 26758 bytes test/docx/golden/inline_code.docx | Bin 9789 -> 9880 bytes test/docx/golden/inline_formatting.docx | Bin 9969 -> 10060 bytes test/docx/golden/inline_images.docx | Bin 26725 -> 26816 bytes test/docx/golden/link_in_notes.docx | Bin 10010 -> 10101 bytes test/docx/golden/links.docx | Bin 10185 -> 10276 bytes test/docx/golden/lists.docx | Bin 10261 -> 10352 bytes test/docx/golden/lists_continuing.docx | Bin 10052 -> 10143 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10141 -> 10232 bytes test/docx/golden/lists_restarting.docx | Bin 10053 -> 10144 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10148 -> 10239 bytes test/docx/golden/notes.docx | Bin 9955 -> 10046 bytes test/docx/golden/raw-blocks.docx | Bin 9888 -> 9980 bytes test/docx/golden/raw-bookmarks.docx | Bin 10023 -> 10115 bytes test/docx/golden/table_one_row.docx | Bin 9834 -> 9932 bytes test/docx/golden/table_with_list_cell.docx | Bin 10199 -> 10249 bytes test/docx/golden/tables.docx | Bin 10225 -> 10266 bytes test/docx/golden/track_changes_deletion.docx | Bin 9833 -> 9924 bytes test/docx/golden/track_changes_insertion.docx | Bin 9816 -> 9907 bytes test/docx/golden/track_changes_move.docx | Bin 9850 -> 9941 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 9962 -> 10053 bytes test/docx/golden/unicode.docx | Bin 9774 -> 9865 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9822 -> 9913 bytes 34 files changed, 35 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/data/docx/word/styles.xml b/data/docx/word/styles.xml index 6bb5a3f52..832b1b25b 100644 --- a/data/docx/word/styles.xml +++ b/data/docx/word/styles.xml @@ -350,6 +350,21 @@ <w:right w:w="108" w:type="dxa" /> </w:tblCellMar> </w:tblPr> + <w:tblStylePr w:type="firstRow"> + <w:tblPr> + <w:jc w:val="left"/> + <w:tblInd w:w="0" w:type="dxa"/> + </w:tblPr> + <w:trPr> + <w:jc w:val="left"/> + </w:trPr> + <w:tcPr> + <w:vAlign w:val="bottom"/> + <w:tcBorders> + <w:bottom w:val="single"/> + </w:tcBorders> + </w:tcPr> + </w:tblStylePr> </w:style> <w:style w:type="paragraph" w:customStyle="1" w:styleId="DefinitionTerm"> <w:name w:val="Definition Term" /> diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index a99e13a85..8f498775d 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1023,23 +1023,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do _ -> es ++ [Elem $ mknode "w:p" [] ()] headers' <- mapM cellToOpenXML $ zip aligns headers rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - let borderProps = Elem $ mknode "w:tcPr" [] - [ mknode "w:tcBorders" [] - $ mknode "w:bottom" [("w:val","single")] () - , mknode "w:vAlign" [("w:val","bottom")] () ] compactStyle <- pStyleM "Compact" let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] - let mkcell border contents = mknode "w:tc" [] - $ [ borderProps | border ] ++ - if null contents - then emptyCell' - else contents - let mkrow border cells = + let mkcell contents = mknode "w:tc" [] + $ if null contents + then emptyCell' + else contents + let mkrow cells = mknode "w:tr" [] $ - [ mknode "w:trPr" [] - [ mknode "w:cnfStyle" [("w:firstRow","1")] ()] - | border] - ++ map (mkcell border) cells + map mkcell cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths @@ -1047,6 +1039,15 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do [("w:w", show (floor (textwidth * w) :: Integer))] () let hasHeader = not $ all null headers modify $ \s -> s { stInTable = False } + -- for compatibility with Word <= 2007, we include a val with a bitmask + -- 0×0020 Apply first row conditional formatting + -- 0×0040 Apply last row conditional formatting + -- 0×0080 Apply first column conditional formatting + -- 0×0100 Apply last column conditional formatting + -- 0×0200 Do not apply row banding conditional formatting + -- 0×0400 Do not apply column banding conditional formattin + let tblLookVal :: Int + tblLookVal = if hasHeader then 0x20 else 0 return $ caption' ++ [Elem $ @@ -1059,15 +1060,17 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do ,("w:firstColumn","0") ,("w:lastColumn","0") ,("w:noHBand","0") - ,("w:noVBand","0")] () : + ,("w:noVBand","0") + ,("w:val", printf "%04x" tblLookVal) + ] () : [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths then [] else map mkgridcol widths) - : [ mkrow True headers' | hasHeader ] ++ - map (mkrow False) rows' + : [ mkrow headers' | hasHeader ] ++ + map mkrow rows' )] blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index bbc8d8de9..3e1bf16e7 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index e20efcab4..66f055063 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index f1185da98..fb3a02a0a 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index 83243ab8c..bc6c2702a 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 17804bb81..8c555a5bd 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 21629e208..c21b3a5b3 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 5cf8db0b0..92ce144e9 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 14bfab6d4..d21b67309 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index 416743aa1..3558a47bf 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index ef2940f89..606df92a3 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 479ea65ec..759269cac 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index e12e3b38d..c37777080 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 8bd57bb8c..9450b1a73 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 2c6a638fc..6f0b830e6 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 11e52c4b1..e53889cfb 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 7667990c4..5dbe298b7 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 3e8c6d2b2..194181288 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 05a7cf060..6e0b634f7 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index f5ae4a384..477178e77 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index d02c77271..51110356e 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index f7fdcbe11..b6206cdf5 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index ae7f8f1f0..07b576080 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 5e433b736..d46095eb7 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index d404878c6..7caba4e93 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 79c395262..6aaa6da61 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index df9680773..5746c5ad0 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index bb73b82f6..5f22dccc6 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index 7df484aaa..ab5c4f56d 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index d717b93ab..085f33162 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 791182db2..1ac86d5c8 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index b64a7b58e..c2c443b19 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index b5116d1a8..5ea18d32e 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 387d3e76ee81138588195ebe8dad7720f2623b02 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 12 Jan 2021 10:20:32 -0800 Subject: Markdown writer: cleaned up raw formats. We now react appropriately to gfm, commonmark, and commonmark_x as raw formats. --- src/Text/Pandoc/Writers/Markdown.hs | 69 +++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d62727d90..de9075ac4 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -494,25 +494,24 @@ blockToMarkdown' opts b@(RawBlock f str) = do let renderEmpty = mempty <$ report (BlockNotRendered b) case variant of PlainText -> renderEmpty - _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra", - "markdown_mmd", "markdown_strict"] -> - return $ literal str <> literal "\n" - | isEnabled Ext_raw_attribute opts -> rawAttribBlock - | f `elem` ["html", "html5", "html4"] -> - case () of - _ | isEnabled Ext_markdown_attribute opts -> return $ - literal (addMarkdownAttribute str) <> literal "\n" - | isEnabled Ext_raw_html opts -> return $ - literal str <> literal "\n" - | isEnabled Ext_raw_attribute opts -> rawAttribBlock - | otherwise -> renderEmpty - | f `elem` ["latex", "tex"] -> - case () of - _ | isEnabled Ext_raw_tex opts -> return $ - literal str <> literal "\n" - | isEnabled Ext_raw_attribute opts -> rawAttribBlock - | otherwise -> renderEmpty - | otherwise -> renderEmpty + Commonmark + | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] + -> return $ literal str <> literal "\n" + Markdown + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + -> return $ literal str <> literal "\n" + _ | isEnabled Ext_raw_attribute opts -> rawAttribBlock + | f `elem` ["html", "html5", "html4"] + , isEnabled Ext_markdown_attribute opts + -> return $ literal (addMarkdownAttribute str) <> literal "\n" + | f `elem` ["html", "html5", "html4"] + , isEnabled Ext_raw_html opts + -> return $ literal str <> literal "\n" + | f `elem` ["latex", "tex"] + , isEnabled Ext_raw_tex opts + -> return $ literal str <> literal "\n" + _ -> renderEmpty blockToMarkdown' opts HorizontalRule = return $ blankline <> literal (T.replicate (writerColumns opts) "-") <> blankline blockToMarkdown' opts (Header level attr inlines) = do @@ -1250,21 +1249,23 @@ inlineToMarkdown opts il@(RawInline f str) = do let renderEmpty = mempty <$ report (InlineNotRendered il) case variant of PlainText -> renderEmpty - _ | f `elem` ["markdown", "markdown_github", "markdown_phpextra", - "markdown_mmd", "markdown_strict"] -> - return $ literal str - | isEnabled Ext_raw_attribute opts -> rawAttribInline - | f `elem` ["html", "html5", "html4"] -> - case () of - _ | isEnabled Ext_raw_html opts -> return $ literal str - | isEnabled Ext_raw_attribute opts -> rawAttribInline - | otherwise -> renderEmpty - | f `elem` ["latex", "tex"] -> - case () of - _ | isEnabled Ext_raw_tex opts -> return $ literal str - | isEnabled Ext_raw_attribute opts -> rawAttribInline - | otherwise -> renderEmpty - | otherwise -> renderEmpty + Commonmark + | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] + -> return $ literal str + Markdown + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + -> return $ literal str + _ | isEnabled Ext_raw_attribute opts -> rawAttribInline + | f `elem` ["html", "html5", "html4"] + , isEnabled Ext_raw_html opts + -> return $ literal str + | f `elem` ["latex", "tex"] + , isEnabled Ext_raw_tex opts + -> return $ literal str + _ -> renderEmpty + + inlineToMarkdown opts LineBreak = do variant <- asks envVariant if variant == PlainText || isEnabled Ext_hard_line_breaks opts -- cgit v1.2.3 From 83336a45a78b288dcc0104d2432135bdb3f5e8f1 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 16 Jan 2021 11:15:35 -0800 Subject: Recognize more extensions as markdown by default. `mkdn`, `mkd`, `mdwn`, `mdown`, `Rmd`. Closes #7034. --- src/Text/Pandoc/App/FormatHeuristics.hs | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs index 17ed30fe9..65a1a7b82 100644 --- a/src/Text/Pandoc/App/FormatHeuristics.hs +++ b/src/Text/Pandoc/App/FormatHeuristics.hs @@ -48,6 +48,11 @@ formatFromFilePath x = ".lhs" -> Just "markdown+lhs" ".ltx" -> Just "latex" ".markdown" -> Just "markdown" + ".mkdn" -> Just "markdown" + ".mkd" -> Just "markdown" + ".mdwn" -> Just "markdown" + ".mdown" -> Just "markdown" + ".Rmd" -> Just "markdown" ".md" -> Just "markdown" ".ms" -> Just "ms" ".muse" -> Just "muse" -- cgit v1.2.3 From 6efd3460a776620fdb93812daa4f6831e6c332ce Mon Sep 17 00:00:00 2001 From: Gautier DI FOLCO <gautier.difolco@gmail.com> Date: Sun, 17 Jan 2021 01:15:33 +0100 Subject: Markdown reader: support GitHub wiki's internal links (#2923) (#6458) Canges overview: * Add a `Ext_markdown_github_wikilink` constructor to `Extension` [API change]. * Add the parser `githubWikiLink` in `Text.Pandoc.Readers.Markdown` * Add tests. --- src/Text/Pandoc/Extensions.hs | 3 +++ src/Text/Pandoc/Readers/Markdown.hs | 25 +++++++++++++++++++++++++ test/Tests/Readers/Markdown.hs | 30 ++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 39c2a0489..69257ecc8 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -118,6 +118,7 @@ data Extension = | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown iff -- container has attribute 'markdown' + | Ext_wikilinks -- ^ Interpret a markdown wiki link | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] | Ext_mmd_link_attributes -- ^ MMD style reference link attributes @@ -258,6 +259,7 @@ githubMarkdownExtensions = extensionsFromList , Ext_emoji , Ext_fenced_code_blocks , Ext_backtick_code_blocks + , Ext_wikilinks ] -- | Extensions to be used with multimarkdown. @@ -444,6 +446,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_tex_math_single_backslash , Ext_tex_math_double_backslash , Ext_markdown_attribute + , Ext_wikilinks , Ext_mmd_title_block , Ext_abbreviations , Ext_autolink_bare_uris diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5c3a21bb7..8fd0b68e2 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -21,6 +21,7 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) +import Data.Functor (($>)) import Data.List (transpose, elemIndex, sortOn) import qualified Data.Map as M import Data.Maybe @@ -1485,6 +1486,7 @@ inline = choice [ whitespace , note , cite , bracketedSpan + , githubWikiLink , link , image , math @@ -1780,6 +1782,29 @@ source = do linkTitle :: PandocMonad m => MarkdownParser m Text linkTitle = quotedTitle '"' <|> quotedTitle '\'' +-- Github wiki style link, with optional title +-- syntax documented under https://help.github.com/en/github/building-a-strong-community/editing-wiki-content +githubWikiLink :: PandocMonad m => MarkdownParser m (F Inlines) +githubWikiLink = try $ guardEnabled Ext_wikilinks >> wikilink + where + wikilink = try $ do + string "[[" + firstPart <- fmap mconcat . sequence <$> wikiText + (char '|' *> complexWikilink firstPart) + <|> (string "]]" $> (B.link + <$> (stringify <$> firstPart) + <*> return "wikilink" + <*> firstPart)) + + complexWikilink firstPart = do + url <- fmap stringify . sequence <$> wikiText + string "]]" + return $ B.link <$> url + <*> return "wikilink" + <*> firstPart + + wikiText = many (whitespace <|> bareURL <|> str <|> endline <|> escapedChar) + link :: PandocMonad m => MarkdownParser m (F Inlines) link = try $ do st <- getState diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 18f909583..a2abcb143 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -307,6 +307,36 @@ tests = [ testGroup "inline code" "[https://example.org(](url)" =?> para (link "url" "" (text "https://example.org(")) ] + , testGroup "Github wiki links" + [ test markdownGH "autolink" $ + "[[https://example.org]]" =?> + para (link "https://example.org" "wikilink" (text "https://example.org")) + , test markdownGH "link with title" $ + "[[title|https://example.org]]" =?> + para (link "https://example.org" "wikilink" (text "title")) + , test markdownGH "bad link with title" $ + "[[title|random string]]" =?> + para (link "random-string" "wikilink" (text "title")) + , test markdownGH "autolink not being a link" $ + "[[Name of page]]" =?> + para (link "Name-of-page" "wikilink" (text "Name of page")) + , test markdownGH "autolink not being a link with a square bracket" $ + "[[Name of ]page]]" =?> + para (link "Name-of-]page" "wikilink" (text "Name of ]page")) + , test markdownGH "formatting (strong and emphasis) should not be interpreted" $ + "[[***a**b **c**d*|https://example.org]]" =?> + para (text "[[" <> emph (strong (str "a") <> str "b" <> space + <> strong (str "c") <> str "d") <> text "|https://example.org]]") + , test markdownGH "inlined code should not make a link" $ + "[[ti`|`le|https://example.org]]" =?> + para (text "[[ti" <> code "|" <> text "le|https://example.org]]") + , test markdownGH "link with title and a cut should take the middle part as link" $ + "[[tit|le|https://example.org]]" =?> + para (link "le" "wikilink" (text "tit")) + , test markdownGH "link with inline start should be a link" $ + "[[t`i*t_le|https://example.org]]" =?> + para (link "https://example.org" "wikilink" (text "t`i*t_le")) + ] , testGroup "Headers" [ "blank line before header" =: "\n# Header\n" -- cgit v1.2.3 From c841bcf3b01548b2f9b462b39d8edda4c10be534 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 16 Jan 2021 16:22:04 -0800 Subject: Revert "Markdown reader: support GitHub wiki's internal links (#2923) (#6458)" This reverts commit 6efd3460a776620fdb93812daa4f6831e6c332ce. Since this extension is designed to be used with GitHub markdown (gfm), we need to implement the parser as a commonmark extension (commonmark-extensions), rather than in pandoc's markdown reader. When that is done, we can add it here. --- src/Text/Pandoc/Extensions.hs | 3 --- src/Text/Pandoc/Readers/Markdown.hs | 25 ------------------------- test/Tests/Readers/Markdown.hs | 30 ------------------------------ 3 files changed, 58 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 69257ecc8..39c2a0489 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -118,7 +118,6 @@ data Extension = | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_markdown_attribute -- ^ Interpret text inside HTML as markdown iff -- container has attribute 'markdown' - | Ext_wikilinks -- ^ Interpret a markdown wiki link | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid] | Ext_mmd_link_attributes -- ^ MMD style reference link attributes @@ -259,7 +258,6 @@ githubMarkdownExtensions = extensionsFromList , Ext_emoji , Ext_fenced_code_blocks , Ext_backtick_code_blocks - , Ext_wikilinks ] -- | Extensions to be used with multimarkdown. @@ -446,7 +444,6 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_tex_math_single_backslash , Ext_tex_math_double_backslash , Ext_markdown_attribute - , Ext_wikilinks , Ext_mmd_title_block , Ext_abbreviations , Ext_autolink_bare_uris diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8fd0b68e2..5c3a21bb7 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -21,7 +21,6 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) -import Data.Functor (($>)) import Data.List (transpose, elemIndex, sortOn) import qualified Data.Map as M import Data.Maybe @@ -1486,7 +1485,6 @@ inline = choice [ whitespace , note , cite , bracketedSpan - , githubWikiLink , link , image , math @@ -1782,29 +1780,6 @@ source = do linkTitle :: PandocMonad m => MarkdownParser m Text linkTitle = quotedTitle '"' <|> quotedTitle '\'' --- Github wiki style link, with optional title --- syntax documented under https://help.github.com/en/github/building-a-strong-community/editing-wiki-content -githubWikiLink :: PandocMonad m => MarkdownParser m (F Inlines) -githubWikiLink = try $ guardEnabled Ext_wikilinks >> wikilink - where - wikilink = try $ do - string "[[" - firstPart <- fmap mconcat . sequence <$> wikiText - (char '|' *> complexWikilink firstPart) - <|> (string "]]" $> (B.link - <$> (stringify <$> firstPart) - <*> return "wikilink" - <*> firstPart)) - - complexWikilink firstPart = do - url <- fmap stringify . sequence <$> wikiText - string "]]" - return $ B.link <$> url - <*> return "wikilink" - <*> firstPart - - wikiText = many (whitespace <|> bareURL <|> str <|> endline <|> escapedChar) - link :: PandocMonad m => MarkdownParser m (F Inlines) link = try $ do st <- getState diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index a2abcb143..18f909583 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -307,36 +307,6 @@ tests = [ testGroup "inline code" "[https://example.org(](url)" =?> para (link "url" "" (text "https://example.org(")) ] - , testGroup "Github wiki links" - [ test markdownGH "autolink" $ - "[[https://example.org]]" =?> - para (link "https://example.org" "wikilink" (text "https://example.org")) - , test markdownGH "link with title" $ - "[[title|https://example.org]]" =?> - para (link "https://example.org" "wikilink" (text "title")) - , test markdownGH "bad link with title" $ - "[[title|random string]]" =?> - para (link "random-string" "wikilink" (text "title")) - , test markdownGH "autolink not being a link" $ - "[[Name of page]]" =?> - para (link "Name-of-page" "wikilink" (text "Name of page")) - , test markdownGH "autolink not being a link with a square bracket" $ - "[[Name of ]page]]" =?> - para (link "Name-of-]page" "wikilink" (text "Name of ]page")) - , test markdownGH "formatting (strong and emphasis) should not be interpreted" $ - "[[***a**b **c**d*|https://example.org]]" =?> - para (text "[[" <> emph (strong (str "a") <> str "b" <> space - <> strong (str "c") <> str "d") <> text "|https://example.org]]") - , test markdownGH "inlined code should not make a link" $ - "[[ti`|`le|https://example.org]]" =?> - para (text "[[ti" <> code "|" <> text "le|https://example.org]]") - , test markdownGH "link with title and a cut should take the middle part as link" $ - "[[tit|le|https://example.org]]" =?> - para (link "le" "wikilink" (text "tit")) - , test markdownGH "link with inline start should be a link" $ - "[[t`i*t_le|https://example.org]]" =?> - para (link "https://example.org" "wikilink" (text "t`i*t_le")) - ] , testGroup "Headers" [ "blank line before header" =: "\n# Header\n" -- cgit v1.2.3 From 1c4d14cdcc60c1bfcc579a5b5ab154e8e62e54c7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 18 Jan 2021 11:32:02 -0800 Subject: RST writer: fix #7039. We were losing content from inside spans with a class, due to logic that is meant to avoid nested inline structures that can't be represented in RST. The logic was a bit stricter than necessary. This commit fixes the issue. --- src/Text/Pandoc/Writers/RST.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index a3be1d723..d01e13db4 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -519,8 +519,8 @@ flatten outer (Quoted _ _, _) -> keep f i (_, Quoted _ _) -> keep f i -- spans are not rendered using RST inlines, so we can keep them - (Span ("",[],[]) _, _) -> keep f i - (_, Span ("",[],[]) _) -> keep f i + (Span (_,_,[]) _, _) -> keep f i + (_, Span (_,_,[]) _) -> keep f i -- inlineToRST handles this case properly so it's safe to keep ( Link{}, Image{}) -> keep f i -- parent inlines would prevent links from being correctly -- cgit v1.2.3 From 5f98ac62e3f91a3301c13f17d9081b6ecb39e5a9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 19 Jan 2021 20:39:58 -0800 Subject: JATS writer: Ensure that disp-quote is always wrapped in p. Closes #7041. --- src/Text/Pandoc/Writers/JATS.hs | 4 +- test/writer.jats_archiving | 68 +++++++++++++++++------------- test/writer.jats_articleauthoring | 88 +++++++++++++++++++++------------------ test/writer.jats_publishing | 68 +++++++++++++++++------------- 4 files changed, 131 insertions(+), 97 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index b2266d179..c75d40745 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -156,9 +156,11 @@ wrappedBlocksToJATS needsWrap opts = wrappedBlockToJATS b = do inner <- blockToJATS opts b return $ - if needsWrap b + if needsWrap b || isBlockQuote b -- see #7041 then inTags True "p" [("specific-use","wrapper")] inner else inner + isBlockQuote (BlockQuote _) = True + isBlockQuote _ = False -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block diff --git a/test/writer.jats_archiving b/test/writer.jats_archiving index a708e0367..2d06c5cab 100644 --- a/test/writer.jats_archiving +++ b/test/writer.jats_archiving @@ -78,31 +78,39 @@ Gruber’s markdown test suite.</p> <sec id="block-quotes"> <title>Block Quotes</title> <p>E-mail style:</p> - <disp-quote> - <p>This is a block quote. It is pretty short.</p> - </disp-quote> - <disp-quote> - <p>Code in a block quote:</p> - <preformat>sub status { - print "working"; -}</preformat> - <p>A list:</p> - <list list-type="order"> - <list-item> - <p>item one</p> - </list-item> - <list-item> - <p>item two</p> - </list-item> - </list> - <p>Nested block quotes:</p> + <p specific-use="wrapper"> <disp-quote> - <p>nested</p> + <p>This is a block quote. It is pretty short.</p> </disp-quote> + </p> + <p specific-use="wrapper"> <disp-quote> - <p>nested</p> + <p>Code in a block quote:</p> + <preformat>sub status { + print "working"; +}</preformat> + <p>A list:</p> + <list list-type="order"> + <list-item> + <p>item one</p> + </list-item> + <list-item> + <p>item two</p> + </list-item> + </list> + <p>Nested block quotes:</p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> </disp-quote> - </disp-quote> + </p> <p>This should not be a block quote: 2 > 1.</p> <p>And a following paragraph.</p> </sec> @@ -829,10 +837,12 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> </list-item> </list> <p>An e-mail address: <email>nobody@nowhere.net</email></p> - <disp-quote> - <p>Blockquoted: - <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> - </disp-quote> + <p specific-use="wrapper"> + <disp-quote> + <p>Blockquoted: + <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> + </disp-quote> + </p> <p>Auto-links should not occur here: <monospace><http://example.com/></monospace></p> <preformat>or here: <http://example.com/></preformat> @@ -856,9 +866,11 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> <italic>not</italic> be a footnote reference, because it contains a space.[^my note] Here is an inline note.<xref ref-type="fn" rid="fn3">3</xref></p> - <disp-quote> - <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> - </disp-quote> + <p specific-use="wrapper"> + <disp-quote> + <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> + </disp-quote> + </p> <list list-type="order"> <list-item> <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p> diff --git a/test/writer.jats_articleauthoring b/test/writer.jats_articleauthoring index 90437992e..65ad5e37c 100644 --- a/test/writer.jats_articleauthoring +++ b/test/writer.jats_articleauthoring @@ -67,39 +67,43 @@ Gruber’s markdown test suite.</p> <sec id="block-quotes"> <title>Block Quotes</title> <p>E-mail style:</p> - <disp-quote> - <p>This is a block quote. It is pretty short.</p> - </disp-quote> - <disp-quote> - <p>Code in a block quote:</p> - <p specific-use="wrapper"> - <preformat>sub status { + <p specific-use="wrapper"> + <disp-quote> + <p>This is a block quote. It is pretty short.</p> + </disp-quote> + </p> + <p specific-use="wrapper"> + <disp-quote> + <p>Code in a block quote:</p> + <p specific-use="wrapper"> + <preformat>sub status { print "working"; }</preformat> - </p> - <p>A list:</p> - <p specific-use="wrapper"> - <list list-type="order"> - <list-item> - <p>item one</p> - </list-item> - <list-item> - <p>item two</p> - </list-item> - </list> - </p> - <p>Nested block quotes:</p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> - </disp-quote> + </p> + <p>A list:</p> + <p specific-use="wrapper"> + <list list-type="order"> + <list-item> + <p>item one</p> + </list-item> + <list-item> + <p>item two</p> + </list-item> + </list> + </p> + <p>Nested block quotes:</p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> + </disp-quote> + </p> <p>This should not be a block quote: 2 > 1.</p> <p>And a following paragraph.</p> </sec> @@ -813,10 +817,12 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> </list-item> </list> <p>An e-mail address: <email>nobody@nowhere.net</email></p> - <disp-quote> - <p>Blockquoted: - <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> - </disp-quote> + <p specific-use="wrapper"> + <disp-quote> + <p>Blockquoted: + <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> + </disp-quote> + </p> <p>Auto-links should not occur here: <monospace><http://example.com/></monospace></p> <preformat>or here: <http://example.com/></preformat> @@ -854,11 +860,13 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> and <monospace>]</monospace> verbatim characters, as well as [bracketed text].</p> </fn></p> - <disp-quote> - <p>Notes can go in quotes.<fn> - <p>In quote.</p> - </fn></p> - </disp-quote> + <p specific-use="wrapper"> + <disp-quote> + <p>Notes can go in quotes.<fn> + <p>In quote.</p> + </fn></p> + </disp-quote> + </p> <list list-type="order"> <list-item> <p>And in list items.<fn> diff --git a/test/writer.jats_publishing b/test/writer.jats_publishing index 6d5a04057..915471576 100644 --- a/test/writer.jats_publishing +++ b/test/writer.jats_publishing @@ -78,31 +78,39 @@ Gruber’s markdown test suite.</p> <sec id="block-quotes"> <title>Block Quotes</title> <p>E-mail style:</p> - <disp-quote> - <p>This is a block quote. It is pretty short.</p> - </disp-quote> - <disp-quote> - <p>Code in a block quote:</p> - <preformat>sub status { - print "working"; -}</preformat> - <p>A list:</p> - <list list-type="order"> - <list-item> - <p>item one</p> - </list-item> - <list-item> - <p>item two</p> - </list-item> - </list> - <p>Nested block quotes:</p> + <p specific-use="wrapper"> <disp-quote> - <p>nested</p> + <p>This is a block quote. It is pretty short.</p> </disp-quote> + </p> + <p specific-use="wrapper"> <disp-quote> - <p>nested</p> + <p>Code in a block quote:</p> + <preformat>sub status { + print "working"; +}</preformat> + <p>A list:</p> + <list list-type="order"> + <list-item> + <p>item one</p> + </list-item> + <list-item> + <p>item two</p> + </list-item> + </list> + <p>Nested block quotes:</p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> </disp-quote> - </disp-quote> + </p> <p>This should not be a block quote: 2 > 1.</p> <p>And a following paragraph.</p> </sec> @@ -829,10 +837,12 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> </list-item> </list> <p>An e-mail address: <email>nobody@nowhere.net</email></p> - <disp-quote> - <p>Blockquoted: - <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> - </disp-quote> + <p specific-use="wrapper"> + <disp-quote> + <p>Blockquoted: + <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> + </disp-quote> + </p> <p>Auto-links should not occur here: <monospace><http://example.com/></monospace></p> <preformat>or here: <http://example.com/></preformat> @@ -856,9 +866,11 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> <italic>not</italic> be a footnote reference, because it contains a space.[^my note] Here is an inline note.<xref ref-type="fn" rid="fn3">3</xref></p> - <disp-quote> - <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> - </disp-quote> + <p specific-use="wrapper"> + <disp-quote> + <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> + </disp-quote> + </p> <list list-type="order"> <list-item> <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p> -- cgit v1.2.3 From 87083bd1d6d715e349313dc6af9cbac82f1bb4ea Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Wed, 20 Jan 2021 19:08:02 +0100 Subject: Text.Pandoc.Citeproc: use finer grained imports This allows to import the module in writers without causing a circular dependency. --- src/Text/Pandoc/Citeproc.hs | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 9649c6971..6658c8c0c 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -18,33 +18,36 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) import Text.Pandoc.Readers.Markdown (yamlToRefs) -import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir) import qualified Text.Pandoc.BCP47 as BCP47 -import Data.ByteString (ByteString) -import qualified Data.ByteString.Lazy as L +import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta) +import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition as Pandoc -import Text.Pandoc.Walk -import Text.Pandoc.Builder as B -import Text.Pandoc (PandocMonad(..), PandocError(..), - readDataFile, ReaderOptions(..), pandocExtensions, - report, LogMessage(..), fetchItem) +import Text.Pandoc.Class (PandocMonad(..), getResourcePath, getUserDataDir, + fetchItem, readDataFile, report, setResourcePath) +import Text.Pandoc.Error (PandocError(..)) +import Text.Pandoc.Extensions (pandocExtensions) +import Text.Pandoc.Logging (LogMessage(..)) +import Text.Pandoc.Options (ReaderOptions(..)) import Text.Pandoc.Shared (stringify, ordNub, blocksToInlines, tshow) import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Walk (query, walk, walkM) +import Control.Applicative ((<|>)) +import Control.Monad.Except (catchError, throwError) +import Control.Monad.State (State, evalState, get, put, runState) import Data.Aeson (eitherDecode) -import Data.Default -import Data.Ord () +import Data.ByteString (ByteString) +import qualified Data.ByteString.Lazy as L +import Data.Char (isPunctuation, isUpper) +import Data.Default (Default(def)) +import qualified Data.Foldable as Foldable import qualified Data.Map as M +import Data.Maybe (mapMaybe, fromMaybe) +import Data.Ord () +import qualified Data.Sequence as Seq import qualified Data.Set as Set -import Data.Char (isPunctuation, isUpper) import Data.Text (Text) import qualified Data.Text as T -import Control.Monad.State -import qualified Data.Sequence as Seq -import qualified Data.Foldable as Foldable -import System.FilePath -import Control.Applicative -import Control.Monad.Except -import Data.Maybe (mapMaybe, fromMaybe) +import System.FilePath (takeExtension) import Safe (lastMay, initSafe) -- import Debug.Trace as Trace (trace, traceShowId) -- cgit v1.2.3 From fa952c8dbea9ad47ea684729f862a3c6bdd0fecc Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 20 Jan 2021 21:08:01 -0800 Subject: Add biblatex, bibtex as output formats (closes #7040). * `biblatex` and `bibtex` are now supported as output as well as input formats. * New module Text.Pandoc.Writers.BibTeX, exporting writeBibTeX and writeBibLaTeX. [API change] * New unexported function `writeBibtexString` in Text.Pandoc.Citeproc.BibTeX. --- MANUAL.txt | 8 +- pandoc.cabal | 1 + src/Text/Pandoc/Citeproc/BibTeX.hs | 254 ++++++++++++++++++++++++++++++++++++- src/Text/Pandoc/Templates.hs | 2 + src/Text/Pandoc/Writers.hs | 5 + src/Text/Pandoc/Writers/BibTeX.hs | 48 +++++++ 6 files changed, 312 insertions(+), 6 deletions(-) create mode 100644 src/Text/Pandoc/Writers/BibTeX.hs (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index b9c4ef637..436e9d36c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -281,6 +281,8 @@ header when requesting a document from a URL: ::: {#output-formats} - `asciidoc` ([AsciiDoc]) or `asciidoctor` ([AsciiDoctor]) - `beamer` ([LaTeX beamer][`beamer`] slide show) + - `bibtex` ([BibTeX] bibliography) + - `biblatex` ([BibLaTeX] bibliography) - `commonmark` ([CommonMark] Markdown) - `commonmark_x` ([CommonMark] Markdown with extensions) - `context` ([ConTeXt]) @@ -5258,11 +5260,11 @@ section from a BibTeX, BibLaTeX, or CSL JSON bibliography: pandoc chem.bib -s -f biblatex -t markdown pandoc chem.json -s -f csljson -t markdown -`pandoc` can also be used to produce CSL JSON bibliography -from BibTeX, BibLaTeX, or markdown YAML: +Indeed, `pandoc` can convert between any of these +citation formats: pandoc chem.bib -s -f biblatex -t csljson - pandoc chem.yaml -s -f markdown -t csljson + pandoc chem.yaml -s -f markdown -t biblatex Running pandoc on a bibliography file with the `--citeproc` option will create a formatted bibliography in the format diff --git a/pandoc.cabal b/pandoc.cabal index 0d63cbe35..27c25069b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -573,6 +573,7 @@ library Text.Pandoc.Writers.Shared, Text.Pandoc.Writers.OOXML, Text.Pandoc.Writers.AnnotatedTable, + Text.Pandoc.Writers.BibTeX, Text.Pandoc.Lua, Text.Pandoc.PDF, Text.Pandoc.UTF8, diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index ed723a11c..10730a1e9 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -1,4 +1,5 @@ {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleContexts #-} @@ -17,6 +18,7 @@ module Text.Pandoc.Citeproc.BibTeX ( Variant(..) , readBibtexString + , writeBibtexString ) where @@ -24,10 +26,11 @@ import Text.Pandoc.Definition import Text.Pandoc.Builder as B import Text.Pandoc.Readers.LaTeX (readLaTeX) import Text.Pandoc.Extensions (Extension(..), extensionsFromList) -import Text.Pandoc.Options (ReaderOptions(..)) -import Text.Pandoc.Class (runPure) +import Text.Pandoc.Options (ReaderOptions(..), WriterOptions) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Shared (stringify) +import Text.Pandoc.Writers.LaTeX (writeLaTeX) +import Text.Pandoc.Class (runPure) import qualified Text.Pandoc.Walk as Walk import Citeproc.Types import Citeproc.Pandoc () @@ -46,8 +49,9 @@ import qualified Data.Sequence as Seq import Data.Char (isAlphaNum, isDigit, isLetter, isUpper, toLower, toUpper, isLower, isPunctuation) -import Data.List (foldl', intercalate) +import Data.List (foldl', intercalate, intersperse) import Safe (readMay) +import Text.Printf (printf) data Variant = Bibtex | Biblatex deriving (Show, Eq, Ord) @@ -68,6 +72,250 @@ readBibtexString variant locale idpred contents = do Left err -> Left err Right xs -> return xs +-- | Write BibTeX or BibLaTeX given given a 'Reference'. +writeBibtexString :: WriterOptions -- ^ options (for writing LaTex) + -> Variant -- ^ bibtex or biblatex + -> Maybe Lang -- ^ Language + -> Reference Inlines -- ^ Reference to write + -> Text +writeBibtexString opts variant mblang ref = + "@" <> bibtexType <> "{" <> unItemId (referenceId ref) <> ",\n " <> + renderFields fs <> "\n}\n" + + where + bibtexType = + case referenceType ref of + "article-magazine" -> "article" + "article-newspaper" -> "article" + "article-journal" -> "article" + "book" -> "book" + "pamphlet" -> "booklet" + "dataset" | variant == Biblatex -> "dataset" + "webpage" | variant == Biblatex -> "online" + "chapter" -> case getVariable "editor" of + Just _ -> "incollection" + Nothing -> "inbook" + "entry-encyclopedia" | variant == Biblatex -> "inreference" + | otherwise -> "inbook" + "paper-conference" -> "inproceedings" + "thesis" -> case getVariableAsText "genre" of + Just "mathesis" -> "mastersthesis" + _ -> "phdthesis" + "patent" | variant == Biblatex -> "patent" + "report" | variant == Biblatex -> "report" + | otherwise -> "techreport" + "speech" -> "unpublished" + "manuscript" -> "unpublished" + "graphic" | variant == Biblatex -> "artwork" + "song" | variant == Biblatex -> "music" + "legal_case" | variant == Biblatex -> "jurisdictionN" + "legislation" | variant == Biblatex -> "legislation" + "treaty" | variant == Biblatex -> "legal" + "personal_communication" | variant == Biblatex -> "letter" + "motion_picture" | variant == Biblatex -> "movie" + "review" | variant == Biblatex -> "review" + _ -> "misc" + + mbSubtype = + case referenceType ref of + "article-magazine" -> Just "magazine" + "article-newspaper" -> Just "newspaper" + _ -> Nothing + + fs = + case variant of + Biblatex -> + [ "author" + , "editor" + , "translator" + , "publisher" + , "title" + , "booktitle" + , "journal" + , "series" + , "edition" + , "volume" + , "volumes" + , "number" + , "pages" + , "date" + , "eventdate" + , "urldate" + , "address" + , "url" + , "doi" + , "isbn" + , "issn" + , "type" + , "entrysubtype" + , "note" + , "language" + , "abstract" + , "keywords" + ] + Bibtex -> + [ "author" + , "editor" + , "translator" + , "publisher" + , "title" + , "booktitle" + , "journal" + , "series" + , "edition" + , "volume" + , "number" + , "pages" + , "year" + , "month" + , "address" + , "type" + , "note" + ] + + valToInlines (TextVal t) = B.text t + valToInlines (FancyVal ils) = ils + valToInlines (NumVal n) = B.text (T.pack $ show n) + valToInlines (NamesVal names) = + mconcat $ intersperse (B.space <> B.text "and" <> B.space) + $ map renderName names + valToInlines (DateVal date) = B.text $ + case dateLiteral date of + Just t -> t + Nothing -> T.intercalate "/" (map renderDatePart (dateParts date)) <> + (if dateCirca date then "~" else mempty) + + renderDatePart (DateParts xs) = T.intercalate "-" $ + map (T.pack . printf "%02d") xs + + renderName name = + case nameLiteral name of + Just t -> B.text t + Nothing -> spacedMaybes + [ nameNonDroppingParticle name + , nameFamily name + , if nameCommaSuffix name + then (", " <>) <$> nameSuffix name + else nameSuffix name ] + <> + spacedMaybes + [ (", " <>) <$> nameGiven name, + nameDroppingParticle name ] + + titlecase = case mblang of + Just (Lang "en" _) -> titlecase' + Nothing -> titlecase' + _ -> id + + titlecase' = addTextCase mblang TitleCase . + (\ils -> B.fromList + (case B.toList ils of + Str t : xs -> Str t : Walk.walk spanAroundCapitalizedWords xs + xs -> Walk.walk spanAroundCapitalizedWords xs)) + + -- protect capitalized words when we titlecase + spanAroundCapitalizedWords (Str t) + | not (T.all (\c -> isLower c || not (isLetter c)) t) = + Span ("",["nocase"],[]) [Str t] + spanAroundCapitalizedWords x = x + + spacedMaybes = mconcat . intersperse B.space . mapMaybe (fmap B.text) + + toLaTeX x = + case runPure (writeLaTeX opts $ doc (B.plain x)) of + Left _ -> Nothing + Right t -> Just t + + renderField name = (\contents -> name <> " = {" <> contents <> "}") + <$> getContentsFor name + + getVariable v = lookupVariable (toVariable v) ref + + getVariableAsText v = (stringify . valToInlines) <$> getVariable v + + getYear val = + case val of + DateVal date -> + case dateLiteral date of + Just t -> toLaTeX (B.text t) + Nothing -> + case dateParts date of + [DateParts (y1:_), DateParts (y2:_)] -> + Just (T.pack (printf "%04d" y1) <> "--" <> + T.pack (printf "%04d" y2)) + [DateParts (y1:_)] -> + Just (T.pack (printf "%04d" y1)) + _ -> Nothing + _ -> Nothing + + toMonth 1 = "jan" + toMonth 2 = "feb" + toMonth 3 = "mar" + toMonth 4 = "apr" + toMonth 5 = "may" + toMonth 6 = "jun" + toMonth 7 = "jul" + toMonth 8 = "aug" + toMonth 9 = "sep" + toMonth 10 = "oct" + toMonth 11 = "nov" + toMonth 12 = "dec" + toMonth x = T.pack $ show x + + getMonth val = + case val of + DateVal date -> + case dateParts date of + [DateParts (_:m1:_), DateParts (_:m2:_)] -> + Just (toMonth m1 <> "--" <> toMonth m2) + [DateParts (_:m1:_)] -> Just (toMonth m1) + _ -> Nothing + _ -> Nothing + + getContentsFor :: Text -> Maybe Text + getContentsFor "type" = + getVariableAsText "genre" >>= + \case + "mathesis" -> Just "mastersthesis" + "phdthesis" -> Just "phdthesis" + _ -> Nothing + getContentsFor "entrysubtype" = mbSubtype + getContentsFor "journal" + | bibtexType `elem` ["article", "periodical", "suppperiodical", "review"] + = getVariable "container-title" >>= toLaTeX . valToInlines + | otherwise = Nothing + getContentsFor "booktitle" + | bibtexType `elem` + ["inbook","incollection","inproceedings","inreference","bookinbook"] + = (getVariable "volume-title" <|> getVariable "container-title") + >>= toLaTeX . valToInlines + | otherwise = Nothing + getContentsFor "series" = getVariable "collection-title" + >>= toLaTeX . valToInlines + getContentsFor "address" = getVariable "publisher-place" + >>= toLaTeX . valToInlines + getContentsFor "date" = getVariable "issued" >>= toLaTeX . valToInlines + getContentsFor "eventdate" = getVariable "event-date" >>= toLaTeX . valToInlines + getContentsFor "urldate" = getVariable "accessed" >>= toLaTeX . valToInlines + getContentsFor "year" = getVariable "issued" >>= getYear + getContentsFor "month" = getVariable "issued" >>= getMonth + getContentsFor "number" = (getVariable "number" + <|> getVariable "collection-number" + <|> getVariable "issue") >>= toLaTeX . valToInlines + + getContentsFor x = getVariable x >>= + if isURL x + then Just . stringify . valToInlines + else toLaTeX . + (if x == "title" + then titlecase + else id) . + valToInlines + + isURL x = x `elem` ["url","doi","issn","isbn"] + + renderFields = T.intercalate ",\n " . mapMaybe renderField + defaultLang :: Lang defaultLang = Lang "en" (Just "US") diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index e83f26329..3e539bff7 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -81,6 +81,8 @@ getDefaultTemplate writer = do case format of "native" -> return "" "csljson" -> return "" + "bibtex" -> return "" + "biblatex" -> return "" "json" -> return "" "docx" -> return "" "fb2" -> return "" diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index 49531d924..95d6270b5 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -21,6 +21,8 @@ module Text.Pandoc.Writers , writeAsciiDoc , writeAsciiDoctor , writeBeamer + , writeBibTeX + , writeBibLaTeX , writeCommonMark , writeConTeXt , writeCustom @@ -85,6 +87,7 @@ import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Error import Text.Pandoc.Writers.AsciiDoc +import Text.Pandoc.Writers.BibTeX import Text.Pandoc.Writers.CommonMark import Text.Pandoc.Writers.ConTeXt import Text.Pandoc.Writers.CslJson @@ -185,6 +188,8 @@ writers = [ ,("tei" , TextWriter writeTEI) ,("muse" , TextWriter writeMuse) ,("csljson" , TextWriter writeCslJson) + ,("bibtex" , TextWriter writeBibTeX) + ,("biblatex" , TextWriter writeBibLaTeX) ] -- | Retrieve writer, extensions based on formatSpec (format+extensions). diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs new file mode 100644 index 000000000..e1cb47ca1 --- /dev/null +++ b/src/Text/Pandoc/Writers/BibTeX.hs @@ -0,0 +1,48 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.BibTeX + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Writes a BibTeX or BibLaTeX bibliographies based on the +'references' metadata in a Pandoc document. +-} +module Text.Pandoc.Writers.BibTeX + ( writeBibTeX + , writeBibLaTeX + ) +where + +import Text.Pandoc.Options +import Text.Pandoc.Definition +import Data.Text (Text) +import Data.Maybe (mapMaybe) +import Citeproc (parseLang) +import Text.Pandoc.Class (PandocMonad) +import Text.Pandoc.Citeproc.BibTeX as BibTeX +import Text.Pandoc.Citeproc.MetaValue (metaValueToReference) +import Text.Pandoc.Writers.Shared (lookupMetaString) + +-- | Write BibTeX based on the references metadata from a Pandoc document. +writeBibTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeBibTeX = writeBibTeX' BibTeX.Bibtex + +-- | Write BibLaTeX based on the references metadata from a Pandoc document. +writeBibLaTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text +writeBibLaTeX = writeBibTeX' BibTeX.Biblatex + +writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text +writeBibTeX' variant opts (Pandoc meta _) = do + let mblang = case lookupMetaString "lang" meta of + "" -> Nothing + t -> Just $ parseLang t + let refs = case lookupMeta "references" meta of + Just (MetaList xs) -> mapMaybe metaValueToReference xs + _ -> [] + return $ mconcat $ + map (BibTeX.writeBibtexString opts variant mblang) refs + -- cgit v1.2.3 From b4b3560191b3699dd4db9d069244925a3c6074db Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Wed, 20 Jan 2021 19:09:36 +0100 Subject: JATS writer: allow to use element-citation --- MANUAL.txt | 7 ++ pandoc.cabal | 1 + src/Text/Pandoc/Extensions.hs | 14 +++ src/Text/Pandoc/Writers/JATS.hs | 21 ++-- src/Text/Pandoc/Writers/JATS/References.hs | 160 +++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/JATS/Types.hs | 4 +- test/command/7042.md | 146 ++++++++++++++++++++++++++ 7 files changed, 346 insertions(+), 7 deletions(-) create mode 100644 src/Text/Pandoc/Writers/JATS/References.hs create mode 100644 test/command/7042.md (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index b9c4ef637..7bf74a8f9 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3093,6 +3093,13 @@ output format. Some aspects of [Pandoc's Markdown citation syntax](#citations) are also accepted in `org` input. +#### Extension: `element_citations` #### + +In the `jats` output formats, this causes reference items to +be replaced with `<element-citation>` elements. These +elements are not influenced by CSL styles, but all information +on the item is included in tags. + #### Extension: `ntb` #### In the `context` output format this enables the use of [Natural Tables diff --git a/pandoc.cabal b/pandoc.cabal index 0d63cbe35..07feb10dd 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -638,6 +638,7 @@ library Text.Pandoc.Readers.Metadata, Text.Pandoc.Readers.Roff, Text.Pandoc.Writers.Docx.StyleMap, + Text.Pandoc.Writers.JATS.References, Text.Pandoc.Writers.JATS.Table, Text.Pandoc.Writers.JATS.Types, Text.Pandoc.Writers.LaTeX.Caption, diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 39c2a0489..7aa32c52c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -88,6 +88,7 @@ data Extension = -- does not affect readers/writers directly; it causes -- the eastAsianLineBreakFilter to be applied after -- parsing, in Text.Pandoc.App.convertWithOpts. + | Ext_element_citations -- ^ Use element-citation elements for JATS citations | Ext_emoji -- ^ Support emoji like :smile: | Ext_empty_paragraphs -- ^ Allow empty paragraphs | Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML @@ -412,6 +413,11 @@ getDefaultExtensions "textile" = extensionsFromList Ext_smart, Ext_raw_html, Ext_auto_identifiers] +getDefaultExtensions "jats" = extensionsFromList + [Ext_auto_identifiers] +getDefaultExtensions "jats_archiving" = getDefaultExtensions "jats" +getDefaultExtensions "jats_publishing" = getDefaultExtensions "jats" +getDefaultExtensions "jats_articleauthoring" = getDefaultExtensions "jats" getDefaultExtensions "opml" = pandocExtensions -- affects notes getDefaultExtensions _ = extensionsFromList [Ext_auto_identifiers] @@ -554,6 +560,14 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_smart , Ext_raw_tex ] + getAll "jats" = + extensionsFromList + [ Ext_auto_identifiers + , Ext_element_citations + ] + getAll "jats_archiving" = getAll "jats" + getAll "jats_publishing" = getAll "jats" + getAll "jats_articleauthoring" = getAll "jats" getAll "opml" = allMarkdownExtensions -- affects notes getAll "twiki" = autoIdExtensions <> extensionsFromList diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index c75d40745..a9369db7a 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -29,6 +29,7 @@ import Data.Maybe (fromMaybe) import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime) import qualified Data.Text as T import Data.Text (Text) +import Text.Pandoc.Citeproc (getReferences) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (languages, languagesByExtension) @@ -40,6 +41,7 @@ import Text.DocLayout import Text.Pandoc.Shared import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Context(..), Val(..)) +import Text.Pandoc.Writers.JATS.References (referencesToJATS) import Text.Pandoc.Writers.JATS.Table (tableToJATS) import Text.Pandoc.Writers.JATS.Types import Text.Pandoc.Writers.Math @@ -71,15 +73,19 @@ writeJATS = writeJatsArchiving -- | Convert a @'Pandoc'@ document to JATS. writeJats :: PandocMonad m => JATSTagSet -> WriterOptions -> Pandoc -> m Text -writeJats tagSet opts d = - runReaderT (evalStateT (docToJATS opts d) initialState) - environment - where initialState = JATSState { jatsNotes = [] } - environment = JATSEnv +writeJats tagSet opts d = do + refs <- if extensionEnabled Ext_element_citations $ writerExtensions opts + then getReferences Nothing d + else pure [] + let environment = JATSEnv { jatsTagSet = tagSet , jatsInlinesWriter = inlinesToJATS , jatsBlockWriter = blockToJATS + , jatsReferences = refs } + let initialState = JATSState { jatsNotes = [] } + runReaderT (evalStateT (docToJATS opts d) initialState) + environment -- | Convert Pandoc document to string in JATS format. docToJATS :: PandocMonad m => WriterOptions -> Pandoc -> JATS m Text @@ -258,7 +264,10 @@ blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident = inTagsSimple "mixed-citation" <$> inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do - contents <- blocksToJATS opts xs + refs <- asks jatsReferences + contents <- if null refs + then blocksToJATS opts xs + else referencesToJATS opts refs return $ inTagsIndented "ref-list" contents blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do contents <- blocksToJATS opts bs diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs new file mode 100644 index 000000000..4ee7eb9dd --- /dev/null +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.JATS.References + Copyright : © 2021 Albert Krewinkel + License : GNU GPL, version 2 or above + + Maintainer : Albert Krewinkel <tarleb@zeitkraut.de> + Stability : alpha + Portability : portable + +Creation of a bibliography list using @<element-citation>@ elements in +reference items. +-} +module Text.Pandoc.Writers.JATS.References + ( referencesToJATS + , referenceToJATS + ) where + +import Citeproc.Pandoc () +import Citeproc.Types + ( Date (..), DateParts (..), ItemId (..), Name (..), Reference (..) + , Val (..) , lookupVariable, valToText + ) +import Data.Text (Text) +import Text.DocLayout (Doc, empty, isEmpty, literal, vcat) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Builder (Inlines) +import Text.Pandoc.Options (WriterOptions) +import Text.Pandoc.Shared (tshow) +import Text.Pandoc.Writers.JATS.Types +import Text.Pandoc.XML (inTags) +import qualified Data.Text as T + +referencesToJATS :: PandocMonad m + => WriterOptions + -> [Reference Inlines] + -> JATS m (Doc Text) +referencesToJATS opts = + fmap (inTags True "ref-list" [] . vcat) . mapM (referenceToJATS opts) + +referenceToJATS :: PandocMonad m + => WriterOptions + -> Reference Inlines + -> JATS m (Doc Text) +referenceToJATS _opts ref = do + let refType = referenceType ref + let pubType = [("publication-type", refType) | not (T.null refType)] + let wrap = inTags True "ref" [("id", "ref-" <> unItemId (referenceId ref))] + . inTags True "element-citation" pubType + return . wrap . vcat $ + [ authors + , "title" `varInTag` + if refType == "book" + then "source" + else "article-title" + , if refType == "book" + then empty + else "container-title" `varInTag` "source" + , editors + , "publisher" `varInTag` "publisher-name" + , "publisher-place" `varInTag` "publisher-loc" + , yearTag + , accessed + , "volume" `varInTag` "volume" + , "issue" `varInTag` "issue" + , "page-first" `varInTag` "fpage" + , "page-last" `varInTag` "lpage" + , "pages" `varInTag` "page-range" + , "ISBN" `varInTag` "isbn" + , "ISSN" `varInTag` "issn" + , varInTagWith "doi" "pub-id" [("pub-id-type", "doi")] + , varInTagWith "pmid" "pub-id" [("pub-id-type", "pmid")] + ] + where + varInTag var tagName = varInTagWith var tagName [] + + varInTagWith var tagName tagAttribs = + case lookupVariable var ref >>= valToText of + Nothing -> mempty + Just val -> inTags' tagName tagAttribs $ literal val + + authors = case lookupVariable "author" ref of + Just (NamesVal names) -> + inTags True "person-group" [("person-group-type", "author")] . vcat $ + map toNameElements names + _ -> empty + + editors = case lookupVariable "editor" ref of + Just (NamesVal names) -> + inTags True "person-group" [("person-group-type", "editor")] . vcat $ + map toNameElements names + _ -> empty + + yearTag = + case lookupVariable "issued" ref of + Just (DateVal date) -> toDateElements date + _ -> empty + + accessed = + case lookupVariable "accessed" ref of + Just (DateVal d) -> inTags' "date-in-citation" + [("content-type", "access-date")] + (toDateElements d) + _ -> empty + +toDateElements :: Date -> Doc Text +toDateElements date = + case dateParts date of + dp@(DateParts (y:m:d:_)):_ -> yearElement y dp <> + monthElement m <> + dayElement d + dp@(DateParts (y:m:_)):_ -> yearElement y dp <> monthElement m + dp@(DateParts (y:_)):_ -> yearElement y dp + _ -> empty + +yearElement :: Int -> DateParts -> Doc Text +yearElement year dp = + inTags' "year" [("iso-8601-date", iso8601 dp)] $ literal (fourDigits year) + +monthElement :: Int -> Doc Text +monthElement month = inTags' "month" [] . literal $ twoDigits month + +dayElement :: Int -> Doc Text +dayElement day = inTags' "day" [] . literal $ twoDigits day + +iso8601 :: DateParts -> Text +iso8601 = T.intercalate "-" . \case + DateParts (y:m:d:_) -> [fourDigits y, twoDigits m, twoDigits d] + DateParts (y:m:_) -> [fourDigits y, twoDigits m] + DateParts (y:_) -> [fourDigits y] + _ -> [] + +twoDigits :: Int -> Text +twoDigits n = T.takeEnd 2 $ '0' `T.cons` tshow n + +fourDigits :: Int -> Text +fourDigits n = T.takeEnd 4 $ "000" <> tshow n + +toNameElements :: Name -> Doc Text +toNameElements name = + if not (isEmpty nameTags) + then inTags' "name" [] nameTags + else nameLiteral name `inNameTag` "string-name" + where + inNameTag val tag = maybe empty (inTags' tag [] . literal) val + surnamePrefix = maybe mempty (`T.snoc` ' ') $ + nameNonDroppingParticle name + givenSuffix = maybe mempty (T.cons ' ') $ + nameDroppingParticle name + nameTags = mconcat + [ ((surnamePrefix <>) <$> nameFamily name) `inNameTag` "surname" + , ((<> givenSuffix) <$> nameGiven name) `inNameTag` "given-names" + , nameSuffix name `inNameTag` "suffix" + ] + +-- | Put the supplied contents between start and end tags of tagType, +-- with specified attributes. +inTags' :: Text -> [(Text, Text)] -> Doc Text -> Doc Text +inTags' = inTags False diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs index 54ed4a8bd..6fdddc0b5 100644 --- a/src/Text/Pandoc/Writers/JATS/Types.hs +++ b/src/Text/Pandoc/Writers/JATS/Types.hs @@ -17,11 +17,12 @@ module Text.Pandoc.Writers.JATS.Types ) where +import Citeproc.Types (Reference) import Control.Monad.Reader (ReaderT) import Control.Monad.State (StateT) import Data.Text (Text) import Text.DocLayout (Doc) -import Text.Pandoc.Definition (Block, Inline) +import Text.Pandoc.Builder (Block, Inline, Inlines) import Text.Pandoc.Options (WriterOptions) -- | JATS tag set variant @@ -40,6 +41,7 @@ data JATSEnv m = JATSEnv { jatsTagSet :: JATSTagSet , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text) , jatsBlockWriter :: WriterOptions -> Block -> JATS m (Doc Text) + , jatsReferences :: [Reference Inlines] } -- | JATS writer type diff --git a/test/command/7042.md b/test/command/7042.md new file mode 100644 index 000000000..de0294da3 --- /dev/null +++ b/test/command/7042.md @@ -0,0 +1,146 @@ +``` +% pandoc -f markdown -t jats_publishing+element_citations --citeproc -s +--- +nocite: "[@*]" +references: +- author: + - family: Jane + given: Doe + container-title: Public Library of Tests + id: year-month + issued: 1999-08 + title: Year and month + type: article-journal +- accessed: 1999-01-22 + author: + - family: Negidius + given: Numerius + container-title: Public Library of Tests + id: access-date + issued: 1911-10-03 + title: Entry with access date + type: article-journal +- author: + - family: Beethoven + given: Ludwig + dropping-particle: van + - family: Bray + given: Jan + non-dropping-particle: de + container-title: Public Library of Tests + id: name-particles + issued: 1820 + title: Name particles, dropping and non-dropping + type: article-journal +- author: + - 宮水 三葉 + - 立花 瀧 + title: Big Book of Tests + id: book-with-japanese-authors + issued: 2016 + type: book +- author: + - family: Watson + given: J. D. + - family: Crick + given: F. H. C. + container-title: Nature + doi: '10.1038/171737a0' + id: full-journal-article-entry + issue: 4356 + issued: '1953-04-01' + pages: 737-738 + pmid: 13054692 + title: 'Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid' + type: article-journal + volume: 171 +... +^D +<?xml version="1.0" encoding="utf-8" ?> +<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN" + "JATS-publishing1.dtd"> +<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other"> +<front> +<journal-meta> +<journal-id></journal-id> +<journal-title-group> +</journal-title-group> +<issn></issn> +<publisher> +<publisher-name></publisher-name> +</publisher> +</journal-meta> +<article-meta> +</article-meta> +</front> +<body> + +</body> +<back> +<ref-list> + <ref-list> + <ref id="ref-year-month"> + <element-citation publication-type="article-journal"> + <person-group person-group-type="author"> + <name><surname>Jane</surname><given-names>Doe</given-names></name> + </person-group> + <article-title>Year and month</article-title> + <source>Public Library of Tests</source> + <year iso-8601-date="1999-08">1999</year><month>08</month> + </element-citation> + </ref> + <ref id="ref-access-date"> + <element-citation publication-type="article-journal"> + <person-group person-group-type="author"> + <name><surname>Negidius</surname><given-names>Numerius</given-names></name> + </person-group> + <article-title>Entry with access date</article-title> + <source>Public Library of Tests</source> + <year iso-8601-date="1911-10-03">1911</year><month>10</month><day>03</day> + <date-in-citation content-type="access-date"><year iso-8601-date="1999-01-22">1999</year><month>01</month><day>22</day></date-in-citation> + </element-citation> + </ref> + <ref id="ref-name-particles"> + <element-citation publication-type="article-journal"> + <person-group person-group-type="author"> + <name><surname>Beethoven</surname><given-names>Ludwig van</given-names></name> + <name><surname>de Bray</surname><given-names>Jan</given-names></name> + </person-group> + <article-title>Name particles, dropping and non-dropping</article-title> + <source>Public Library of Tests</source> + <year iso-8601-date="1820">1820</year> + </element-citation> + </ref> + <ref id="ref-book-with-japanese-authors"> + <element-citation publication-type="book"> + <person-group person-group-type="author"> + <string-name>宮水 三葉</string-name> + <string-name>立花 瀧</string-name> + </person-group> + <source>Big Book of Tests</source> + <year iso-8601-date="2016">2016</year> + </element-citation> + </ref> + <ref id="ref-full-journal-article-entry"> + <element-citation publication-type="article-journal"> + <person-group person-group-type="author"> + <name><surname>Watson</surname><given-names>J. D.</given-names></name> + <name><surname>Crick</surname><given-names>F. H. C.</given-names></name> + </person-group> + <article-title>Molecular Structure of Nucleic Acids: A Structure for Deoxyribose Nucleic Acid</article-title> + <source>Nature</source> + <year iso-8601-date="1953-04-01">1953</year><month>04</month><day>01</day> + <volume>171</volume> + <issue>4356</issue> + <fpage>737</fpage> + <page-range>737-738</page-range> + <pub-id pub-id-type="doi">10.1038/171737a0</pub-id> + <pub-id pub-id-type="pmid">13054692</pub-id> + </element-citation> + </ref> + </ref-list> +</ref-list> +</back> +</article> + +``` -- cgit v1.2.3 From 198ce0cde99731680b62294a7dff1b218b29bd4d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 22 Jan 2021 20:49:41 -0800 Subject: ImageSize: use viewBox for svg if no length, width. This change allows pandoc to extract size information from more SVGs. Closes #7045. --- src/Text/Pandoc/ImageSize.hs | 8 ++++++-- test/command/svg.md | 30 ++++++++++++++---------------- 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index 098c16721..e19958f6a 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -328,12 +328,16 @@ getSize img = svgSize :: WriterOptions -> ByteString -> Maybe ImageSize svgSize opts img = do doc <- Xml.parseXMLDoc $ UTF8.toString img + let viewboxSize = do + vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc + [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) + return (w,h) let dpi = fromIntegral $ writerDpi opts let dirToInt dir = do dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack return $ inPixel opts dim - w <- dirToInt "width" - h <- dirToInt "height" + w <- dirToInt "width" <|> (fst <$> viewboxSize) + h <- dirToInt "height" <|> (snd <$> viewboxSize) return ImageSize { pxX = w , pxY = h diff --git a/test/command/svg.md b/test/command/svg.md index 57506570b..4ba836b20 100644 --- a/test/command/svg.md +++ b/test/command/svg.md @@ -36,28 +36,27 @@ % pandoc -f latex -t icml \includegraphics{command/SVG_logo.svg} ^D -[WARNING] Could not determine image size for 'command/SVG_logo.svg': could not determine SVG size <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> - <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 150 -100"> + <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 37.5 -37.5"> <Properties> <PathGeometry> <GeometryPathType PathOpen="false"> <PathPointArray> - <PathPointType Anchor="-150 -100" LeftDirection="-150 -100" RightDirection="-150 -100" /> - <PathPointType Anchor="-150 100" LeftDirection="-150 100" RightDirection="-150 100" /> - <PathPointType Anchor="150 100" LeftDirection="150 100" RightDirection="150 100" /> - <PathPointType Anchor="150 -100" LeftDirection="150 -100" RightDirection="150 -100" /> + <PathPointType Anchor="-37.5 -37.5" LeftDirection="-37.5 -37.5" RightDirection="-37.5 -37.5" /> + <PathPointType Anchor="-37.5 37.5" LeftDirection="-37.5 37.5" RightDirection="-37.5 37.5" /> + <PathPointType Anchor="37.5 37.5" LeftDirection="37.5 37.5" RightDirection="37.5 37.5" /> + <PathPointType Anchor="37.5 -37.5" LeftDirection="37.5 -37.5" RightDirection="37.5 -37.5" /> </PathPointArray> </GeometryPathType> </PathGeometry> </Properties> - <Image Self="ue6" ItemTransform="1 0 0 1 -150 -100"> + <Image Self="ue6" ItemTransform="1 0 0 1 -37.5 -37.5"> <Properties> <Profile type="string"> $ID/Embedded </Profile> - <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" /> + <GraphicBounds Left="0" Top="0" Right="75" Bottom="75" /> </Properties> <Link Self="ueb" LinkResourceURI="file:command/SVG_logo.svg" /> </Image> @@ -70,28 +69,27 @@ % pandoc -f latex -t icml \includegraphics{command/SVG_logo-without-xml-declaration.svg} ^D -[WARNING] Could not determine image size for 'command/SVG_logo-without-xml-declaration.svg': could not determine SVG size <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> - <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 150 -100"> + <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 37.5 -37.5"> <Properties> <PathGeometry> <GeometryPathType PathOpen="false"> <PathPointArray> - <PathPointType Anchor="-150 -100" LeftDirection="-150 -100" RightDirection="-150 -100" /> - <PathPointType Anchor="-150 100" LeftDirection="-150 100" RightDirection="-150 100" /> - <PathPointType Anchor="150 100" LeftDirection="150 100" RightDirection="150 100" /> - <PathPointType Anchor="150 -100" LeftDirection="150 -100" RightDirection="150 -100" /> + <PathPointType Anchor="-37.5 -37.5" LeftDirection="-37.5 -37.5" RightDirection="-37.5 -37.5" /> + <PathPointType Anchor="-37.5 37.5" LeftDirection="-37.5 37.5" RightDirection="-37.5 37.5" /> + <PathPointType Anchor="37.5 37.5" LeftDirection="37.5 37.5" RightDirection="37.5 37.5" /> + <PathPointType Anchor="37.5 -37.5" LeftDirection="37.5 -37.5" RightDirection="37.5 -37.5" /> </PathPointArray> </GeometryPathType> </PathGeometry> </Properties> - <Image Self="ue6" ItemTransform="1 0 0 1 -150 -100"> + <Image Self="ue6" ItemTransform="1 0 0 1 -37.5 -37.5"> <Properties> <Profile type="string"> $ID/Embedded </Profile> - <GraphicBounds Left="0" Top="0" Right="300" Bottom="200" /> + <GraphicBounds Left="0" Top="0" Right="75" Bottom="75" /> </Properties> <Link Self="ueb" LinkResourceURI="file:command/SVG_logo-without-xml-declaration.svg" /> </Image> -- cgit v1.2.3 From 490065f3ed3dd9377a740ad6fcbc441a658889dd Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 26 Jan 2021 14:19:30 +0100 Subject: Lua: always load built-in Lua scripts from default data-dir The Lua modules `pandoc` and `pandoc.List` are now always loaded from the system's default data directory. Loading from a different directory by overriding the default path, e.g. via `--data-dir`, is no longer supported to avoid unexpected behavior and to address security concerns. --- src/Text/Pandoc/Lua/Init.hs | 18 ++++++++++++++---- src/Text/Pandoc/Lua/Module/Pandoc.hs | 8 ++++---- src/Text/Pandoc/Lua/Packages.hs | 33 +++++++-------------------------- src/Text/Pandoc/Lua/PandocLua.hs | 31 +++++++++++++++++++------------ 4 files changed, 44 insertions(+), 46 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Lua/Init.hs b/src/Text/Pandoc/Lua/Init.hs index 0a5ce85cb..baa6f0295 100644 --- a/src/Text/Pandoc/Lua/Init.hs +++ b/src/Text/Pandoc/Lua/Init.hs @@ -12,17 +12,18 @@ module Text.Pandoc.Lua.Init ( runLua ) where +import Control.Monad (when) import Control.Monad.Catch (try) import Control.Monad.Trans (MonadIO (..)) import Data.Data (Data, dataTypeConstrs, dataTypeOf, showConstr) import Foreign.Lua (Lua) import GHC.IO.Encoding (getForeignEncoding, setForeignEncoding, utf8) +import Text.Pandoc.Class.PandocMonad (readDataFile) import Text.Pandoc.Class.PandocIO (PandocIO) import Text.Pandoc.Error (PandocError) import Text.Pandoc.Lua.Packages (installPandocPackageSearcher) -import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, - loadScriptFromDataDir, runPandocLua) - +import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, runPandocLua) +import Text.Pandoc.Lua.Util (throwTopMessageAsError') import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Definition as Pandoc import qualified Text.Pandoc.Lua.Module.Pandoc as ModulePandoc @@ -44,7 +45,7 @@ initLuaState = do liftPandocLua Lua.openlibs installPandocPackageSearcher initPandocModule - loadScriptFromDataDir "init.lua" + loadInitScript "init.lua" where initPandocModule :: PandocLua () initPandocModule = do @@ -61,6 +62,15 @@ initLuaState = do -- assign module to global variable liftPandocLua $ Lua.setglobal "pandoc" + loadInitScript :: FilePath -> PandocLua () + loadInitScript scriptFile = do + script <- readDataFile scriptFile + status <- liftPandocLua $ Lua.dostring script + when (status /= Lua.OK) . liftPandocLua $ + throwTopMessageAsError' + (("Couldn't load '" ++ scriptFile ++ "'.\n") ++) + + -- | AST elements are marshaled via normal constructor functions in the -- @pandoc@ module. However, accessing Lua globals from Haskell is -- expensive (due to error handling). Accessing the Lua registry is much diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index a9ce3866d..a8afecd2e 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -25,7 +25,7 @@ import Text.Pandoc.Definition (Block, Inline) import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter, SingletonsList (..)) import Text.Pandoc.Lua.Marshaling () import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua, - loadScriptFromDataDir) + loadDefaultModule) import Text.Pandoc.Walk (Walkable) import Text.Pandoc.Options (ReaderOptions (readerExtensions)) import Text.Pandoc.Process (pipeProcess) @@ -38,11 +38,11 @@ import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Lua.Util as LuaUtil import Text.Pandoc.Error --- | Push the "pandoc" on the lua stack. Requires the `list` module to be --- loaded. +-- | Push the "pandoc" package to the Lua stack. Requires the `List` +-- module to be loadable. pushModule :: PandocLua NumResults pushModule = do - loadScriptFromDataDir "pandoc.lua" + loadDefaultModule "pandoc" addFunction "read" readDoc addFunction "pipe" pipeFn addFunction "walk_block" walkBlock diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs index d62fb725d..5949a1a7d 100644 --- a/src/Text/Pandoc/Lua/Packages.hs +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -1,6 +1,3 @@ -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE LambdaCase #-} -{-# LANGUAGE ScopedTypeVariables #-} {- | Module : Text.Pandoc.Lua.Packages Copyright : Copyright © 2017-2021 Albert Krewinkel @@ -15,13 +12,9 @@ module Text.Pandoc.Lua.Packages ( installPandocPackageSearcher ) where -import Control.Monad.Catch (try) import Control.Monad (forM_) -import Data.ByteString (ByteString) -import Foreign.Lua (Lua, NumResults) -import Text.Pandoc.Error (PandocError) -import Text.Pandoc.Class.PandocMonad (readDataFile) -import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua) +import Foreign.Lua (NumResults) +import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, loadDefaultModule) import qualified Foreign.Lua as Lua import qualified Foreign.Lua.Module.Text as Text @@ -54,24 +47,12 @@ pandocPackageSearcher pkgName = "pandoc.types" -> pushWrappedHsFun Types.pushModule "pandoc.utils" -> pushWrappedHsFun Utils.pushModule "text" -> pushWrappedHsFun Text.pushModule - _ -> searchPureLuaLoader + "pandoc.List" -> pushWrappedHsFun (loadDefaultModule pkgName) + _ -> reportPandocSearcherFailure where pushWrappedHsFun f = liftPandocLua $ do Lua.pushHaskellFunction f return 1 - searchPureLuaLoader = do - let filename = pkgName ++ ".lua" - try (readDataFile filename) >>= \case - Right script -> pushWrappedHsFun (loadStringAsPackage pkgName script) - Left (_ :: PandocError) -> liftPandocLua $ do - Lua.push ("\n\tno file '" ++ filename ++ "' in pandoc's datadir") - return (1 :: NumResults) - -loadStringAsPackage :: String -> ByteString -> Lua NumResults -loadStringAsPackage pkgName script = do - status <- Lua.dostring script - if status == Lua.OK - then return (1 :: NumResults) - else do - msg <- Lua.popValue - Lua.raiseError ("Error while loading `" <> pkgName <> "`.\n" <> msg) + reportPandocSearcherFailure = liftPandocLua $ do + Lua.push ("\n\t" <> pkgName <> "is not one of pandoc's default packages") + return (1 :: NumResults) diff --git a/src/Text/Pandoc/Lua/PandocLua.hs b/src/Text/Pandoc/Lua/PandocLua.hs index 4beac22b7..750e019b6 100644 --- a/src/Text/Pandoc/Lua/PandocLua.hs +++ b/src/Text/Pandoc/Lua/PandocLua.hs @@ -23,24 +23,23 @@ module Text.Pandoc.Lua.PandocLua , runPandocLua , liftPandocLua , addFunction - , loadScriptFromDataDir + , loadDefaultModule ) where -import Control.Monad (when) import Control.Monad.Catch (MonadCatch, MonadMask, MonadThrow) import Control.Monad.Except (MonadError (catchError, throwError)) import Control.Monad.IO.Class (MonadIO (liftIO)) import Foreign.Lua (Lua (..), NumResults, Pushable, ToHaskellFunction) import Text.Pandoc.Class.PandocIO (PandocIO) -import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDataFile) -import Text.Pandoc.Error (PandocError) +import Text.Pandoc.Class.PandocMonad (PandocMonad (..), readDefaultDataFile) +import Text.Pandoc.Error (PandocError (PandocLuaError)) import Text.Pandoc.Lua.Global (Global (..), setGlobals) import Text.Pandoc.Lua.ErrorConversion (errorConversion) import qualified Control.Monad.Catch as Catch +import qualified Data.Text as T import qualified Foreign.Lua as Lua import qualified Text.Pandoc.Class.IO as IO -import qualified Text.Pandoc.Lua.Util as LuaUtil -- | Type providing access to both, pandoc and Lua operations. newtype PandocLua a = PandocLua { unPandocLua :: Lua a } @@ -86,14 +85,22 @@ addFunction name fn = liftPandocLua $ do Lua.pushHaskellFunction fn Lua.rawset (-3) --- | Load a file from pandoc's data directory. -loadScriptFromDataDir :: FilePath -> PandocLua () -loadScriptFromDataDir scriptFile = do - script <- readDataFile scriptFile +-- | Load a pure Lua module included with pandoc. Leaves the result on +-- the stack and returns @NumResults 1@. +-- +-- The script is loaded from the default data directory. We do not load +-- from data directories supplied via command line, as this could cause +-- scripts to be executed even though they had not been passed explicitly. +loadDefaultModule :: String -> PandocLua NumResults +loadDefaultModule name = do + script <- readDefaultDataFile (name <> ".lua") status <- liftPandocLua $ Lua.dostring script - when (status /= Lua.OK) . liftPandocLua $ - LuaUtil.throwTopMessageAsError' - (("Couldn't load '" ++ scriptFile ++ "'.\n") ++) + if status == Lua.OK + then return (1 :: NumResults) + else do + msg <- liftPandocLua Lua.popValue + let err = "Error while loading `" <> name <> "`.\n" <> msg + throwError $ PandocLuaError (T.pack err) -- | Global variables which should always be set. defaultGlobals :: PandocIO [Global] -- cgit v1.2.3 From 12bc6625352aaece955f9f0700f88e9280721ced Mon Sep 17 00:00:00 2001 From: Mauro Bieg <mb21@users.noreply.github.com> Date: Tue, 26 Jan 2021 08:53:26 +0100 Subject: LaTeX writer: change BCP47 lang tag from jp to ja fixes #7047 --- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index df922e17b..93603a26e 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1520,7 +1520,7 @@ commonFromBcp47 (Lang l _ _ _) = fromIso l fromIso "ie" = "interlingua" fromIso "is" = "icelandic" fromIso "it" = "italian" - fromIso "jp" = "japanese" + fromIso "ja" = "japanese" fromIso "km" = "khmer" fromIso "kmr" = "kurmanji" fromIso "kn" = "kannada" -- cgit v1.2.3 From 98c2a52b4ee6c833c0a2f2652386cec024e377eb Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 26 Jan 2021 22:45:57 -0800 Subject: Clean up BibTeX parsing. Previously there was a messy code path that gave strange results in some cases, not passing through raw tex but trying to extract a string content. This was an artefact of trying to handle some special bibtex-specific commands in the BibTeX reader. Now we just handle these in the LaTeX reader and simplify parsing in the BibTeX reader. This does mean that more raw tex will be passed through (and currently this is not sensitive to the `raw_tex` extension; this should be fixed). Closes #7049. --- src/Text/Pandoc/Citeproc/BibTeX.hs | 33 +-------------------------------- src/Text/Pandoc/Readers/LaTeX.hs | 18 ++++++++++++++++++ test/command/biblatex-cotton.md | 6 +++--- test/command/biblatex-murray.md | 5 +++-- 4 files changed, 25 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 10730a1e9..5b9068378 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -750,41 +750,10 @@ blocksToInlines bs = _ -> B.fromList $ Walk.query (:[]) bs adjustSpans :: Lang -> Inline -> Inline -adjustSpans lang (RawInline (Format "latex") s) - | s == "\\hyphen" || s == "\\hyphen " = Str "-" - | otherwise = parseRawLaTeX lang s +adjustSpans lang (Span ("",[],[("bibstring",s)]) _) = Str $ resolveKey' lang s adjustSpans _ SoftBreak = Space adjustSpans _ x = x -parseRawLaTeX :: Lang -> Text -> Inline -parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) = - case parseLaTeX lang contents of - Right [Para ys] -> f command ys - Right [Plain ys] -> f command ys - Right [] -> f command [] - _ -> RawInline (Format "latex") t - where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs - command = T.strip command' - contents = T.drop 1 $ T.dropEnd 1 contents' - f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils] - f "mkbibemph" ils = Span nullAttr [Emph ils] - f "mkbibitalic" ils = Span nullAttr [Emph ils] - f "mkbibbold" ils = Span nullAttr [Strong ils] - f "mkbibparens" ils = Span nullAttr $ - [Str "("] ++ ils ++ [Str ")"] - f "mkbibbrackets" ils = Span nullAttr $ - [Str "["] ++ ils ++ [Str "]"] - -- ... both should be nestable & should work in year fields - f "autocap" ils = Span nullAttr ils - -- TODO: should work in year fields - f "textnormal" ils = Span ("",["nodecor"],[]) ils - f "bibstring" [Str s] = Str $ resolveKey' lang s - f "adddot" [] = Str "." - f "adddotspace" [] = Span nullAttr [Str ".", Space] - f "addabbrvspace" [] = Space - f _ ils = Span nullAttr ils -parseRawLaTeX _ t = RawInline (Format "latex") t - latex' :: Text -> Bib [Block] latex' t = do lang <- gets localeLang diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f49323996..91c71c000 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -940,6 +940,24 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("uline", underline <$> tok) -- plain tex stuff that should just be passed through as raw tex , ("ifdim", ifdim) + -- bibtex + , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok) + , ("mkbibemph", spanWith nullAttr . emph <$> tok) + , ("mkbibitalic", spanWith nullAttr . emph <$> tok) + , ("mkbibbold", spanWith nullAttr . strong <$> tok) + , ("mkbibparens", + spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok) + , ("mkbibbrackets", + spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok) + , ("autocap", spanWith nullAttr <$> tok) + , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) + , ("bibstring", + (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize + <$> braced) + , ("adddot", pure (str ".")) + , ("adddotspace", pure (spanWith nullAttr (str "." <> space))) + , ("addabbrvspace", pure space) + , ("hyphen", pure (str "-")) ] accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines diff --git a/test/command/biblatex-cotton.md b/test/command/biblatex-cotton.md index 51c3ee2f4..7a7a2e296 100644 --- a/test/command/biblatex-cotton.md +++ b/test/command/biblatex-cotton.md @@ -43,9 +43,9 @@ Cotton, F. A., Wilkinson, G., Murillio, C. A., & Bochmann, M. (1999). --- nocite: "[@*]" references: -- annote: A book entry with author authors and an edition field. By - default, long author and editor lists are automatically truncated. - This is configurable +- annote: A book entry with `\arabic{author}`{=latex} authors and an + edition field. By default, long author and editor lists are + automatically truncated. This is configurable author: - family: Cotton given: Frank Albert diff --git a/test/command/biblatex-murray.md b/test/command/biblatex-murray.md index 78081c6fe..8242e4822 100644 --- a/test/command/biblatex-murray.md +++ b/test/command/biblatex-murray.md @@ -55,8 +55,9 @@ properties as a function of core size. *Langmuir*, *14*(1), 17–30. --- nocite: "[@*]" references: -- annote: An article entry with author authors. By default, long author - and editor lists are automatically truncated. This is configurable +- annote: An article entry with `\arabic{author}`{=latex} authors. By + default, long author and editor lists are automatically truncated. + This is configurable author: - family: Hostetler given: Michael J. -- cgit v1.2.3 From 300b9b0ea365187240115afbfed0df7fa438a7b3 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 29 Jan 2021 09:37:45 +0100 Subject: JATS writer: escape special chars in reference elements. Prevents the generation of invalid markup if a citation element contains an ampersand or another character with a special meaning in XML. --- src/Text/Pandoc/Writers/JATS/References.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs index 4ee7eb9dd..903144128 100644 --- a/src/Text/Pandoc/Writers/JATS/References.hs +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -29,7 +29,7 @@ import Text.Pandoc.Builder (Inlines) import Text.Pandoc.Options (WriterOptions) import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.JATS.Types -import Text.Pandoc.XML (inTags) +import Text.Pandoc.XML (escapeStringForXML, inTags) import qualified Data.Text as T referencesToJATS :: PandocMonad m @@ -78,7 +78,8 @@ referenceToJATS _opts ref = do varInTagWith var tagName tagAttribs = case lookupVariable var ref >>= valToText of Nothing -> mempty - Just val -> inTags' tagName tagAttribs $ literal val + Just val -> inTags' tagName tagAttribs . literal $ + escapeStringForXML val authors = case lookupVariable "author" ref of Just (NamesVal names) -> @@ -143,7 +144,9 @@ toNameElements name = then inTags' "name" [] nameTags else nameLiteral name `inNameTag` "string-name" where - inNameTag val tag = maybe empty (inTags' tag [] . literal) val + inNameTag mVal tag = case mVal of + Nothing -> empty + Just val -> inTags' tag [] . literal $ escapeStringForXML val surnamePrefix = maybe mempty (`T.snoc` ' ') $ nameNonDroppingParticle name givenSuffix = maybe mempty (T.cons ' ') $ -- cgit v1.2.3 From 9223788a05fe619d567bcbdf4cb31db63de86f32 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 29 Jan 2021 18:29:17 -0800 Subject: Markdown writer: handle math right before digit. We insert an HTML comment to avoid a `$` right before a digit, which pandoc will not recognize as a math delimiter. --- src/Text/Pandoc/Writers/Markdown.hs | 6 +++++- test/command/7058.md | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 test/command/7058.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index de9075ac4..898905603 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -22,7 +22,7 @@ module Text.Pandoc.Writers.Markdown ( writePlain) where import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Char (isAlphaNum) +import Data.Char (isAlphaNum, isDigit) import Data.Default import Data.List (find, intersperse, sortOn, transpose) import qualified Data.Map as M @@ -987,6 +987,10 @@ inlineListToMarkdown opts lst = do inlist <- asks envInList go (if inlist then avoidBadWrapsInList lst else lst) where go [] = return empty + go (x@Math{}:y@(Str t):zs) + | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058 + = liftM2 (<>) (inlineToMarkdown opts x) + (go (RawInline (Format "html") "<!-- -->" : y : zs)) go (i:is) = case i of Link {} -> case is of -- If a link is followed by another link, or '[', '(' or ':' diff --git a/test/command/7058.md b/test/command/7058.md new file mode 100644 index 000000000..69e5dd445 --- /dev/null +++ b/test/command/7058.md @@ -0,0 +1,6 @@ +``` +% pandoc -f latex -t markdown +5\(-\)8 \(x\) +^D +5$-$`<!-- -->`{=html}8 $x$ +``` -- cgit v1.2.3 From 66959172588409becb42993bb94ea106ac3e5606 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 30 Jan 2021 18:10:22 -0800 Subject: CslJson writer: output `[]` if no references in input, instead of raising a PandocAppError as before. --- src/Text/Pandoc/Writers/CslJson.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs index 13b95586b..f0defdd38 100644 --- a/src/Text/Pandoc/Writers/CslJson.hs +++ b/src/Text/Pandoc/Writers/CslJson.hs @@ -46,11 +46,11 @@ writeCslJson _opts (Pandoc meta _) = do locale <- case getLocale lang of Left e -> throwError $ PandocCiteprocError e Right l -> return l - case lookupMeta "references" meta of - Just (MetaList rs) -> return $ (UTF8.toText $ - toCslJson locale (mapMaybe metaValueToReference rs)) - <> "\n" - _ -> throwError $ PandocAppError "No references field" + let rs = case lookupMeta "references" meta of + Just (MetaList rs) -> rs + _ -> [] + return $ UTF8.toText + (toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n" fromInlines :: [Inline] -> CslJson Text fromInlines = foldMap fromInline . B.fromList -- cgit v1.2.3 From 9c8ff53b54836cbe7fa9417784f3204cd041f3ac Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sun, 31 Jan 2021 14:37:47 +0100 Subject: CslJson writer: fix compiler warning --- src/Text/Pandoc/Writers/CslJson.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs index f0defdd38..a10def95e 100644 --- a/src/Text/Pandoc/Writers/CslJson.hs +++ b/src/Text/Pandoc/Writers/CslJson.hs @@ -47,7 +47,7 @@ writeCslJson _opts (Pandoc meta _) = do Left e -> throwError $ PandocCiteprocError e Right l -> return l let rs = case lookupMeta "references" meta of - Just (MetaList rs) -> rs + Just (MetaList xs) -> xs _ -> [] return $ UTF8.toText (toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n" -- cgit v1.2.3 From d1875b69ec136c19d9b2336ff57e360ff7b7bef6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 31 Jan 2021 12:05:46 -0800 Subject: RST reader: fix handling of header in CSV tables. The interpretation of this line is not affected by the delim option. Closes #7064. --- src/Text/Pandoc/Readers/RST.hs | 9 +++++---- test/command/7064.md | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 test/command/7064.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 707af905f..0f32d993c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -877,10 +877,11 @@ csvTableDirective top fields rawcsv = do (bs, _) <- fetchItem u return $ UTF8.toText bs Nothing -> return rawcsv - let res = parseCSV opts (case explicitHeader of - Just h -> h <> "\n" <> rawcsv' - Nothing -> rawcsv') - case res of + let header' = case explicitHeader of + Just h -> parseCSV defaultCSVOptions h + Nothing -> Right [] + let res = parseCSV opts rawcsv' + case (<>) <$> header' <*> res of Left e -> throwError $ PandocParsecError "csv table" e Right rawrows -> do diff --git a/test/command/7064.md b/test/command/7064.md new file mode 100644 index 000000000..58b72e363 --- /dev/null +++ b/test/command/7064.md @@ -0,0 +1,32 @@ +``` +% pandoc -f rst -t html +.. csv-table:: Changes + :header: "Version", "Date", "Description" + :widths: 15, 15, 70 + :delim: $ + + 0.1.0 $ 18/02/2013 $ Initial Release +^D +<table> +<caption>Changes</caption> +<colgroup> +<col style="width: 15%" /> +<col style="width: 15%" /> +<col style="width: 70%" /> +</colgroup> +<thead> +<tr class="header"> +<th>Version</th> +<th>Date</th> +<th>Description</th> +</tr> +</thead> +<tbody> +<tr class="odd"> +<td>0.1.0</td> +<td>18/02/2013</td> +<td>Initial Release</td> +</tr> +</tbody> +</table> +``` -- cgit v1.2.3 From b239c89a82b66abc55bf7c08e37492938c817c56 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 1 Feb 2021 08:43:26 -0800 Subject: BibTeX writer fixes. Closes #7067. + Require citeproc 0.3.0.7, which correctly titlecases when titles contain non-ASCII characters. + Correctly handle 'pages' (= 'page' in CSL). + Correctly handle BibLaTeX 'langid' (= 'language' in CSL). + In BibTeX output, protect foreign titles since there's no language field. --- pandoc.cabal | 2 +- src/Text/Pandoc/Citeproc/BibTeX.hs | 21 ++++++--- stack.yaml | 4 +- test/command/7067.md | 90 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 9 deletions(-) create mode 100644 test/command/7067.md (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 0890318e3..1bdd2a6b3 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -410,7 +410,7 @@ library blaze-markup >= 0.8 && < 0.9, bytestring >= 0.9 && < 0.12, case-insensitive >= 1.2 && < 1.3, - citeproc >= 0.3.0.6 && < 0.4, + citeproc >= 0.3.0.7 && < 0.4, commonmark >= 0.1.1.3 && < 0.2, commonmark-extensions >= 0.2.0.4 && < 0.3, commonmark-pandoc >= 0.2 && < 0.3, diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 5b9068378..2b43fffb6 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -115,7 +115,7 @@ writeBibtexString opts variant mblang ref = "motion_picture" | variant == Biblatex -> "movie" "review" | variant == Biblatex -> "review" _ -> "misc" - + mbSubtype = case referenceType ref of "article-magazine" -> Just "magazine" @@ -149,7 +149,7 @@ writeBibtexString opts variant mblang ref = , "type" , "entrysubtype" , "note" - , "language" + , "langid" , "abstract" , "keywords" ] @@ -202,12 +202,19 @@ writeBibtexString opts variant mblang ref = [ (", " <>) <$> nameGiven name, nameDroppingParticle name ] - titlecase = case mblang of + mblang' = (parseLang <$> getVariableAsText "language") <|> mblang + + titlecase = case mblang' of Just (Lang "en" _) -> titlecase' Nothing -> titlecase' - _ -> id - - titlecase' = addTextCase mblang TitleCase . + _ -> + case variant of + Bibtex -> B.spanWith nullAttr + -- BibTex lacks a language field, so we wrap non-English + -- titles in {} to protect case. + Biblatex -> id + + titlecase' = addTextCase mblang' TitleCase . (\ils -> B.fromList (case B.toList ils of Str t : xs -> Str t : Walk.walk spanAroundCapitalizedWords xs @@ -299,6 +306,8 @@ writeBibtexString opts variant mblang ref = getContentsFor "urldate" = getVariable "accessed" >>= toLaTeX . valToInlines getContentsFor "year" = getVariable "issued" >>= getYear getContentsFor "month" = getVariable "issued" >>= getMonth + getContentsFor "pages" = getVariable "page" >>= toLaTeX . valToInlines + getContentsFor "langid" = getVariable "language" >>= toLaTeX . valToInlines getContentsFor "number" = (getVariable "number" <|> getVariable "collection-number" <|> getVariable "issue") >>= toLaTeX . valToInlines diff --git a/stack.yaml b/stack.yaml index 6c8c39fa7..dc6fa6517 100644 --- a/stack.yaml +++ b/stack.yaml @@ -19,10 +19,10 @@ extra-deps: - doctemplates-0.9 - texmath-0.12.1 - tasty-bench-0.1 -- citeproc-0.3.0.6 +- citeproc-0.3.0.7 #- citeproc: # git: https://github.com/jgm/citeproc.git -# commit: da1f9702fa70d7e1bad9a796f897e920e2d578f7 +# commit: feb3b7580c6738eec3b23921f7c1739cfba611aa ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules resolver: lts-16.23 diff --git a/test/command/7067.md b/test/command/7067.md new file mode 100644 index 000000000..a66e3aa1f --- /dev/null +++ b/test/command/7067.md @@ -0,0 +1,90 @@ +``` +% pandoc -t biblatex +--- +references: +- id: garaud + author: + - family: Garaud + given: Marcel + container-title: Bulletin de la Societé des antiquaires de l’Ouest + collection-title: 4 + issued: + - year: 1967 + language: fr-FR + page: 11-27 + title: Recherches sur les défrichements dans la Gâtine poitevine aux XI^e^ et XII^e^ siècles + type: article-journal + volume: 9 +... +^D +@article{garaud, + author = {Garaud, Marcel}, + title = {Recherches sur les défrichements dans la Gâtine poitevine aux +XI\textsuperscript{e} et XII\textsuperscript{e} siècles}, + journal = {Bulletin de la Societé des antiquaires de l’Ouest}, + series = {4}, + volume = {9}, + pages = {11-27}, + date = {1967}, + langid = {fr-FR} +} +``` + +``` +% pandoc -t bibtex +--- +references: +- id: garaud + author: + - family: Garaud + given: Marcel + container-title: Bulletin de la Société des antiquaires de l’Ouest + collection-title: 4 + issued: + - year: 1967 + language: fr-FR + page: 11-27 + title: Recherches sur les défrichements dans la Gâtine poitevine aux XI^e^ et XII^e^ siècles + type: article-journal + volume: 9 +... +^D +@article{garaud, + author = {Garaud, Marcel}, + title = {{Recherches sur les défrichements dans la Gâtine poitevine aux +XI\textsuperscript{e} et XII\textsuperscript{e} siècles}}, + journal = {Bulletin de la Société des antiquaires de l’Ouest}, + series = {4}, + volume = {9}, + pages = {11-27}, + year = {1967} +} +``` + +This tests the titlecasing of a word with an accented second letter: +``` +% pandoc -t bibtex +--- +references: +- id: garaud + author: + - family: Garaud + given: Marcel + container-title: English Journal + issued: + - year: 1967 + language: en-US + title: Research on the défrichements in the Gâtine poitevine + type: article-journal + volume: 9 +... +^D +@article{garaud, + author = {Garaud, Marcel}, + title = {Research on the Défrichements in the {Gâtine} Poitevine}, + journal = {English Journal}, + volume = {9}, + year = {1967} +} +``` + -- cgit v1.2.3 From 02d3c71e7224853ecabaa9ac4cd947ec2ac1e579 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 1 Feb 2021 18:02:17 -0800 Subject: BibTeX writer: use doclayout and doctemplate. This change allows bibtex/biblatex output to wrap as other formats do, depending on the settings of `--wrap` and `--columns`. It also introduces default templates for bibtex and biblatex, which allow for using the variables `header-include`, `include-before` or `include-after` (or alternatively the command line options `--include-in-header`, `--include-before-body`, `--include-after-body`) to insert content into the generated bibtex/biblatex. This change requires a change in the return type of the unexported `T.P.Citeproc.writeBibTeXString` from `Text` to `Doc Text`. Closes #7068. --- data/templates/default.biblatex | 10 ++++++++++ pandoc.cabal | 2 ++ src/Text/Pandoc/Citeproc/BibTeX.hs | 41 +++++++++++++++++++++----------------- src/Text/Pandoc/Templates.hs | 2 -- src/Text/Pandoc/Writers/BibTeX.hs | 19 +++++++++++++++--- test/command/7067.md | 6 +++--- 6 files changed, 54 insertions(+), 26 deletions(-) create mode 100644 data/templates/default.biblatex (limited to 'src') diff --git a/data/templates/default.biblatex b/data/templates/default.biblatex new file mode 100644 index 000000000..6bf2632d8 --- /dev/null +++ b/data/templates/default.biblatex @@ -0,0 +1,10 @@ +$for(header-includes)$ +$header-includes$ +$endfor$ +$for(include-before)$ +$include-before$ +$endfor$ +$body$ +$for(include-after)$ +$include-after$ +$endfor$ diff --git a/pandoc.cabal b/pandoc.cabal index 1bdd2a6b3..2f8c255a8 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -56,6 +56,8 @@ data-files: data/templates/default.icml data/templates/default.opml data/templates/default.latex + data/templates/default.bibtex + data/templates/default.biblatex data/templates/default.context data/templates/default.texinfo data/templates/default.jira diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 2b43fffb6..416fe439e 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -52,6 +52,8 @@ import Data.Char (isAlphaNum, isDigit, isLetter, import Data.List (foldl', intercalate, intersperse) import Safe (readMay) import Text.Printf (printf) +import Text.DocLayout (literal, hsep, nest, hang, Doc(..), + braces, ($$), cr) data Variant = Bibtex | Biblatex deriving (Show, Eq, Ord) @@ -77,10 +79,11 @@ writeBibtexString :: WriterOptions -- ^ options (for writing LaTex) -> Variant -- ^ bibtex or biblatex -> Maybe Lang -- ^ Language -> Reference Inlines -- ^ Reference to write - -> Text + -> Doc Text writeBibtexString opts variant mblang ref = - "@" <> bibtexType <> "{" <> unItemId (referenceId ref) <> ",\n " <> - renderFields fs <> "\n}\n" + "@" <> bibtexType <> "{" <> literal (unItemId (referenceId ref)) <> "," + $$ nest 2 (renderFields fs) + $$ "}" <> cr where bibtexType = @@ -231,10 +234,12 @@ writeBibtexString opts variant mblang ref = toLaTeX x = case runPure (writeLaTeX opts $ doc (B.plain x)) of Left _ -> Nothing - Right t -> Just t + Right t -> Just $ hsep . map literal $ T.words t - renderField name = (\contents -> name <> " = {" <> contents <> "}") - <$> getContentsFor name + renderField :: Text -> Maybe (Doc Text) + renderField name = + (((literal name) <>) . hang 2 " = " . braces) + <$> getContentsFor name getVariable v = lookupVariable (toVariable v) ref @@ -248,10 +253,10 @@ writeBibtexString opts variant mblang ref = Nothing -> case dateParts date of [DateParts (y1:_), DateParts (y2:_)] -> - Just (T.pack (printf "%04d" y1) <> "--" <> + Just $ literal (T.pack (printf "%04d" y1) <> "--" <> T.pack (printf "%04d" y2)) [DateParts (y1:_)] -> - Just (T.pack (printf "%04d" y1)) + Just $ literal (T.pack (printf "%04d" y1)) _ -> Nothing _ -> Nothing @@ -274,19 +279,19 @@ writeBibtexString opts variant mblang ref = DateVal date -> case dateParts date of [DateParts (_:m1:_), DateParts (_:m2:_)] -> - Just (toMonth m1 <> "--" <> toMonth m2) - [DateParts (_:m1:_)] -> Just (toMonth m1) + Just $ literal (toMonth m1 <> "--" <> toMonth m2) + [DateParts (_:m1:_)] -> Just $ literal (toMonth m1) _ -> Nothing _ -> Nothing - getContentsFor :: Text -> Maybe Text + getContentsFor :: Text -> Maybe (Doc Text) getContentsFor "type" = getVariableAsText "genre" >>= \case "mathesis" -> Just "mastersthesis" "phdthesis" -> Just "phdthesis" _ -> Nothing - getContentsFor "entrysubtype" = mbSubtype + getContentsFor "entrysubtype" = literal <$> mbSubtype getContentsFor "journal" | bibtexType `elem` ["article", "periodical", "suppperiodical", "review"] = getVariable "container-title" >>= toLaTeX . valToInlines @@ -314,7 +319,7 @@ writeBibtexString opts variant mblang ref = getContentsFor x = getVariable x >>= if isURL x - then Just . stringify . valToInlines + then Just . literal . stringify . valToInlines else toLaTeX . (if x == "title" then titlecase @@ -323,7 +328,7 @@ writeBibtexString opts variant mblang ref = isURL x = x `elem` ["url","doi","issn","isbn"] - renderFields = T.intercalate ",\n " . mapMaybe renderField + renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField defaultLang :: Lang defaultLang = Lang "en" (Just "US") @@ -1038,14 +1043,14 @@ getOldDate prefix = do let dateparts = filter (\x -> x /= DateParts []) $ map toDateParts [(year',month',day'), (endyear',endmonth',endday')] - literal <- if null dateparts - then Just <$> getRawField (prefix <> "year") - else return Nothing + literal' <- if null dateparts + then Just <$> getRawField (prefix <> "year") + else return Nothing return $ Date { dateParts = dateparts , dateCirca = False , dateSeason = Nothing - , dateLiteral = literal } + , dateLiteral = literal' } getRawField :: Text -> Bib Text getRawField f = do diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index 3e539bff7..e83f26329 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -81,8 +81,6 @@ getDefaultTemplate writer = do case format of "native" -> return "" "csljson" -> return "" - "bibtex" -> return "" - "biblatex" -> return "" "json" -> return "" "docx" -> return "" "fb2" -> return "" diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs index e1cb47ca1..b9ae0c13a 100644 --- a/src/Text/Pandoc/Writers/BibTeX.hs +++ b/src/Text/Pandoc/Writers/BibTeX.hs @@ -25,7 +25,11 @@ import Citeproc (parseLang) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.BibTeX as BibTeX import Text.Pandoc.Citeproc.MetaValue (metaValueToReference) -import Text.Pandoc.Writers.Shared (lookupMetaString) +import Text.Pandoc.Writers.Shared (lookupMetaString, defField, + addVariablesToContext) +import Text.DocLayout (render, vcat) +import Text.DocTemplates (Context(..)) +import Text.Pandoc.Templates (renderTemplate) -- | Write BibTeX based on the references metadata from a Pandoc document. writeBibTeX :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -43,6 +47,15 @@ writeBibTeX' variant opts (Pandoc meta _) = do let refs = case lookupMeta "references" meta of Just (MetaList xs) -> mapMaybe metaValueToReference xs _ -> [] - return $ mconcat $ - map (BibTeX.writeBibtexString opts variant mblang) refs + let main = vcat $ map (BibTeX.writeBibtexString opts variant mblang) refs + let context = defField "body" main + $ addVariablesToContext opts (mempty :: Context Text) + let colwidth = if writerWrapText opts == WrapAuto + then Just $ writerColumns opts + else Nothing + return $ render colwidth $ + case writerTemplate opts of + Nothing -> main + Just tpl -> renderTemplate tpl context + diff --git a/test/command/7067.md b/test/command/7067.md index a66e3aa1f..34b3bb8ea 100644 --- a/test/command/7067.md +++ b/test/command/7067.md @@ -20,7 +20,7 @@ references: @article{garaud, author = {Garaud, Marcel}, title = {Recherches sur les défrichements dans la Gâtine poitevine aux -XI\textsuperscript{e} et XII\textsuperscript{e} siècles}, + XI\textsuperscript{e} et XII\textsuperscript{e} siècles}, journal = {Bulletin de la Societé des antiquaires de l’Ouest}, series = {4}, volume = {9}, @@ -51,8 +51,8 @@ references: ^D @article{garaud, author = {Garaud, Marcel}, - title = {{Recherches sur les défrichements dans la Gâtine poitevine aux -XI\textsuperscript{e} et XII\textsuperscript{e} siècles}}, + title = {{Recherches sur les défrichements dans la Gâtine poitevine + aux XI\textsuperscript{e} et XII\textsuperscript{e} siècles}}, journal = {Bulletin de la Société des antiquaires de l’Ouest}, series = {4}, volume = {9}, -- cgit v1.2.3 From ec8509295a8de19462ecd352a22b2784158e9ec6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 2 Feb 2021 17:00:03 -0800 Subject: Add parseOptionsFromArgs [API change, addition]. Exported by Text.Pandoc.App. --- src/Text/Pandoc/App.hs | 4 +++- src/Text/Pandoc/App/CommandLineOptions.hs | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 437af3257..6a071ad5a 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -19,6 +19,7 @@ module Text.Pandoc.App ( , Filter(..) , defaultOpts , parseOptions + , parseOptionsFromArgs , options , applyFilters ) where @@ -47,7 +48,8 @@ import Text.Pandoc import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, IpynbOutput (..) ) -import Text.Pandoc.App.CommandLineOptions (parseOptions, options) +import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, + options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index 307f28b5c..a4c510d97 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -17,6 +17,7 @@ Does a pandoc conversion based on command-line options. -} module Text.Pandoc.App.CommandLineOptions ( parseOptions + , parseOptionsFromArgs , options , engines , lookupHighlightStyle @@ -73,9 +74,13 @@ parseOptions :: [OptDescr (Opt -> IO Opt)] -> Opt -> IO Opt parseOptions options' defaults = do rawArgs <- map UTF8.decodeArg <$> getArgs prg <- getProgName + parseOptionsFromArgs options' defaults prg rawArgs +parseOptionsFromArgs + :: [OptDescr (Opt -> IO Opt)] -> Opt -> String -> [String] -> IO Opt +parseOptionsFromArgs options' defaults prg rawArgs = do let (actions, args, unrecognizedOpts, errors) = - getOpt' Permute options' rawArgs + getOpt' Permute options' (map UTF8.decodeArg rawArgs) let unknownOptionErrors = foldr (handleUnrecognizedOption . takeWhile (/= '=')) [] -- cgit v1.2.3 From 61b108d52789f20fb03c4f8a74719c1d53021c91 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Wed, 27 Jan 2021 15:17:39 +0100 Subject: Lua: add module "pandoc.path" The module allows to work with file paths in a convenient and platform-independent manner. Closes: #6001 Closes: #6565 --- doc/lua-filters.md | 172 ++++++++++++++++++++++++++++++++++++++++ pandoc.cabal | 1 + src/Text/Pandoc/Lua/Packages.hs | 2 + stack.yaml | 4 +- test/Tests/Lua/Module.hs | 2 + test/lua/module/pandoc-path.lua | 17 ++++ 6 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 test/lua/module/pandoc-path.lua (limited to 'src') diff --git a/doc/lua-filters.md b/doc/lua-filters.md index 787365212..33c0d27bd 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -3376,6 +3376,178 @@ methods and convenience functions. `comp`: : Comparison function as described above. +# Module pandoc.path + +Module for file path manipulations. + +## Static Fields {#pandoc.path-fields} + +### separator {#pandoc.path.separator} + +The character that separates directories. + +### search_path_separator {#pandoc.path.search_path_separator} + +The character that is used to separate the entries in the `PATH` +environment variable. + +## Functions {#pandoc.path-functions} + +### directory (filepath) {#pandoc.path.directory} + +Get the directory name; move up one level. + +Parameters: + +filepath +: path (string) + +Returns: + +- The filepath up to the last directory separator. (string) + +### filename (filepath) {#pandoc.path.filename} + +Get the file name. + +Parameters: + +filepath +: path (string) + +Returns: + +- File name part of the input path. (string) + +### is_absolute (filepath) {#pandoc.path.is_absolute} + +Checks whether a path is absolute, i.e. not fixed to a root. + +Parameters: + +filepath +: path (string) + +Returns: + +- `true` iff `filepath` is an absolute path, `false` otherwise. + (boolean) + +### is_relative (filepath) {#pandoc.path.is_relative} + +Checks whether a path is relative or fixed to a root. + +Parameters: + +filepath +: path (string) + +Returns: + +- `true` iff `filepath` is a relative path, `false` otherwise. + (boolean) + +### join (filepaths) {#pandoc.path.join} + +Join path elements back together by the directory separator. + +Parameters: + +filepaths +: path components (list of strings) + +Returns: + +- The joined path. (string) + +### make_relative (path, root[, unsafe]) {#pandoc.path.make_relative} + +Contract a filename, based on a relative path. Note that the +resulting path will usually not introduce `..` paths, as the +presence of symlinks means `../b` may not reach `a/b` if it starts +from `a/c`. For a worked example see [this blog +post](http://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html). + +Set `unsafe` to a truthy value to a allow `..` in paths. + +Parameters: + +path +: path to be made relative (string) + +root +: root path (string) + +unsafe +: whether to allow `..` in the result. (boolean) + +Returns: + +- contracted filename (string) + +### normalize (filepath) {#pandoc.path.normalize} + +Normalizes a path. + +- `//` outside of the drive can be made blank +- `/` becomes the `path.separator` +- `./` -\> '' +- an empty path becomes `.` + +Parameters: + +filepath +: path (string) + +Returns: + +- The normalized path. (string) + +### split (filepath) {#pandoc.path.split} + +Splits a path by the directory separator. + +Parameters: + +filepath +: path (string) + +Returns: + +- List of all path components. (list of strings) + +### split_extension (filepath) {#pandoc.path.split_extension} + +Splits the last extension from a file path and returns the parts. The +extension, if present, includes the leading separator; if the path has +no extension, then the empty string is returned as the extension. + +Parameters: + +filepath +: path (string) + +Returns: + +- filepath without extension (string) + +- extension or empty string (string) + +### split_search_path (search_path) {#pandoc.path.split_search_path} + +Takes a string and splits it on the `search_path_separator` character. +Blank items are ignored on Windows, and converted to `.` on Posix. On +Windows path elements are stripped of quotes. + +Parameters: + +search_path +: platform-specific search path (string) + +Returns: + +- list of directories in search path (list of strings) + # Module pandoc.system Access to system information and functionality. diff --git a/pandoc.cabal b/pandoc.cabal index 2f8c255a8..4b808af84 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -429,6 +429,7 @@ library filepath >= 1.1 && < 1.5, haddock-library >= 1.8 && < 1.10, hslua >= 1.1 && < 1.4, + hslua-module-path >= 0.0.1 && < 0.1.0, hslua-module-system >= 0.2 && < 0.3, hslua-module-text >= 0.2.1 && < 0.4, http-client >= 0.4.30 && < 0.8, diff --git a/src/Text/Pandoc/Lua/Packages.hs b/src/Text/Pandoc/Lua/Packages.hs index 5949a1a7d..2f1c139db 100644 --- a/src/Text/Pandoc/Lua/Packages.hs +++ b/src/Text/Pandoc/Lua/Packages.hs @@ -17,6 +17,7 @@ import Foreign.Lua (NumResults) import Text.Pandoc.Lua.PandocLua (PandocLua, liftPandocLua, loadDefaultModule) import qualified Foreign.Lua as Lua +import qualified Foreign.Lua.Module.Path as Path import qualified Foreign.Lua.Module.Text as Text import qualified Text.Pandoc.Lua.Module.Pandoc as Pandoc import qualified Text.Pandoc.Lua.Module.MediaBag as MediaBag @@ -43,6 +44,7 @@ pandocPackageSearcher pkgName = case pkgName of "pandoc" -> pushWrappedHsFun Pandoc.pushModule "pandoc.mediabag" -> pushWrappedHsFun MediaBag.pushModule + "pandoc.path" -> pushWrappedHsFun Path.pushModule "pandoc.system" -> pushWrappedHsFun System.pushModule "pandoc.types" -> pushWrappedHsFun Types.pushModule "pandoc.utils" -> pushWrappedHsFun Utils.pushModule diff --git a/stack.yaml b/stack.yaml index dc6fa6517..da7acd8db 100644 --- a/stack.yaml +++ b/stack.yaml @@ -10,7 +10,9 @@ extra-deps: - haddock-library-1.9.0 - skylighting-0.10.2 - skylighting-core-0.10.2 -- hslua-1.1.2 +- hslua-1.3.0 +- hslua-module-path-0.0.1 +- hslua-module-text-0.3.0.1 - jira-wiki-markup-1.3.2 - HsYAML-aeson-0.2.0.0 - commonmark-0.1.1.3 diff --git a/test/Tests/Lua/Module.hs b/test/Tests/Lua/Module.hs index d88633cf8..8be445f65 100644 --- a/test/Tests/Lua/Module.hs +++ b/test/Tests/Lua/Module.hs @@ -25,6 +25,8 @@ tests = ("lua" </> "module" </> "pandoc-list.lua") , testPandocLua "pandoc.mediabag" ("lua" </> "module" </> "pandoc-mediabag.lua") + , testPandocLua "pandoc.path" + ("lua" </> "module" </> "pandoc-path.lua") , testPandocLua "pandoc.types" ("lua" </> "module" </> "pandoc-types.lua") , testPandocLua "pandoc.util" diff --git a/test/lua/module/pandoc-path.lua b/test/lua/module/pandoc-path.lua new file mode 100644 index 000000000..9a5a3f6c8 --- /dev/null +++ b/test/lua/module/pandoc-path.lua @@ -0,0 +1,17 @@ +local tasty = require 'tasty' +local path = require 'pandoc.path' + +local assert = tasty.assert +local test = tasty.test_case +local group = tasty.test_group + +return { + group 'path separator' { + test('is string', function () + assert.are_same(type(path.separator), 'string') + end), + test('is slash or backslash', function () + assert.is_truthy(path.separator:match '^[/\\]$') + end), + }, +} -- cgit v1.2.3 From b79aba6ea1ecb6a1f619128638369ef47f64d26d Mon Sep 17 00:00:00 2001 From: Nick Berendsen <58348935+Desbeers@users.noreply.github.com> Date: Wed, 3 Feb 2021 18:00:18 +0100 Subject: ePub writer: `belongs-to-collection` metadata (#7063) --- MANUAL.txt | 9 ++++ src/Text/Pandoc/Writers/EPUB.hs | 99 ++++++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 41 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 4a82703a3..e771d3da4 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5931,6 +5931,15 @@ The following fields are recognized: `rights` ~ A string value. + +`belongs-to-collection` + ~ A string value. identifies the name of a collection to which + the EPUB Publication belongs. + +`group-position` + ~ The `group-position` field indicates the numeric position in which + the EPUB Publication belongs relative to other works belonging to + the same `belongs-to-collection` field. `cover-image` ~ A string value (path to cover image). diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 783f190f5..1f16f6772 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -74,26 +74,28 @@ data EPUBState = EPUBState { type E m = StateT EPUBState m data EPUBMetadata = EPUBMetadata{ - epubIdentifier :: [Identifier] - , epubTitle :: [Title] - , epubDate :: [Date] - , epubLanguage :: String - , epubCreator :: [Creator] - , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubCoverImage :: Maybe String - , epubStylesheets :: [FilePath] - , epubPageDirection :: Maybe ProgressionDirection - , epubIbooksFields :: [(String, String)] - , epubCalibreFields :: [(String, String)] + epubIdentifier :: [Identifier] + , epubTitle :: [Title] + , epubDate :: [Date] + , epubLanguage :: String + , epubCreator :: [Creator] + , epubContributor :: [Creator] + , epubSubject :: [String] + , epubDescription :: Maybe String + , epubType :: Maybe String + , epubFormat :: Maybe String + , epubPublisher :: Maybe String + , epubSource :: Maybe String + , epubRelation :: Maybe String + , epubCoverage :: Maybe String + , epubRights :: Maybe String + , epubBelongsToCollection :: Maybe String + , epubGroupPosition :: Maybe String + , epubCoverImage :: Maybe String + , epubStylesheets :: [FilePath] + , epubPageDirection :: Maybe ProgressionDirection + , epubIbooksFields :: [(String, String)] + , epubCalibreFields :: [(String, String)] } deriving Show data Date = Date{ @@ -235,6 +237,8 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md | name == "relation" = md { epubRelation = Just $ strContent e } | name == "coverage" = md { epubCoverage = Just $ strContent e } | name == "rights" = md { epubRights = Just $ strContent e } + | name == "belongs-to-collection" = md { epubBelongsToCollection = Just $ strContent e } + | name == "group-position" = md { epubGroupPosition = Just $ strContent e } | otherwise = md where getAttr n = lookupAttr (opfName n) attrs addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = @@ -313,26 +317,28 @@ simpleList s meta = metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata metadataFromMeta opts meta = EPUBMetadata{ - epubIdentifier = identifiers - , epubTitle = titles - , epubDate = date - , epubLanguage = language - , epubCreator = creators - , epubContributor = contributors - , epubSubject = subjects - , epubDescription = description - , epubType = epubtype - , epubFormat = format - , epubPublisher = publisher - , epubSource = source - , epubRelation = relation - , epubCoverage = coverage - , epubRights = rights - , epubCoverImage = coverImage - , epubStylesheets = stylesheets - , epubPageDirection = pageDirection - , epubIbooksFields = ibooksFields - , epubCalibreFields = calibreFields + epubIdentifier = identifiers + , epubTitle = titles + , epubDate = date + , epubLanguage = language + , epubCreator = creators + , epubContributor = contributors + , epubSubject = subjects + , epubDescription = description + , epubType = epubtype + , epubFormat = format + , epubPublisher = publisher + , epubSource = source + , epubRelation = relation + , epubCoverage = coverage + , epubRights = rights + , epubBelongsToCollection = belongsToCollection + , epubGroupPosition = groupPosition + , epubCoverImage = coverImage + , epubStylesheets = stylesheets + , epubPageDirection = pageDirection + , epubIbooksFields = ibooksFields + , epubCalibreFields = calibreFields } where identifiers = getIdentifier meta titles = getTitle meta @@ -350,6 +356,8 @@ metadataFromMeta opts meta = EPUBMetadata{ relation = metaValueToString <$> lookupMeta "relation" meta coverage = metaValueToString <$> lookupMeta "coverage" meta rights = metaValueToString <$> lookupMeta "rights" meta + belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta + groupPosition = metaValueToString <$> lookupMeta "group-position" meta coverImage = (TS.unpack <$> lookupContext "epub-cover-image" (writerVariables opts)) @@ -931,7 +939,7 @@ metadataElement version md currentTime = ++ descriptionNodes ++ typeNodes ++ formatNodes ++ publisherNodes ++ sourceNodes ++ relationNodes ++ coverageNodes ++ rightsNodes ++ coverImageNodes - ++ modifiedNodes + ++ modifiedNodes ++ belongsToCollectionNodes withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) ([1..] :: [Int])) identifierNodes = withIds "epub-id" toIdentifierNode $ @@ -970,6 +978,15 @@ metadataElement version md currentTime = $ epubCoverImage md modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ showDateTimeISO8601 currentTime | version == EPUB3 ] + belongsToCollectionNodes = + maybe [] + (\belongsToCollection -> (unode "meta" ! [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection ) + : + [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ]) + (epubBelongsToCollection md)++ + maybe [] + (\groupPosition -> [unode "meta" ! [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ]) + (epubGroupPosition md) dcTag n s = unode ("dc:" ++ n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) -- cgit v1.2.3 From a5169f68b251e04b0a68a7d93a30bafcb3f85e78 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Thu, 4 Feb 2021 19:07:59 +0100 Subject: Lua filters: use same function names in Haskell and Lua --- .hlint.yaml | 1 + src/Text/Pandoc/Lua/Module/MediaBag.hs | 27 ++++++++++++++------------- src/Text/Pandoc/Lua/Module/Pandoc.hs | 31 ++++++++++++++++--------------- 3 files changed, 31 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/.hlint.yaml b/.hlint.yaml index 6b74014d4..d5ebffd34 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -60,6 +60,7 @@ - Text.Pandoc.Citeproc - Text.Pandoc.Extensions - Text.Pandoc.Lua.Marshaling.Version + - Text.Pandoc.Lua.Module.Pandoc - Text.Pandoc.Lua.Module.Utils - Text.Pandoc.Readers.Odt.ContentReader - Text.Pandoc.Readers.Odt.Namespaces diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 715e53885..78b699176 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -13,6 +13,7 @@ module Text.Pandoc.Lua.Module.MediaBag ( pushModule ) where +import Prelude hiding (lookup) import Control.Monad (zipWithM_) import Foreign.Lua (Lua, NumResults, Optional) import Text.Pandoc.Class.CommonState (CommonState (..)) @@ -36,10 +37,10 @@ pushModule = do liftPandocLua Lua.newtable addFunction "delete" delete addFunction "empty" empty - addFunction "insert" insertMediaFn + addFunction "insert" insert addFunction "items" items - addFunction "lookup" lookupMediaFn - addFunction "list" mediaDirectoryFn + addFunction "lookup" lookup + addFunction "list" list addFunction "fetch" fetch return 1 @@ -53,11 +54,11 @@ empty :: PandocLua NumResults empty = 0 <$ modifyCommonState (\st -> st { stMediaBag = mempty }) -- | Insert a new item into the media bag. -insertMediaFn :: FilePath - -> Optional MimeType - -> BL.ByteString - -> PandocLua NumResults -insertMediaFn fp optionalMime contents = do +insert :: FilePath + -> Optional MimeType + -> BL.ByteString + -> PandocLua NumResults +insert fp optionalMime contents = do mb <- getMediaBag setMediaBag $ MB.insertMedia fp (Lua.fromOptional optionalMime) contents mb return (Lua.NumResults 0) @@ -66,9 +67,9 @@ insertMediaFn fp optionalMime contents = do items :: PandocLua NumResults items = getMediaBag >>= liftPandocLua . pushIterator -lookupMediaFn :: FilePath - -> PandocLua NumResults -lookupMediaFn fp = do +lookup :: FilePath + -> PandocLua NumResults +lookup fp = do res <- MB.lookupMedia fp <$> getMediaBag liftPandocLua $ case res of Nothing -> 1 <$ Lua.pushnil @@ -77,8 +78,8 @@ lookupMediaFn fp = do Lua.push contents return 2 -mediaDirectoryFn :: PandocLua NumResults -mediaDirectoryFn = do +list :: PandocLua NumResults +list = do dirContents <- MB.mediaDirectory <$> getMediaBag liftPandocLua $ do Lua.newtable diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index a8afecd2e..8d30f9a0c 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -14,6 +14,7 @@ module Text.Pandoc.Lua.Module.Pandoc ( pushModule ) where +import Prelude hiding (read) import Control.Monad (when) import Control.Monad.Except (throwError) import Data.Default (Default (..)) @@ -43,10 +44,10 @@ import Text.Pandoc.Error pushModule :: PandocLua NumResults pushModule = do loadDefaultModule "pandoc" - addFunction "read" readDoc - addFunction "pipe" pipeFn - addFunction "walk_block" walkBlock - addFunction "walk_inline" walkInline + addFunction "read" read + addFunction "pipe" pipe + addFunction "walk_block" walk_block + addFunction "walk_inline" walk_inline return 1 walkElement :: (Walkable (SingletonsList Inline) a, @@ -54,14 +55,14 @@ walkElement :: (Walkable (SingletonsList Inline) a, => a -> LuaFilter -> PandocLua a walkElement x f = liftPandocLua $ walkInlines f x >>= walkBlocks f -walkInline :: Inline -> LuaFilter -> PandocLua Inline -walkInline = walkElement +walk_inline :: Inline -> LuaFilter -> PandocLua Inline +walk_inline = walkElement -walkBlock :: Block -> LuaFilter -> PandocLua Block -walkBlock = walkElement +walk_block :: Block -> LuaFilter -> PandocLua Block +walk_block = walkElement -readDoc :: T.Text -> Optional T.Text -> PandocLua NumResults -readDoc content formatSpecOrNil = liftPandocLua $ do +read :: T.Text -> Optional T.Text -> PandocLua NumResults +read content formatSpecOrNil = liftPandocLua $ do let formatSpec = fromMaybe "markdown" (Lua.fromOptional formatSpecOrNil) res <- Lua.liftIO . runIO $ getReader formatSpec >>= \(rdr,es) -> @@ -79,11 +80,11 @@ readDoc content formatSpecOrNil = liftPandocLua $ do Left e -> Lua.raiseError $ show e -- | Pipes input through a command. -pipeFn :: String - -> [String] - -> BL.ByteString - -> PandocLua NumResults -pipeFn command args input = liftPandocLua $ do +pipe :: String -- ^ path to executable + -> [String] -- ^ list of arguments + -> BL.ByteString -- ^ input passed to process via stdin + -> PandocLua NumResults +pipe command args input = liftPandocLua $ do (ec, output) <- Lua.liftIO $ pipeProcess Nothing command args input case ec of ExitSuccess -> 1 <$ Lua.push output -- cgit v1.2.3 From 8e9131db4edfda6deafdf94939b907e4dce3cbaa Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 6 Feb 2021 21:52:12 -0800 Subject: Markdown reader: improved handling of mmd link attributes in references. Previously they only worked for links that had titles. Closes #7080. --- src/Text/Pandoc/Readers/Markdown.hs | 2 ++ test/command/7080.md | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 test/command/7080.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5c3a21bb7..e46553dd8 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -334,6 +334,8 @@ referenceKey = try $ do skipMany spaceChar notFollowedBy' referenceTitle notFollowedBy' $ guardEnabled Ext_link_attributes >> attributes + notFollowedBy' $ guardEnabled Ext_mmd_link_attributes >> + try (spnl <* keyValAttr) notFollowedBy' (() <$ reference) many1Char $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>') diff --git a/test/command/7080.md b/test/command/7080.md new file mode 100644 index 000000000..0727c1083 --- /dev/null +++ b/test/command/7080.md @@ -0,0 +1,8 @@ +``` +% pandoc -f markdown_mmd -t native +![][image] + +[image]: image.png width=100px height=150px +^D +[Para [Image ("",[],[("width","100px"),("height","150px")]) [] ("image.png","")]] +``` -- cgit v1.2.3 From d202f7eb77242bd2d9395b950b74fc9b22f9ae13 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sun, 7 Feb 2021 19:02:35 +0100 Subject: Avoid unnecessary use of NoImplicitPrelude pragma (#7089) --- app/pandoc.hs | 2 -- benchmark/benchmark-pandoc.hs | 2 -- benchmark/weigh-pandoc.hs | 2 -- pandoc.cabal | 2 +- src/Text/Pandoc/UTF8.hs | 1 - test/Tests/Command.hs | 2 -- test/Tests/Helpers.hs | 2 -- test/Tests/Lua.hs | 2 -- test/Tests/Old.hs | 2 -- test/Tests/Readers/Creole.hs | 2 -- test/Tests/Readers/Docx.hs | 2 -- test/Tests/Readers/DokuWiki.hs | 2 -- test/Tests/Readers/EPUB.hs | 2 -- test/Tests/Readers/FB2.hs | 2 -- test/Tests/Readers/HTML.hs | 2 -- test/Tests/Readers/JATS.hs | 2 -- test/Tests/Readers/Jira.hs | 1 - test/Tests/Readers/LaTeX.hs | 2 -- test/Tests/Readers/Man.hs | 1 - test/Tests/Readers/Markdown.hs | 2 -- test/Tests/Readers/Muse.hs | 2 -- test/Tests/Readers/Odt.hs | 2 -- test/Tests/Readers/Org/Block.hs | 2 -- test/Tests/Readers/Org/Block/CodeBlock.hs | 2 -- test/Tests/Readers/Org/Block/Figure.hs | 2 -- test/Tests/Readers/Org/Block/Header.hs | 2 -- test/Tests/Readers/Org/Block/List.hs | 2 -- test/Tests/Readers/Org/Block/Table.hs | 2 -- test/Tests/Readers/Org/Directive.hs | 2 -- test/Tests/Readers/Org/Inline.hs | 2 -- test/Tests/Readers/Org/Inline/Citation.hs | 2 -- test/Tests/Readers/Org/Inline/Note.hs | 2 -- test/Tests/Readers/Org/Inline/Smart.hs | 2 -- test/Tests/Readers/Org/Meta.hs | 2 -- test/Tests/Readers/Org/Shared.hs | 2 -- test/Tests/Readers/RST.hs | 2 -- test/Tests/Readers/Txt2Tags.hs | 2 -- test/Tests/Shared.hs | 2 -- test/Tests/Writers/AnnotatedTable.hs | 1 - test/Tests/Writers/AsciiDoc.hs | 2 -- test/Tests/Writers/ConTeXt.hs | 2 -- test/Tests/Writers/Docbook.hs | 2 -- test/Tests/Writers/Docx.hs | 2 -- test/Tests/Writers/FB2.hs | 2 -- test/Tests/Writers/HTML.hs | 2 -- test/Tests/Writers/JATS.hs | 2 -- test/Tests/Writers/LaTeX.hs | 2 -- test/Tests/Writers/Markdown.hs | 2 -- test/Tests/Writers/Ms.hs | 2 -- test/Tests/Writers/Muse.hs | 1 - test/Tests/Writers/Native.hs | 2 -- test/Tests/Writers/OOXML.hs | 2 -- test/Tests/Writers/Org.hs | 2 -- test/Tests/Writers/Plain.hs | 2 -- test/Tests/Writers/Powerpoint.hs | 2 -- test/Tests/Writers/RST.hs | 2 -- test/Tests/Writers/TEI.hs | 2 -- test/test-pandoc.hs | 2 -- trypandoc/trypandoc.hs | 2 -- 59 files changed, 1 insertion(+), 112 deletions(-) (limited to 'src') diff --git a/app/pandoc.hs b/app/pandoc.hs index 162570f18..0e30f45d1 100644 --- a/app/pandoc.hs +++ b/app/pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Main Copyright : Copyright (C) 2006-2021 John MacFarlane @@ -12,7 +11,6 @@ Parses command-line options and calls the appropriate readers and writers. -} module Main where -import Prelude import qualified Control.Exception as E import Text.Pandoc.App (convertWithOpts, defaultOpts, options, parseOptions) import Text.Pandoc.Error (handleError) diff --git a/benchmark/benchmark-pandoc.hs b/benchmark/benchmark-pandoc.hs index 045573273..496732693 100644 --- a/benchmark/benchmark-pandoc.hs +++ b/benchmark/benchmark-pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- Copyright (C) 2012-2021 John MacFarlane <jgm@berkeley.edu> @@ -17,7 +16,6 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} -import Prelude import Text.Pandoc import Text.Pandoc.MIME import Control.Monad.Except (throwError, liftIO) diff --git a/benchmark/weigh-pandoc.hs b/benchmark/weigh-pandoc.hs index ad4c83ad7..b77fa9ee9 100644 --- a/benchmark/weigh-pandoc.hs +++ b/benchmark/weigh-pandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Main @@ -11,7 +10,6 @@ Benchmarks to determine resource use of readers and writers. -} -import Prelude import Weigh import Text.Pandoc import Data.Text (Text, unpack) diff --git a/pandoc.cabal b/pandoc.cabal index 7696e22b3..72e7c2da5 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -400,7 +400,6 @@ common common-options default-language: Haskell2010 build-depends: base >= 4.9 && < 5, text >= 1.1.1.0 && < 1.3 - other-extensions: NoImplicitPrelude ghc-options: -Wall -fno-warn-unused-do-bind -Wincomplete-record-updates -Wnoncanonical-monad-instances @@ -409,6 +408,7 @@ common common-options hs-source-dirs: prelude other-modules: Prelude build-depends: base-compat >= 0.9 + other-extensions: NoImplicitPrelude if os(windows) cpp-options: -D_WINDOWS diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index b583dbbdb..567f5abe5 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.UTF8 diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs index 8118e9759..07d825f73 100644 --- a/test/Tests/Command.hs +++ b/test/Tests/Command.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {- | Module : Tests.Command @@ -14,7 +13,6 @@ Run commands, and test results, defined in markdown files. module Tests.Command (runTest, tests) where -import Prelude import Data.Algorithm.Diff import System.Environment.Executable (getExecutablePath) import qualified Data.ByteString as BS diff --git a/test/Tests/Helpers.hs b/test/Tests/Helpers.hs index 813c80adb..31e727a66 100644 --- a/test/Tests/Helpers.hs +++ b/test/Tests/Helpers.hs @@ -1,5 +1,4 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {- | Module : Tests.Helpers @@ -22,7 +21,6 @@ module Tests.Helpers ( test ) where -import Prelude import Data.Algorithm.Diff import qualified Data.Map as M import Data.Text (Text, unpack) diff --git a/test/Tests/Lua.hs b/test/Tests/Lua.hs index 1dfbbd053..31c011900 100644 --- a/test/Tests/Lua.hs +++ b/test/Tests/Lua.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -14,7 +13,6 @@ Unit and integration tests for pandoc's Lua subsystem. -} module Tests.Lua ( runLuaTest, tests ) where -import Prelude import Control.Monad (when) import System.FilePath ((</>)) import Test.Tasty (TestTree, localOption) diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index a031700a6..17ece49fd 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE TupleSections #-} {- | Module : Tests.Old @@ -13,7 +12,6 @@ -} module Tests.Old (tests) where -import Prelude import Data.Algorithm.Diff import System.Exit import System.FilePath ((<.>), (</>)) diff --git a/test/Tests/Readers/Creole.hs b/test/Tests/Readers/Creole.hs index 1fc0e62d7..3320b78e8 100644 --- a/test/Tests/Readers/Creole.hs +++ b/test/Tests/Readers/Creole.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Creole @@ -14,7 +13,6 @@ Tests for the creole reader. -} module Tests.Readers.Creole (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 12007f502..263e04173 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Docx @@ -13,7 +12,6 @@ Tests for the word docx reader. -} module Tests.Readers.Docx (tests) where -import Prelude import Codec.Archive.Zip import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B diff --git a/test/Tests/Readers/DokuWiki.hs b/test/Tests/Readers/DokuWiki.hs index d5f0c45a9..84ba86d46 100644 --- a/test/Tests/Readers/DokuWiki.hs +++ b/test/Tests/Readers/DokuWiki.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -14,7 +13,6 @@ Tests for DokuWiki reader. -} module Tests.Readers.DokuWiki (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/EPUB.hs b/test/Tests/Readers/EPUB.hs index 3c75dd08d..13e239911 100644 --- a/test/Tests/Readers/EPUB.hs +++ b/test/Tests/Readers/EPUB.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Tests.Readers.EPUB Copyright : © 2006-2021 John MacFarlane @@ -12,7 +11,6 @@ Tests for the EPUB mediabag. -} module Tests.Readers.EPUB (tests) where -import Prelude import qualified Data.ByteString.Lazy as BL import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/FB2.hs b/test/Tests/Readers/FB2.hs index fbb2e2150..42054a235 100644 --- a/test/Tests/Readers/FB2.hs +++ b/test/Tests/Readers/FB2.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {- | Module : Tests.Readers.FB2 Copyright : © 2018-2020 Alexander Krotov @@ -12,7 +11,6 @@ Tests for the EPUB mediabag. -} module Tests.Readers.FB2 (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Test.Tasty.Golden (goldenVsString) diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 578c76860..f23af2cb1 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.HTML @@ -13,7 +12,6 @@ Tests for the HTML reader. -} module Tests.Readers.HTML (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index 3c61f602f..525499c86 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.JATS @@ -13,7 +12,6 @@ Tests for the JATS reader. -} module Tests.Readers.JATS (tests) where -import Prelude import Data.Text (Text) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs index bf78fe1fe..e170d2aaa 100644 --- a/test/Tests/Readers/Jira.hs +++ b/test/Tests/Readers/Jira.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index 2a52ffd18..77104c853 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.LaTeX @@ -13,7 +12,6 @@ Tests for the LaTeX reader. -} module Tests.Readers.LaTeX (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import qualified Text.Pandoc.UTF8 as UTF8 diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs index 4f3ab5a28..d36151d58 100644 --- a/test/Tests/Readers/Man.hs +++ b/test/Tests/Readers/Man.hs @@ -13,7 +13,6 @@ Tests for the Man reader. -} module Tests.Readers.Man (tests) where -import Prelude import Data.Text (Text) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 18f909583..0930deae6 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Markdown @@ -13,7 +12,6 @@ Tests for the Markdown reader. -} module Tests.Readers.Markdown (tests) where -import Prelude import Data.Text (Text, unpack) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/Muse.hs b/test/Tests/Readers/Muse.hs index 4ec1631e0..68bdc87b4 100644 --- a/test/Tests/Readers/Muse.hs +++ b/test/Tests/Readers/Muse.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Muse @@ -13,7 +12,6 @@ Tests for the Muse reader. -} module Tests.Readers.Muse (tests) where -import Prelude import Data.List (intersperse) import Data.Monoid (Any (..)) import Data.Text (Text) diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs index f5e427ba2..9b5ec6b9e 100644 --- a/test/Tests/Readers/Odt.hs +++ b/test/Tests/Readers/Odt.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Odt @@ -14,7 +13,6 @@ Tests for the ODT reader. -} module Tests.Readers.Odt (tests) where -import Prelude import Control.Monad (liftM) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as B diff --git a/test/Tests/Readers/Org/Block.hs b/test/Tests/Readers/Org/Block.hs index 2ce07c4bb..779563794 100644 --- a/test/Tests/Readers/Org/Block.hs +++ b/test/Tests/Readers/Org/Block.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block @@ -13,7 +12,6 @@ Tests parsing of org blocks. -} module Tests.Readers.Org.Block (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Block/CodeBlock.hs b/test/Tests/Readers/Org/Block/CodeBlock.hs index d40c3bc1d..6b83ec6a9 100644 --- a/test/Tests/Readers/Org/Block/CodeBlock.hs +++ b/test/Tests/Readers/Org/Block/CodeBlock.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.CodeBlock @@ -13,7 +12,6 @@ Test parsing of org code blocks. -} module Tests.Readers.Org.Block.CodeBlock (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Block/Figure.hs b/test/Tests/Readers/Org/Block/Figure.hs index 8822f5b03..eb5be1c2b 100644 --- a/test/Tests/Readers/Org/Block/Figure.hs +++ b/test/Tests/Readers/Org/Block/Figure.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Figure @@ -13,7 +12,6 @@ Test parsing of org figures. -} module Tests.Readers.Org.Block.Figure (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:)) diff --git a/test/Tests/Readers/Org/Block/Header.hs b/test/Tests/Readers/Org/Block/Header.hs index 887055451..1344ad79b 100644 --- a/test/Tests/Readers/Org/Block/Header.hs +++ b/test/Tests/Readers/Org/Block/Header.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Header @@ -13,7 +12,6 @@ Test parsing of org header blocks. -} module Tests.Readers.Org.Block.Header (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep, tagSpan) diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs index ac03c583b..9686b5148 100644 --- a/test/Tests/Readers/Org/Block/List.hs +++ b/test/Tests/Readers/Org/Block/List.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Header @@ -13,7 +12,6 @@ Test parsing of org lists. -} module Tests.Readers.Org.Block.List (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Block/Table.hs b/test/Tests/Readers/Org/Block/Table.hs index cb38fcc12..ce18e6a5b 100644 --- a/test/Tests/Readers/Org/Block/Table.hs +++ b/test/Tests/Readers/Org/Block/Table.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Block.Table @@ -13,7 +12,6 @@ Test parsing of org tables. -} module Tests.Readers.Org.Block.Table (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Directive.hs b/test/Tests/Readers/Org/Directive.hs index 00cb9762b..85d1bc088 100644 --- a/test/Tests/Readers/Org/Directive.hs +++ b/test/Tests/Readers/Org/Directive.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Directive @@ -13,7 +12,6 @@ Tests parsing of org directives (like @#+OPTIONS@). -} module Tests.Readers.Org.Directive (tests) where -import Prelude import Data.Time (UTCTime (UTCTime), secondsToDiffTime) import Data.Time.Calendar (Day (ModifiedJulianDay)) import Test.Tasty (TestTree, testGroup) diff --git a/test/Tests/Readers/Org/Inline.hs b/test/Tests/Readers/Org/Inline.hs index 13e9fef21..111d74879 100644 --- a/test/Tests/Readers/Org/Inline.hs +++ b/test/Tests/Readers/Org/Inline.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline @@ -13,7 +12,6 @@ Tests parsing of org inlines. -} module Tests.Readers.Org.Inline (tests) where -import Prelude import Data.List (intersperse) import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs index 87bb3ca75..5538f1ec8 100644 --- a/test/Tests/Readers/Org/Inline/Citation.hs +++ b/test/Tests/Readers/Org/Inline/Citation.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Citation @@ -13,7 +12,6 @@ Test parsing of citations in org input. -} module Tests.Readers.Org.Inline.Citation (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:)) diff --git a/test/Tests/Readers/Org/Inline/Note.hs b/test/Tests/Readers/Org/Inline/Note.hs index 20157d2ae..c37133d54 100644 --- a/test/Tests/Readers/Org/Inline/Note.hs +++ b/test/Tests/Readers/Org/Inline/Note.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Note @@ -13,7 +12,6 @@ Test parsing of footnotes in org input. -} module Tests.Readers.Org.Inline.Note (tests) where -import Prelude import Test.Tasty (TestTree) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:)) diff --git a/test/Tests/Readers/Org/Inline/Smart.hs b/test/Tests/Readers/Org/Inline/Smart.hs index 7fde380af..db96eb2ca 100644 --- a/test/Tests/Readers/Org/Inline/Smart.hs +++ b/test/Tests/Readers/Org/Inline/Smart.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Inline.Smart @@ -13,7 +12,6 @@ Test smart parsing of quotes, apostrophe, etc. -} module Tests.Readers.Org.Inline.Smart (tests) where -import Prelude import Data.Text (Text) import Test.Tasty (TestTree) import Tests.Helpers ((=?>), purely, test) diff --git a/test/Tests/Readers/Org/Meta.hs b/test/Tests/Readers/Org/Meta.hs index 3c50f891b..6363d84b0 100644 --- a/test/Tests/Readers/Org/Meta.hs +++ b/test/Tests/Readers/Org/Meta.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Meta @@ -13,7 +12,6 @@ Tests parsing of org meta data (mostly lines starting with @#+@). -} module Tests.Readers.Org.Meta (tests) where -import Prelude import Test.Tasty (TestTree, testGroup) import Tests.Helpers ((=?>)) import Tests.Readers.Org.Shared ((=:), spcSep) diff --git a/test/Tests/Readers/Org/Shared.hs b/test/Tests/Readers/Org/Shared.hs index 4d0848575..c584eff19 100644 --- a/test/Tests/Readers/Org/Shared.hs +++ b/test/Tests/Readers/Org/Shared.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Org.Shared @@ -18,7 +17,6 @@ module Tests.Readers.Org.Shared , tagSpan ) where -import Prelude import Data.List (intersperse) import Data.Text (Text) import Tests.Helpers (ToString, purely, test) diff --git a/test/Tests/Readers/RST.hs b/test/Tests/Readers/RST.hs index 68241b7f9..a12b59fc2 100644 --- a/test/Tests/Readers/RST.hs +++ b/test/Tests/Readers/RST.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | @@ -14,7 +13,6 @@ Tests for the RST reader. -} module Tests.Readers.RST (tests) where -import Prelude import Data.Text (Text) import qualified Data.Text as T import Test.Tasty diff --git a/test/Tests/Readers/Txt2Tags.hs b/test/Tests/Readers/Txt2Tags.hs index 62f336690..013f29d68 100644 --- a/test/Tests/Readers/Txt2Tags.hs +++ b/test/Tests/Readers/Txt2Tags.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Readers.Txt2Tags @@ -14,7 +13,6 @@ Tests for the Txt2Tags reader. -} module Tests.Readers.Txt2Tags (tests) where -import Prelude import Data.List (intersperse) import Data.Text (Text) import qualified Data.Text as T diff --git a/test/Tests/Shared.hs b/test/Tests/Shared.hs index 72a59fec0..e415ea153 100644 --- a/test/Tests/Shared.hs +++ b/test/Tests/Shared.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Shared @@ -13,7 +12,6 @@ Tests for functions used in many parts of the library. -} module Tests.Shared (tests) where -import Prelude import System.FilePath.Posix (joinPath) import Test.Tasty import Test.Tasty.HUnit (assertBool, testCase, (@?=)) diff --git a/test/Tests/Writers/AnnotatedTable.hs b/test/Tests/Writers/AnnotatedTable.hs index 7e16cf8e0..53cca80a6 100644 --- a/test/Tests/Writers/AnnotatedTable.hs +++ b/test/Tests/Writers/AnnotatedTable.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Tests.Writers.AnnotatedTable diff --git a/test/Tests/Writers/AsciiDoc.hs b/test/Tests/Writers/AsciiDoc.hs index 75f6e5e97..04655635f 100644 --- a/test/Tests/Writers/AsciiDoc.hs +++ b/test/Tests/Writers/AsciiDoc.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.AsciiDoc (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs index c747e5d2f..5c1c98d4e 100644 --- a/test/Tests/Writers/ConTeXt.hs +++ b/test/Tests/Writers/ConTeXt.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.ConTeXt (tests) where -import Prelude import Data.Text (unpack, pack) import Test.Tasty import Test.Tasty.QuickCheck diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 621c1280b..842aed7ae 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Docbook (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 66a5c3d36..2e0f1e3fb 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -1,7 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} module Tests.Writers.Docx (tests) where -import Prelude import Text.Pandoc import Test.Tasty import Tests.Writers.OOXML diff --git a/test/Tests/Writers/FB2.hs b/test/Tests/Writers/FB2.hs index 7699c58e9..2e10636fa 100644 --- a/test/Tests/Writers/FB2.hs +++ b/test/Tests/Writers/FB2.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.FB2 (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs index 6ff0a6e1d..328801e31 100644 --- a/test/Tests/Writers/HTML.hs +++ b/test/Tests/Writers/HTML.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.HTML (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 7d98f979b..2f501c890 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.JATS (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/LaTeX.hs b/test/Tests/Writers/LaTeX.hs index 44e23d48e..ae5879099 100644 --- a/test/Tests/Writers/LaTeX.hs +++ b/test/Tests/Writers/LaTeX.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.LaTeX (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/Markdown.hs b/test/Tests/Writers/Markdown.hs index 4b819de24..d4f927ebe 100644 --- a/test/Tests/Writers/Markdown.hs +++ b/test/Tests/Writers/Markdown.hs @@ -1,9 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -fno-warn-name-shadowing #-} module Tests.Writers.Markdown (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Tests.Helpers diff --git a/test/Tests/Writers/Ms.hs b/test/Tests/Writers/Ms.hs index d73603314..ad6849633 100644 --- a/test/Tests/Writers/Ms.hs +++ b/test/Tests/Writers/Ms.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Ms (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/Muse.hs b/test/Tests/Writers/Muse.hs index d0df0799f..5bddca3af 100644 --- a/test/Tests/Writers/Muse.hs +++ b/test/Tests/Writers/Muse.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Muse (tests) where diff --git a/test/Tests/Writers/Native.hs b/test/Tests/Writers/Native.hs index 905e83b1e..d7771ca19 100644 --- a/test/Tests/Writers/Native.hs +++ b/test/Tests/Writers/Native.hs @@ -1,7 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} module Tests.Writers.Native (tests) where -import Prelude import Data.Text (unpack) import Test.Tasty import Test.Tasty.QuickCheck diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index 628ea9409..376f02c55 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -1,10 +1,8 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.OOXML (ooxmlTest) where -import Prelude import Text.Pandoc import Test.Tasty import Test.Tasty.Golden.Advanced diff --git a/test/Tests/Writers/Org.hs b/test/Tests/Writers/Org.hs index c99f7344d..9cbe360da 100644 --- a/test/Tests/Writers/Org.hs +++ b/test/Tests/Writers/Org.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Org (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/Plain.hs b/test/Tests/Writers/Plain.hs index b8d1f6693..17edc9dbd 100644 --- a/test/Tests/Writers/Plain.hs +++ b/test/Tests/Writers/Plain.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Plain (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/Tests/Writers/Powerpoint.hs b/test/Tests/Writers/Powerpoint.hs index be98fe0e7..87ebe990c 100644 --- a/test/Tests/Writers/Powerpoint.hs +++ b/test/Tests/Writers/Powerpoint.hs @@ -1,7 +1,5 @@ -{-# LANGUAGE NoImplicitPrelude #-} module Tests.Writers.Powerpoint (tests) where -import Prelude import Tests.Writers.OOXML (ooxmlTest) import Text.Pandoc import Test.Tasty diff --git a/test/Tests/Writers/RST.hs b/test/Tests/Writers/RST.hs index a52423fad..94745e9a2 100644 --- a/test/Tests/Writers/RST.hs +++ b/test/Tests/Writers/RST.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.RST (tests) where -import Prelude import Control.Monad.Identity import Test.Tasty import Test.Tasty.HUnit diff --git a/test/Tests/Writers/TEI.hs b/test/Tests/Writers/TEI.hs index 31e970495..fa372909f 100644 --- a/test/Tests/Writers/TEI.hs +++ b/test/Tests/Writers/TEI.hs @@ -1,8 +1,6 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.TEI (tests) where -import Prelude import Test.Tasty import Tests.Helpers import Text.Pandoc diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs index 9973dffc8..4d9da525b 100644 --- a/test/test-pandoc.hs +++ b/test/test-pandoc.hs @@ -1,9 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# OPTIONS_GHC -Wall #-} module Main where -import Prelude import System.Environment (getArgs) import qualified Control.Exception as E import Text.Pandoc.App (convertWithOpts, defaultOpts, options, diff --git a/trypandoc/trypandoc.hs b/trypandoc/trypandoc.hs index 9a53aa18c..810752afa 100644 --- a/trypandoc/trypandoc.hs +++ b/trypandoc/trypandoc.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Main @@ -12,7 +11,6 @@ Provides a webservice which allows to try pandoc in the browser. -} module Main where -import Prelude import Network.Wai.Handler.CGI import Network.Wai import Control.Applicative ((<$>)) -- cgit v1.2.3 From 69b7401e31216ae52164950970cf88b778d8d198 Mon Sep 17 00:00:00 2001 From: Nils Carlson <nils@nilscarlson.se> Date: Mon, 8 Feb 2021 17:36:58 +0000 Subject: DocBook reader: Support informalfigure (#7079) Add support for informalfigure. --- src/Text/Pandoc/Readers/DocBook.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index d3b2dd4d3..ada3e98ec 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -206,7 +206,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] info - A wrapper for information about a component or other block. (DocBook v5) [x] informalequation - A displayed mathematical equation without a title [x] informalexample - A displayed example without a title -[ ] informalfigure - A untitled figure +[x] informalfigure - An untitled figure [ ] informaltable - A table without a title [ ] initializer - The initializer for a FieldSynopsis [x] inlineequation - A mathematical equation or expression occurring inline @@ -669,6 +669,7 @@ blockTags = , "index" , "info" , "informalexample" + , "informalfigure" , "informaltable" , "itemizedlist" , "linegroup" @@ -855,6 +856,7 @@ parseBlock (Elem e) = "variablelist" -> definitionList <$> deflistitems "procedure" -> bulletList <$> steps "figure" -> getFigure e + "informalfigure" -> getFigure e "mediaobject" -> para <$> getMediaobject e "caption" -> skip "info" -> addMetadataFromElement e -- cgit v1.2.3 From 5cd1c1001fbfc768fdb146f14ae056a9a7ba94eb Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 8 Feb 2021 09:15:43 -0800 Subject: ODT reader: give more information if zip can't be unpacked. --- src/Text/Pandoc/Readers/Odt.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 7b8bfd4b5..9b66b60ec 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -23,6 +23,8 @@ import System.FilePath import Control.Monad.Except (throwError) +import qualified Data.Text as T + import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition @@ -60,7 +62,8 @@ readOdt' _ bytes = bytesToOdt bytes-- of bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag) bytesToOdt bytes = case toArchiveOrFail bytes of Right archive -> archiveToOdt archive - Left _ -> Left $ PandocParseError "Couldn't parse odt file." + Left err -> Left $ PandocParseError + $ "Could not unzip ODT: " <> T.pack err -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -- cgit v1.2.3 From f70795dc5e431c0132acc8bb2d20bb5dec942de7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 8 Feb 2021 09:39:29 -0800 Subject: ODT reader: finer-grained errors on parse failure. See #7091. --- src/Text/Pandoc/Readers/Odt.hs | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 9b66b60ec..9943d3147 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE PatternGuards #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt @@ -67,29 +66,27 @@ bytesToOdt bytes = case toArchiveOrFail bytes of -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive - | Just contentEntry <- findEntryByPath "content.xml" archive - , Just stylesEntry <- findEntryByPath "styles.xml" archive - , Just contentElem <- entryToXmlElem contentEntry - , Just stylesElem <- entryToXmlElem stylesEntry - , Right styles <- chooseMax (readStylesAt stylesElem ) - (readStylesAt contentElem) - , media <- filteredFilesFromArchive archive filePathIsOdtMedia - , startState <- readerState styles media - , Right pandocWithMedia <- runConverter' read_body - startState - contentElem - - = Right pandocWithMedia - - | otherwise - -- Not very detailed, but I don't think more information would be helpful - = Left $ PandocParseError "Couldn't parse odt file." - where - filePathIsOdtMedia :: FilePath -> Bool +archiveToOdt archive = either (Left. PandocParseError) Right $ do + let onFailure msg Nothing = Left msg + onFailure _ (Just x) = Right x + contentEntry <- onFailure "Could not find content.xml" + (findEntryByPath "content.xml" archive) + stylesEntry <- onFailure "Could not find styles.xml" + (findEntryByPath "styles.xml" archive) + contentElem <- onFailure "Could not find content element" + (entryToXmlElem contentEntry) + stylesElem <- onFailure "Could not find styles element" + (entryToXmlElem stylesEntry) + styles <- either (\_ -> Left "Could not read styles") Right + (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) + let filePathIsOdtMedia :: FilePath -> Bool filePathIsOdtMedia fp = let (dir, name) = splitFileName fp in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") + let media = filteredFilesFromArchive archive filePathIsOdtMedia + let startState = readerState styles media + either (\_ -> Left "Could not convert opendocument") Right + (runConverter' read_body startState contentElem) -- -- cgit v1.2.3 From 8ca191604dcd13af27c11d2da225da646ebce6fc Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 8 Feb 2021 23:35:19 -0800 Subject: Add new unexported module T.P.XMLParser. This exports functions that uses xml-conduit's parser to produce an xml-light Element or [Content]. This allows existing pandoc code to use a better parser without much modification. The new parser is used in all places where xml-light's parser was previously used. Benchmarks show a significant performance improvement in parsing XML-based formats (especially ODT and FB2). Note that the xml-light types use String, so the conversion from xml-conduit types involves a lot of extra allocation. It would be desirable to avoid that in the future by gradually switching to using xml-conduit directly. This can be done module by module. The new parser also reports errors, which we report when possible. A new constructor PandocXMLError has been added to PandocError in T.P.Error [API change]. Closes #7091, which was the main stimulus. These changes revealed the need for some changes in the tests. The docbook-reader.docbook test lacked definitions for the entities it used; these have been added. And the docx golden tests have been updated, because the new parser does not preserve the order of attributes. Add entity defs to docbook-reader.docbook. Update golden tests for docx. --- MANUAL.txt | 1 + pandoc.cabal | 2 + src/Text/Pandoc/Error.hs | 3 + src/Text/Pandoc/ImageSize.hs | 5 +- src/Text/Pandoc/Readers/DocBook.hs | 52 ++++++++++------ src/Text/Pandoc/Readers/Docx/Parse.hs | 21 ++++--- src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 28 +++++---- src/Text/Pandoc/Readers/EPUB.hs | 17 ++++-- src/Text/Pandoc/Readers/FB2.hs | 10 ++-- src/Text/Pandoc/Readers/JATS.hs | 9 ++- src/Text/Pandoc/Readers/OPML.hs | 10 +++- src/Text/Pandoc/Readers/Odt.hs | 24 ++++---- src/Text/Pandoc/Writers/EPUB.hs | 13 +++- src/Text/Pandoc/Writers/FB2.hs | 11 +++- src/Text/Pandoc/Writers/ODT.hs | 29 +++++---- src/Text/Pandoc/Writers/OOXML.hs | 9 +-- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 4 +- src/Text/Pandoc/XMLParser.hs | 66 +++++++++++++++++++++ test/Tests/Readers/JATS.hs | 1 + test/command/5321.md | 4 +- test/docbook-reader.docbook | 7 ++- test/docx/golden/block_quotes.docx | Bin 10092 -> 10098 bytes test/docx/golden/codeblock.docx | Bin 9944 -> 9950 bytes test/docx/golden/comments.docx | Bin 10279 -> 10285 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10042 -> 10048 bytes test/docx/golden/custom_style_preserve.docx | Bin 10666 -> 10673 bytes test/docx/golden/custom_style_reference.docx | Bin 12434 -> 12434 bytes test/docx/golden/definition_list.docx | Bin 9941 -> 9947 bytes .../golden/document-properties-short-desc.docx | Bin 9947 -> 9953 bytes test/docx/golden/document-properties.docx | Bin 10423 -> 10429 bytes test/docx/golden/headers.docx | Bin 10080 -> 10086 bytes test/docx/golden/image.docx | Bin 26758 -> 26764 bytes test/docx/golden/inline_code.docx | Bin 9880 -> 9886 bytes test/docx/golden/inline_formatting.docx | Bin 10060 -> 10066 bytes test/docx/golden/inline_images.docx | Bin 26816 -> 26822 bytes test/docx/golden/link_in_notes.docx | Bin 10101 -> 10107 bytes test/docx/golden/links.docx | Bin 10276 -> 10282 bytes test/docx/golden/lists.docx | Bin 10352 -> 10358 bytes test/docx/golden/lists_continuing.docx | Bin 10143 -> 10149 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10232 -> 10238 bytes test/docx/golden/lists_restarting.docx | Bin 10144 -> 10150 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10239 -> 10245 bytes test/docx/golden/notes.docx | Bin 10046 -> 10052 bytes test/docx/golden/raw-blocks.docx | Bin 9980 -> 9986 bytes test/docx/golden/raw-bookmarks.docx | Bin 10115 -> 10121 bytes test/docx/golden/table_one_row.docx | Bin 9932 -> 9938 bytes test/docx/golden/table_with_list_cell.docx | Bin 10249 -> 10255 bytes test/docx/golden/tables.docx | Bin 10266 -> 10272 bytes test/docx/golden/track_changes_deletion.docx | Bin 9924 -> 9930 bytes test/docx/golden/track_changes_insertion.docx | Bin 9907 -> 9913 bytes test/docx/golden/track_changes_move.docx | Bin 9941 -> 9947 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10053 -> 10059 bytes test/docx/golden/unicode.docx | Bin 9865 -> 9871 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9913 -> 9919 bytes test/jats-reader.native | 2 +- test/jats-reader.xml | 1 + test/pptx/code-custom.pptx | Bin 28230 -> 28221 bytes test/pptx/code-custom_templated.pptx | Bin 395524 -> 395516 bytes test/pptx/code.pptx | Bin 28229 -> 28220 bytes test/pptx/code_templated.pptx | Bin 395522 -> 395514 bytes test/pptx/document-properties-short-desc.pptx | Bin 27012 -> 27004 bytes .../document-properties-short-desc_templated.pptx | Bin 394298 -> 394288 bytes test/pptx/document-properties.pptx | Bin 27417 -> 27408 bytes test/pptx/document-properties_templated.pptx | Bin 394701 -> 394691 bytes test/pptx/endnotes.pptx | Bin 26969 -> 26962 bytes test/pptx/endnotes_templated.pptx | Bin 394262 -> 394253 bytes test/pptx/endnotes_toc.pptx | Bin 27892 -> 27789 bytes test/pptx/endnotes_toc_templated.pptx | Bin 395186 -> 395083 bytes test/pptx/images.pptx | Bin 44626 -> 44619 bytes test/pptx/images_templated.pptx | Bin 411916 -> 411909 bytes test/pptx/inline_formatting.pptx | Bin 26156 -> 26148 bytes test/pptx/inline_formatting_templated.pptx | Bin 393447 -> 393438 bytes test/pptx/lists.pptx | Bin 27056 -> 27049 bytes test/pptx/lists_templated.pptx | Bin 394349 -> 394340 bytes test/pptx/raw_ooxml.pptx | Bin 26948 -> 26940 bytes test/pptx/raw_ooxml_templated.pptx | Bin 394240 -> 394231 bytes test/pptx/remove_empty_slides.pptx | Bin 44073 -> 44065 bytes test/pptx/remove_empty_slides_templated.pptx | Bin 411359 -> 411352 bytes test/pptx/slide_breaks.pptx | Bin 28582 -> 28575 bytes test/pptx/slide_breaks_slide_level_1.pptx | Bin 27751 -> 27744 bytes .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395045 -> 395038 bytes test/pptx/slide_breaks_templated.pptx | Bin 395875 -> 395868 bytes test/pptx/slide_breaks_toc.pptx | Bin 29539 -> 29532 bytes test/pptx/slide_breaks_toc_templated.pptx | Bin 396833 -> 396826 bytes test/pptx/speaker_notes.pptx | Bin 35444 -> 35436 bytes test/pptx/speaker_notes_after_metadata.pptx | Bin 31683 -> 31675 bytes .../speaker_notes_after_metadata_templated.pptx | Bin 398964 -> 398955 bytes test/pptx/speaker_notes_afterheader.pptx | Bin 30700 -> 30691 bytes test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397988 -> 397979 bytes test/pptx/speaker_notes_afterseps.pptx | Bin 51612 -> 51604 bytes test/pptx/speaker_notes_afterseps_templated.pptx | Bin 418903 -> 418896 bytes test/pptx/speaker_notes_templated.pptx | Bin 402736 -> 402728 bytes test/pptx/start_numbering_at.pptx | Bin 27031 -> 27023 bytes test/pptx/start_numbering_at_templated.pptx | Bin 394323 -> 394314 bytes test/pptx/tables.pptx | Bin 27573 -> 27566 bytes test/pptx/tables_templated.pptx | Bin 394868 -> 394859 bytes test/pptx/two_column.pptx | Bin 26075 -> 26065 bytes test/pptx/two_column_templated.pptx | Bin 393366 -> 393355 bytes 98 files changed, 238 insertions(+), 91 deletions(-) create mode 100644 src/Text/Pandoc/XMLParser.hs (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 5b90d039a..dc3b4ca77 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1464,6 +1464,7 @@ Nonzero exit codes have the following meanings: 24 PandocCiteprocError 31 PandocEpubSubdirectoryError 43 PandocPDFError + 44 PandocXMLError 47 PandocPDFProgramNotFoundError 61 PandocHttpError 62 PandocShouldNeverHappenError diff --git a/pandoc.cabal b/pandoc.cabal index 72e7c2da5..e56456c68 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -493,6 +493,7 @@ library unicode-transforms >= 0.3 && < 0.4, unordered-containers >= 0.2 && < 0.3, xml >= 1.3.12 && < 1.4, + xml-conduit >= 1.7 && < 1.10, zip-archive >= 0.2.3.4 && < 0.5, zlib >= 0.5 && < 0.7 if os(windows) && arch(i386) @@ -686,6 +687,7 @@ library Text.Pandoc.Lua.PandocLua, Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Walk, + Text.Pandoc.XMLParser, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 204cf15ca..831405f42 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -48,6 +48,7 @@ data PandocError = PandocIOError Text IOError | PandocFailOnWarningError | PandocPDFProgramNotFoundError Text | PandocPDFError Text + | PandocXMLError Text Text | PandocFilterError Text Text | PandocLuaError Text | PandocCouldNotFindDataFileError Text @@ -103,6 +104,8 @@ handleError (Left e) = PandocPDFProgramNotFoundError pdfprog -> err 47 $ pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg + PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <> + (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg PandocFilterError filtername msg -> err 83 $ "Error running filter " <> filtername <> ":\n" <> msg PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e19958f6a..e0a1af8e8 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -45,7 +45,9 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.XML.Light as Xml +import Text.Pandoc.XMLParser (parseXMLElement) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Control.Applicative import qualified Data.Attoparsec.ByteString.Char8 as A @@ -327,7 +329,8 @@ getSize img = svgSize :: WriterOptions -> ByteString -> Maybe ImageSize svgSize opts img = do - doc <- Xml.parseXMLDoc $ UTF8.toString img + doc <- either (const mzero) return $ parseXMLElement + $ TL.fromStrict $ UTF8.toText img let viewboxSize = do vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ada3e98ec..ad0108843 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document. -} module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Control.Monad.State.Strict -import Data.Char (isSpace, toUpper) +import Data.Char (isSpace, toUpper, isLetter) import Data.Default import Data.Either (rights) import Data.Foldable (asum) @@ -21,7 +21,10 @@ import Data.List (intersperse,elemIndex) import Data.Maybe (fromMaybe,mapMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Control.Monad.Except (throwError) import Text.HTML.TagSoup.Entity (lookupEntity) +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options @@ -29,6 +32,7 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) {- @@ -537,22 +541,25 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - let tree = normalizeTree . parseXML . handleInstructions $ crFilter inp + tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $ + parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) --- We treat <?asciidoc-br?> specially (issue #1236), converting it --- to <br/>, since xml-light doesn't parse the instruction correctly. --- Other xml instructions are simply removed from the input stream. +-- We treat certain processing instructions by converting them to tags +-- beginning "pi-". handleInstructions :: Text -> Text -handleInstructions = T.pack . handleInstructions' . T.unpack - -handleInstructions' :: String -> String -handleInstructions' ('<':'?':'a':'s':'c':'i':'i':'d':'o':'c':'-':'b':'r':'?':'>':xs) = '<':'b':'r':'/':'>': handleInstructions' xs -handleInstructions' xs = case break (=='<') xs of - (ys, []) -> ys - ([], '<':zs) -> '<' : handleInstructions' zs - (ys, zs) -> ys ++ handleInstructions' zs +handleInstructions t = + let (x,y) = T.breakOn "<?" t + in if T.null y + then x + else + let (w,z) = T.breakOn "?>" y + in (if T.takeWhile (\c -> isLetter c || c == '-') + (T.drop 2 w) `elem` ["asciidoc-br", "dbfo"] + then x <> "<pi-" <> T.drop 2 w <> "/>" + else x <> w <> T.take 2 z) <> + handleInstructions (T.drop 2 z) getFigure :: PandocMonad m => Element -> DB m Blocks getFigure e = do @@ -892,7 +899,11 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then "<?" <> qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty codeBlockWithLang = do @@ -964,7 +975,7 @@ parseBlock (Elem e) = cs -> map toAlignment cs let parseWidth s = safeRead (T.filter (\x -> (x >= '0' && x <= '9') || x == '.') s) - let textWidth = case filterChild (named "?dbfo") e of + let textWidth = case filterChild (named "pi-dbfo") e of Just d -> case attrValue "table-width" d of "" -> 1.0 w -> fromMaybe 100.0 (parseWidth w) / 100.0 @@ -1165,12 +1176,15 @@ parseInline (Elem e) = "title" -> return mempty "affiliation" -> skip -- Note: this isn't a real docbook tag; it's what we convert - -- <?asciidor-br?> to in handleInstructions, above. A kludge to - -- work around xml-light's inability to parse an instruction. - "br" -> return linebreak + -- <?asciidor-br?> to in handleInstructions, above. + "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id where skip = do - lift $ report $ IgnoredElement $ T.pack $ qName (elName e) + let qn = T.pack $ qName $ elName e + let name = if "pi-" `T.isPrefixOf` qn + then "<?" <> qn <> "?>" + else qn + lift $ report $ IgnoredElement name return mempty innerInlines f = extractSpaces f . mconcat <$> diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index fdcffcc3f..056dab6c2 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -74,6 +74,7 @@ import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) import Text.XML.Light import qualified Text.XML.Light.Cursor as XMLC +import Text.Pandoc.XMLParser (parseXMLElement) data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -343,10 +344,16 @@ archiveToDocxWithWarnings archive = do Right doc -> Right (Docx doc, stateWarnings st) Left e -> Left e +parseXMLFromEntry :: Entry -> Maybe Element +parseXMLFromEntry entry = + case parseXMLElement (UTF8.toTextLazy (fromEntry entry)) of + Left _ -> Nothing + Right el -> Just el + getDocumentXmlPath :: Archive -> Maybe FilePath getDocumentXmlPath zf = do entry <- findEntryByPath "_rels/.rels" zf - relsElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + relsElem <- parseXMLFromEntry entry let rels = filterChildrenName (\n -> qName n == "Relationship") relsElem rel <- find (\e -> findAttr (QName "Type" Nothing Nothing) e == Just "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument") @@ -362,7 +369,7 @@ archiveToDocument :: Archive -> D Document archiveToDocument zf = do docPath <- asks envDocXmlPath entry <- maybeToD $ findEntryByPath docPath zf - docElem <- maybeToD $ (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + docElem <- maybeToD $ parseXMLFromEntry entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) @@ -401,9 +408,9 @@ constructBogusParStyleData stName = ParStyle archiveToNotes :: Archive -> Notes archiveToNotes zf = let fnElem = findEntryByPath "word/footnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry enElem = findEntryByPath "word/endnotes.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry fn_namespaces = maybe [] elemToNameSpaces fnElem en_namespaces = maybe [] elemToNameSpaces enElem ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces @@ -415,7 +422,7 @@ archiveToNotes zf = archiveToComments :: Archive -> Comments archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf - >>= (parseXMLDoc . UTF8.toStringLazy . fromEntry) + >>= parseXMLFromEntry cmts_namespaces = maybe [] elemToNameSpaces cmtsElem cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces) in @@ -445,7 +452,7 @@ filePathToRelationships :: Archive -> FilePath -> FilePath -> [Relationship] filePathToRelationships ar docXmlPath fp | Just relType <- filePathToRelType fp docXmlPath , Just entry <- findEntryByPath fp ar - , Just relElems <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry = + , Just relElems <- parseXMLFromEntry entry = mapMaybe (relElemToRelationship relType) $ elChildren relElems filePathToRelationships _ _ _ = [] @@ -527,7 +534,7 @@ archiveToNumbering' zf = case findEntryByPath "word/numbering.xml" zf of Nothing -> Just $ Numbering [] [] [] Just entry -> do - numberingElem <- (parseXMLDoc . UTF8.toStringLazy . fromEntry) entry + numberingElem <- parseXMLFromEntry entry let namespaces = elemToNameSpaces numberingElem numElems = findChildrenByName namespaces "w" "num" numberingElem absNumElems = findChildrenByName namespaces "w" "abstractNum" numberingElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index 236167187..edade8654 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -53,6 +53,7 @@ import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) @@ -135,19 +136,22 @@ defaultRunStyle = RunStyle { isBold = Nothing , rParentStyle = Nothing } -archiveToStyles' :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) => - (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) +archiveToStyles' + :: (Ord k1, Ord k2, ElemToStyle a1, ElemToStyle a2) + => (a1 -> k1) -> (a2 -> k2) -> Archive -> (M.Map k1 a1, M.Map k2 a2) archiveToStyles' conv1 conv2 zf = - let stylesElem = findEntryByPath "word/styles.xml" zf >>= - (parseXMLDoc . UTF8.toStringLazy . fromEntry) - in - case stylesElem of - Nothing -> (M.empty, M.empty) - Just styElem -> - let namespaces = elemToNameSpaces styElem - in - ( M.fromList $ map (\r -> (conv1 r, r)) $ buildBasedOnList namespaces styElem Nothing, - M.fromList $ map (\p -> (conv2 p, p)) $ buildBasedOnList namespaces styElem Nothing) + case findEntryByPath "word/styles.xml" zf of + Nothing -> (M.empty, M.empty) + Just entry -> + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Left _ -> (M.empty, M.empty) + Right styElem -> + let namespaces = elemToNameSpaces styElem + in + ( M.fromList $ map (\r -> (conv1 r, r)) $ + buildBasedOnList namespaces styElem Nothing, + M.fromList $ map (\p -> (conv2 p, p)) $ + buildBasedOnList namespaces styElem Nothing) isBasedOnStyle :: (ElemToStyle a, FromStyleId (StyleId a)) => NameSpaces -> Element -> Maybe a -> Bool isBasedOnStyle ns element parentStyle diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 5e3326e6d..369c4f0c9 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -17,7 +17,7 @@ module Text.Pandoc.Readers.EPUB (readEPUB) where -import Codec.Archive.Zip (Archive (..), Entry, findEntryByPath, fromEntry, +import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry, toArchiveOrFail) import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) @@ -41,9 +41,10 @@ import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) -import qualified Text.Pandoc.UTF8 as UTF8 (toStringLazy) +import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type Items = M.Map String (FilePath, MimeType) @@ -181,7 +182,7 @@ renameMeta s = T.pack s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do metaEntry <- findEntryByPathE ("META-INF" </> "container.xml") archive - docElem <- (parseXMLDocE . UTF8.toStringLazy . fromEntry) metaEntry + docElem <- parseXMLDocE metaEntry let namespaces = mapMaybe attrToNSPair (elAttribs docElem) ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) as <- fmap (map attrToPair . elAttribs) @@ -190,7 +191,7 @@ getManifest archive = do let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive - (rootdir,) <$> (parseXMLDocE . UTF8.toStringLazy . fromEntry $ manifest) + (rootdir,) <$> parseXMLDocE manifest -- Fixup @@ -284,8 +285,12 @@ findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise . unEscapeString -> path) a = mkE ("No entry on path: " ++ path) $ findEntryByPath path a -parseXMLDocE :: PandocMonad m => String -> m Element -parseXMLDocE doc = mkE "Unable to parse XML doc" $ parseXMLDoc doc +parseXMLDocE :: PandocMonad m => Entry -> m Element +parseXMLDocE entry = + either (throwError . PandocXMLError fp) return $ parseXMLElement doc + where + doc = UTF8.toTextLazy . fromEntry $ entry + fp = T.pack $ eRelativePath entry findElementE :: PandocMonad m => QName -> Element -> m Element findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index b0d2f092b..b804eab4f 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -32,6 +32,7 @@ import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Data.Default import Data.Maybe import Text.HTML.TagSoup.Entity (lookupEntity) @@ -42,6 +43,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) type FB2 m = StateT FB2State m @@ -64,10 +66,10 @@ instance HasMeta FB2State where readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readFB2 _ inp = - case parseXMLDoc $ crFilter inp of - Nothing -> throwError $ PandocParseError "Not an XML document" - Just e -> do - (bs, st) <- runStateT (parseRootElement e) def + case parseXMLElement $ TL.fromStrict $ crFilter inp of + Left msg -> throwError $ PandocXMLError "" msg + Right el -> do + (bs, st) <- runStateT (parseRootElement el) def let authors = if null $ fb2Authors st then id else setMeta "author" (map text $ reverse $ fb2Authors st) diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index c638da519..dfd343b7a 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -14,6 +14,8 @@ Conversion of JATS XML to 'Pandoc' document. module Text.Pandoc.Readers.JATS ( readJATS ) where import Control.Monad.State.Strict +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(..)) import Data.Char (isDigit, isSpace, toUpper) import Data.Default import Data.Generics @@ -22,6 +24,7 @@ import qualified Data.Map as Map import Data.Maybe (maybeToList, fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -29,6 +32,7 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) @@ -51,8 +55,9 @@ instance Default JATSState where readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readJATS _ inp = do - let tree = normalizeTree . parseXML - $ T.unpack $ crFilter inp + tree <- either (throwError . PandocXMLError "") + (return . normalizeTree) $ + parseXMLContents (TL.fromStrict $ crFilter inp) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 5b8996025..bdadc4dd9 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -19,14 +19,18 @@ import Data.Generics import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Shared (crFilter, blocksToInlines') import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLContents) +import Control.Monad.Except (throwError) type OPML m = StateT OPMLState m @@ -49,8 +53,10 @@ instance Default OPMLState where readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readOPML opts inp = do (bs, st') <- runStateT - (mapM parseBlock $ normalizeTree $ - parseXML (T.unpack (crFilter inp))) def{ opmlOptions = opts } + (case parseXMLContents (TL.fromStrict (crFilter inp)) of + Left msg -> throwError $ PandocXMLError "" msg + Right ns -> mapM parseBlock $ normalizeTree ns) + def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ setAuthors (opmlDocAuthors st') $ diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 9943d3147..85308deb1 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Readers.Odt ( readOdt ) where import Codec.Archive.Zip import qualified Text.XML.Light as XML +import Text.Pandoc.XMLParser (parseXMLElement) import qualified Data.ByteString.Lazy as B @@ -66,18 +67,18 @@ bytesToOdt bytes = case toArchiveOrFail bytes of -- archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag) -archiveToOdt archive = either (Left. PandocParseError) Right $ do - let onFailure msg Nothing = Left msg +archiveToOdt archive = do + let onFailure msg Nothing = Left $ PandocParseError msg onFailure _ (Just x) = Right x contentEntry <- onFailure "Could not find content.xml" (findEntryByPath "content.xml" archive) stylesEntry <- onFailure "Could not find styles.xml" (findEntryByPath "styles.xml" archive) - contentElem <- onFailure "Could not find content element" - (entryToXmlElem contentEntry) - stylesElem <- onFailure "Could not find styles element" - (entryToXmlElem stylesEntry) - styles <- either (\_ -> Left "Could not read styles") Right + contentElem <- entryToXmlElem contentEntry + stylesElem <- entryToXmlElem stylesEntry + styles <- either + (\_ -> Left $ PandocParseError "Could not read styles") + Right (chooseMax (readStylesAt stylesElem ) (readStylesAt contentElem)) let filePathIsOdtMedia :: FilePath -> Bool filePathIsOdtMedia fp = @@ -85,10 +86,13 @@ archiveToOdt archive = either (Left. PandocParseError) Right $ do in (dir == "Pictures/") || (dir /= "./" && name == "content.xml") let media = filteredFilesFromArchive archive filePathIsOdtMedia let startState = readerState styles media - either (\_ -> Left "Could not convert opendocument") Right + either (\_ -> Left $ PandocParseError "Could not convert opendocument") Right (runConverter' read_body startState contentElem) -- -entryToXmlElem :: Entry -> Maybe XML.Element -entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry +entryToXmlElem :: Entry -> Either PandocError XML.Element +entryToXmlElem entry = + case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of + Right x -> Right x + Left msg -> Left $ PandocXMLError (T.pack $ eRelativePath entry) msg diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 1f16f6772..e99fa2567 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -55,8 +55,9 @@ import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) import Text.Printf (printf) import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), - add_attrs, lookupAttr, node, onlyElems, parseXML, + add_attrs, lookupAttr, node, onlyElems, ppElement, showElement, strContent, unode, unqual) +import Text.Pandoc.XMLParser (parseXMLContents) import Text.Pandoc.XML (escapeStringForXML) import Text.DocTemplates (FromContext(lookupContext), Context(..), ToContext(toVal), Val(..)) @@ -160,7 +161,12 @@ mkEntry path content = do getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata getEPUBMetadata opts meta = do let md = metadataFromMeta opts meta - let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts + elts <- case writerEpubMetadata opts of + Nothing -> return [] + Just t -> case parseXMLContents (TL.fromStrict t) of + Left msg -> throwError $ + PandocXMLError "epub metadata" msg + Right ns -> return (onlyElems ns) let md' = foldr addMetadataFromXML md elts let addIdentifier m = if null (epubIdentifier m) @@ -836,7 +842,8 @@ pandocToEPUB version opts doc = do : case subs of [] -> [] (_:_) -> [unode "ol" ! [("class","toc")] $ subs] - where titElements = parseXML titRendered + where titElements = either (const []) id $ + parseXMLContents (TL.fromStrict titRendered) titRendered = case P.runPure (writeHtmlStringForEPUB version opts{ writerTemplate = Nothing diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 25b1f28d1..9334d6e9a 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -19,7 +19,7 @@ FictionBook is an XML-based e-book format. For more information see: module Text.Pandoc.Writers.FB2 (writeFB2) where import Control.Monad (zipWithM) -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict (StateT, evalStateT, get, gets, lift, liftM, modify) import Data.ByteString.Base64 (encode) import Data.Char (isAscii, isControl, isSpace) @@ -27,16 +27,18 @@ import Data.Either (lefts, rights) import Data.List (intercalate) import Data.Text (Text, pack) import qualified Data.Text as T +import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) import Text.XML.Light import qualified Text.XML.Light as X import qualified Text.XML.Light.Cursor as XC -import qualified Text.XML.Light.Input as XI +import Text.Pandoc.XMLParser (parseXMLContents) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Logging import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, @@ -307,7 +309,10 @@ blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . map (el "p" . el "code" . T.unpack) . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" - then return $ XI.parseXML str + then + case parseXMLContents (TL.fromStrict str) of + Left msg -> throwError $ PandocXMLError "" msg + Right nds -> return nds else return [] blockToXml (Div _ bs) = cMapM blockToXml bs blockToXml (BlockQuote bs) = list . el "cite" <$> cMapM blockToXml bs diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 05dfad5eb..a32ff618c 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -13,7 +13,7 @@ Conversion of 'Pandoc' documents to ODT. -} module Text.Pandoc.Writers.ODT ( writeODT ) where import Codec.Archive.Zip -import Control.Monad.Except (catchError) +import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B import Data.Generics (everywhere', mkT) @@ -27,6 +27,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition +import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.MIME (extensionFromMimeType, getMimeType) @@ -35,10 +36,11 @@ import Text.DocLayout import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, fixDisplayMath) -import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toStringLazy) +import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.XML +import Text.Pandoc.XMLParser (parseXMLElement) import Text.TeXMath import Text.XML.Light @@ -172,17 +174,18 @@ updateStyleWithLang :: PandocMonad m => Maybe Lang -> Archive -> O m Archive updateStyleWithLang Nothing arch = return arch updateStyleWithLang (Just lang) arch = do epochtime <- floor `fmap` lift P.getPOSIXTime - return arch{ zEntries = [if eRelativePath e == "styles.xml" - then case parseXMLDoc - (toStringLazy (fromEntry e)) of - Nothing -> e - Just d -> - toEntry "styles.xml" epochtime - ( fromStringLazy - . ppTopElement - . addLang lang $ d ) - else e - | e <- zEntries arch] } + entries <- mapM (\e -> if eRelativePath e == "styles.xml" + then case parseXMLElement + (toTextLazy (fromEntry e)) of + Left msg -> throwError $ + PandocXMLError "styles.xml" msg + Right d -> return $ + toEntry "styles.xml" epochtime + ( fromStringLazy + . ppTopElement + . addLang lang $ d ) + else return e) (zEntries arch) + return arch{ zEntries = entries } addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 3ac007f4e..8f60e70d5 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -35,6 +35,7 @@ import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 import Text.XML.Light as XML +import Text.Pandoc.XMLParser (parseXMLElement) mknode :: Node t => String -> [(String,String)] -> t -> Element mknode s attrs = @@ -62,10 +63,10 @@ parseXml refArchive distArchive relpath = findEntryByPath relpath distArchive of Nothing -> throwError $ PandocSomeError $ T.pack relpath <> " missing in reference file" - Just e -> case parseXMLDoc . UTF8.toStringLazy . fromEntry $ e of - Nothing -> throwError $ PandocSomeError $ - T.pack relpath <> " corrupt in reference file" - Just d -> return d + Just e -> case parseXMLElement . UTF8.toTextLazy . fromEntry $ e of + Left msg -> + throwError $ PandocXMLError (T.pack relpath) msg + Right d -> return d -- Copied from Util diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 8554db622..cd092969b 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -29,6 +29,7 @@ import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) import Text.XML.Light +import Text.Pandoc.XMLParser (parseXMLElement) import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -77,7 +78,8 @@ getPresentationSize :: Archive -> Archive -> Maybe (Integer, Integer) getPresentationSize refArchive distArchive = do entry <- findEntryByPath "ppt/presentation.xml" refArchive `mplus` findEntryByPath "ppt/presentation.xml" distArchive - presElement <- parseXMLDoc $ UTF8.toStringLazy $ fromEntry entry + presElement <- either (const Nothing) return $ + parseXMLElement $ UTF8.toTextLazy $ fromEntry entry let ns = elemToNameSpaces presElement sldSize <- findChild (elemName ns "p" "sldSz") presElement cxS <- findAttr (QName "cx" Nothing Nothing) sldSize diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs new file mode 100644 index 000000000..8ad22a66a --- /dev/null +++ b/src/Text/Pandoc/XMLParser.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XMLParser + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Bridge to allow using xml-conduit's parser with xml-light's types. +-} +module Text.Pandoc.XMLParser + ( parseXMLElement + , parseXMLContents + , module Text.XML.Light.Types + ) where + +import qualified Control.Exception as E +import qualified Text.XML as Conduit +import Text.XML.Unresolved (InvalidEventStream(..)) +import qualified Text.XML.Light as Light +import Text.XML.Light.Types +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import qualified Data.Map as M +import Data.Maybe (mapMaybe) + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElement :: TL.Text -> Either T.Text Light.Element +parseXMLElement t = + elementToElement . Conduit.documentRoot <$> + either (Left . T.pack . E.displayException) Right + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + +parseXMLContents :: TL.Text -> Either T.Text [Light.Content] +parseXMLContents t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of + Left e -> + case E.fromException e of + Just (ContentAfterRoot _) -> + elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>") + _ -> Left . T.pack . E.displayException $ e + Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x] + +elementToElement :: Conduit.Element -> Light.Element +elementToElement (Conduit.Element name attribMap nodes) = + Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing + where + attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $ + M.toList attribMap + nameToQname (Conduit.Name localName mbns mbpref) = + case mbpref of + Nothing | "xmlns:" `T.isPrefixOf` localName -> + Light.QName (T.unpack $ T.drop 6 localName) (T.unpack <$> mbns) + (Just "xmlns") + _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns) + (T.unpack <$> mbpref) + +nodeToContent :: Conduit.Node -> Maybe Light.Content +nodeToContent (Conduit.NodeElement el) = + Just (Light.Elem (elementToElement el)) +nodeToContent (Conduit.NodeContent t) = + Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing)) +nodeToContent _ = Nothing + diff --git a/test/Tests/Readers/JATS.hs b/test/Tests/Readers/JATS.hs index 525499c86..a9c9a0586 100644 --- a/test/Tests/Readers/JATS.hs +++ b/test/Tests/Readers/JATS.hs @@ -88,6 +88,7 @@ tests = [ testGroup "inline code" "<p>\n\ \ <inline-formula><alternatives>\n\ \ <tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\ + \ </alternatives></inline-formula>\n\ \</p>" =?> para (math "\\sigma|_{\\{x\\}}") , test jats "math ml only" $ diff --git a/test/command/5321.md b/test/command/5321.md index 081abe2a0..83404632a 100644 --- a/test/command/5321.md +++ b/test/command/5321.md @@ -4,7 +4,7 @@ <caption> <p>bar</p> </caption> - <graphic xlink:href="foo.png" xlink:alt-text="baz"> + <graphic xlink:href="foo.png" xlink:alt-text="baz" /> </fig> ^D [Para [Image ("fig-1",[],[]) [Str "bar"] ("foo.png","fig:")]] @@ -17,7 +17,7 @@ <title>foo</title> <p>bar</p> </caption> - <graphic xlink:href="foo.png" xlink:alt-text="baz"> + <graphic xlink:href="foo.png" xlink:alt-text="baz" /> </fig> ^D [Para [Image ("fig-1",[],[]) [Str "foo",LineBreak,Str "bar"] ("foo.png","fig:")]] diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook index 02568d8de..5717d78d0 100644 --- a/test/docbook-reader.docbook +++ b/test/docbook-reader.docbook @@ -1,6 +1,11 @@ <?xml version="1.0" encoding="utf-8" ?> <!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" - "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"> +"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd" +[ +<!ENTITY GHC "GHC" > +<!ENTITY let "LET" > +<!ENTITY case "CASE" > +]> <article> <articleinfo> <title>Pandoc Test Suite</title> diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index 3e1bf16e7..d3b16d0f2 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 66f055063..6293ef493 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index fb3a02a0a..4205a1516 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index bc6c2702a..adb3f23db 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 8c555a5bd..92c8137fe 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index 5f96cc911..f53470617 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index c21b3a5b3..d6af90a72 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 92ce144e9..e18dbe853 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index d21b67309..820299043 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index 3558a47bf..ae0f41d12 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 606df92a3..94cd35dfa 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 759269cac..879f2a25b 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index c37777080..93f86478f 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 9450b1a73..967d297f2 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 6f0b830e6..c5614e2fa 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index e53889cfb..0f39a831f 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 5dbe298b7..07046f223 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 194181288..3656618e6 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 6e0b634f7..8798253d5 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 477178e77..0a24d1840 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index 51110356e..52bb7a217 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index b6206cdf5..182c06c64 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 07b576080..7b69a56a3 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index d46095eb7..3d3a35701 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 7caba4e93..5ae37b406 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 6aaa6da61..c29aa6716 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 5746c5ad0..664493246 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 5f22dccc6..b6d15340e 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index ab5c4f56d..f8e1092d2 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 085f33162..b4cda82f2 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 1ac86d5c8..ee222efa0 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index c2c443b19..c6f8d9c96 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index 5ea18d32e..ea8146690 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ diff --git a/test/jats-reader.native b/test/jats-reader.native index ab77dd1a0..0715ea8cc 100644 --- a/test/jats-reader.native +++ b/test/jats-reader.native @@ -1,4 +1,4 @@ -Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) +Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Space,Str "MacFarlane"],MetaInlines [Str "Anonymous"]]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,Header 1 ("headers",[],[]) [Str "Headers"] ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",SoftBreak,Link ("",[],[]) [Str "embedded",SoftBreak,Str "link"] ("/url","")] diff --git a/test/jats-reader.xml b/test/jats-reader.xml index f75b3e95a..f33cb9ab3 100644 --- a/test/jats-reader.xml +++ b/test/jats-reader.xml @@ -20,6 +20,7 @@ <surname>MacFarlane</surname> <given-names>John</given-names> </name> + </contrib> <contrib contrib-type="author"> <name> <surname>Anonymous</surname> diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx index aa9b7692a..58070eb3f 100644 Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx index 9aaef4cb5..db9b7e371 100644 Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx index 1737ec757..c7b1ed7d5 100644 Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx index 87fb560ef..6944d92bf 100644 Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx index 961c31020..ae0d28429 100644 Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx index 894738ef7..37c74c69a 100644 Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx index 188e8d826..324e443a1 100644 Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx index 253e8c0a7..c81b983e3 100644 Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx index e230420d2..30ce33db6 100644 Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx index 49384fd65..d6c604968 100644 Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx index cdf1be4ad..000e17ecd 100644 Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx index c4fcbad45..fdcd2e29b 100644 Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx index 4a13b5b7f..e73126376 100644 Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx index 7a6e9700e..e3f968e9e 100644 Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx index 926c8ff3f..eadb9372e 100644 Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx index 16f48e182..8ca6bab2b 100644 Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx index f47b17a74..ae188ee68 100644 Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx index 88109a95e..60301fa50 100644 Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx index 84020708f..17124a50d 100644 Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx index a2f77e945..19ae7dd4e 100644 Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx index 48bf7bc8a..b650b7585 100644 Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx index 23b134a5f..0ab029614 100644 Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx index d6eebeffb..2a6e35080 100644 Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx index a6c76a187..a7bcf6a4b 100644 Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx index 1fbde815b..21b018c25 100644 Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx index cb3af4aa1..4ec4772a4 100644 Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx index dff386885..5983657b6 100644 Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx index 43b125f5e..dd54c7082 100644 Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx index 3314a1c65..b3e5ed5b9 100644 Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx index 27a136838..1078854bb 100644 Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx index 7aa3b6a87..5116c6c4e 100644 Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx index d43709ca7..0c8e49bd9 100644 Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx index 793ea10f6..68695939d 100644 Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx index 2f4d3b820..7ed9b946d 100644 Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx index 94a221398..79fc82345 100644 Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx index 22040c88c..9f943c279 100644 Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx index 18477380b..ac72d8ced 100644 Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx index 4b9d0ba4d..15c7b5469 100644 Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx index 1c5b54185..926c5e699 100644 Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx index 1314f4de4..a37e72d2c 100644 Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx index 9018be36e..7f86533fe 100644 Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx index 35e93af67..89e3db0ab 100644 Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ -- cgit v1.2.3 From acc9afaf6f6afa14cbe7cd06798275728e78a529 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 11 Feb 2021 09:16:25 -0800 Subject: Correctly parse "raw" date value in markdown references metadata. See jgm/citeproc#53. --- src/Text/Pandoc/Citeproc/MetaValue.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc/MetaValue.hs b/src/Text/Pandoc/Citeproc/MetaValue.hs index f5a49f49e..b43ca7314 100644 --- a/src/Text/Pandoc/Citeproc/MetaValue.hs +++ b/src/Text/Pandoc/Citeproc/MetaValue.hs @@ -135,12 +135,13 @@ metaValueToVal k v MetaMap _ -> TextVal mempty metaValueToDate :: MetaValue -> Date -metaValueToDate (MetaMap m) = - Date +metaValueToDate (MetaMap m) = fromMaybe + (Date { dateParts = dateparts , dateCirca = circa , dateSeason = season - , dateLiteral = literal } + , dateLiteral = literal }) + rawdate where dateparts = case M.lookup "date-parts" m of Just (MetaList xs) -> @@ -152,6 +153,7 @@ metaValueToDate (MetaMap m) = M.lookup "circa" m >>= metaValueToBool season = M.lookup "season" m >>= metaValueToInt literal = M.lookup "literal" m >>= metaValueToText + rawdate = M.lookup "raw" m >>= metaValueToText >>= rawDateEDTF metaValueToDate (MetaList xs) = Date{ dateParts = mapMaybe metaValueToDateParts xs , dateCirca = False -- cgit v1.2.3 From 3c4a58bad03ef56ae9c82b7e7a6ae027514e2bd6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 10 Feb 2021 23:01:08 -0800 Subject: T.P.Class: Add getTimestamp [API change]. This attempts to read the SOURCE_DATE_EPOCH environment variable and parse a UTC time from it (treating it as a unix date stamp, see https://reproducible-builds.org/specs/source-date-epoch/). If the variable is not set or can't be parsed as a unix date stamp, then the function returns the current date. --- src/Text/Pandoc/Class/PandocMonad.hs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 374da161b..86c8de79e 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -51,6 +51,7 @@ module Text.Pandoc.Class.PandocMonad , setTranslations , translateTerm , makeCanonical + , getTimestamp ) where import Codec.Archive.Zip @@ -59,7 +60,8 @@ import Control.Monad.Except (MonadError (catchError, throwError), import Data.Digest.Pure.SHA (sha1, showDigest) import Data.Maybe (fromMaybe) import Data.Time (UTCTime) -import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds) +import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, + posixSecondsToUTCTime) import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, @@ -74,7 +76,7 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) import Text.Pandoc.MediaBag (MediaBag, lookupMedia) -import Text.Pandoc.Shared (uriPathToPath) +import Text.Pandoc.Shared (uriPathToPath, safeRead) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, readTranslations) import Text.Pandoc.Walk (walkM) @@ -175,6 +177,21 @@ report msg = do when (level <= verbosity) $ logOutput msg modifyCommonState $ \st -> st{ stLog = msg : stLog st } +-- | Get the time from the @SOURCE_DATE_EPOCH@ +-- environment variable. The variable should contain a +-- unix time stamp, the number of seconds since midnight Jan 01 +-- 1970 UTC. If the variable is not set or cannot be +-- parsed as a unix time stamp, the current time is returned. +-- This function is designed to make possible reproducible +-- builds in formats that include a creation timestamp. +getTimestamp :: PandocMonad m => m UTCTime +getTimestamp = do + mbSourceDateEpoch <- lookupEnv "SOURCE_DATE_EPOCH" + case mbSourceDateEpoch >>= safeRead of + Just (epoch :: Integer) -> + return $ posixSecondsToUTCTime $ fromIntegral epoch + Nothing -> getCurrentTime + -- | Determine whether tracing is enabled. This affects -- the behavior of 'trace'. If tracing is not enabled, -- 'trace' does nothing. -- cgit v1.2.3 From 390d5e65b2d66078b2f9b9db142dbe2167ea29a9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 10 Feb 2021 23:13:33 -0800 Subject: Use getTimestamp instead of getCurrentTime in writers. Setting SOURCE_DATE_EPOCH will allow reproducible builds. Partially addresses #7093. This does not suffice to fully enable reproducible in EPUB, since a unique id is being generated for each build. --- src/Text/Pandoc/Readers/RST.hs | 4 ++-- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/EPUB.hs | 4 ++-- src/Text/Pandoc/Writers/ODT.hs | 2 +- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 0f32d993c..29f81b046 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -28,7 +28,7 @@ import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, - readFileFromDirs, getCurrentTime) + readFileFromDirs, getTimestamp) import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV) import Text.Pandoc.Definition import Text.Pandoc.Error @@ -685,7 +685,7 @@ directive' = do "replace" -> B.para <$> -- consumed by substKey parseInlineFromText (trim top) "date" -> B.para <$> do -- consumed by substKey - t <- getCurrentTime + t <- getTimestamp let format = case T.unpack (T.strip top) of [] -> "%Y-%m-%d" x -> x diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 8f498775d..da990e4d3 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -217,7 +217,7 @@ writeDocx opts doc = do let doc' = Pandoc meta blocks' username <- P.lookupEnv "USERNAME" - utctime <- P.getCurrentTime + utctime <- P.getTimestamp oldUserDataDir <- P.getUserDataDir P.setUserDataDir Nothing res <- P.readDefaultDataFile "reference.docx" diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index e99fa2567..171ffe582 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -191,7 +191,7 @@ getEPUBMetadata opts meta = do let fixDate m = if null (epubDate m) then do - currentTime <- lift P.getCurrentTime + currentTime <- lift P.getTimestamp return $ m{ epubDate = [ Date{ dateText = showDateTimeISO8601 currentTime , dateEvent = Nothing } ] } @@ -709,7 +709,7 @@ pandocToEPUB version opts doc = do uuid <- case epubIdentifier metadata of (x:_) -> return $ identifierText x -- use first identifier as UUID [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen - currentTime <- lift P.getCurrentTime + currentTime <- lift P.getTimestamp let contentsData = UTF8.fromStringLazy $ ppTopElement $ unode "package" ! ([("version", case version of diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index a32ff618c..06369b4db 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -68,7 +68,7 @@ pandocToODT :: PandocMonad m pandocToODT opts doc@(Pandoc meta _) = do let title = docTitle meta let authors = docAuthors meta - utctime <- P.getCurrentTime + utctime <- P.getTimestamp lang <- toLang (getLang opts meta) refArchive <- case writerReferenceDoc opts of diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index cd092969b..0a7060895 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -312,7 +312,7 @@ presentationToArchive opts pres = do Nothing -> toArchive . BL.fromStrict <$> P.readDataFile "reference.pptx" - utctime <- P.getCurrentTime + utctime <- P.getTimestamp presSize <- case getPresentationSize refArchive distArchive of Just sz -> return sz -- cgit v1.2.3 From d9322629a36ad50035912ee56df876c345039225 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 12 Feb 2021 17:45:05 -0800 Subject: LaTeX reader improvements. * Rewrote `withRaw` so it doesn't rely on fragile assumptions about token positions (which break when macros are expanded). This requires the addition of `sEnableWithRaw` and `sRawTokens` in `LaTeXState`, and a new combinator `disablingWithRaw` to disable collecting of raw tokens in certain contexts. * Add `parseFromToks` to T.P.Readers.LaTeX.Parsing. * Fix parsing of single character tokens so it doesn't mess up the new raw token collecting. * These changes slightly increase allocations and have a small performance impact, but it's minor. Closes #7092. --- src/Text/Pandoc/Readers/LaTeX.hs | 6 +-- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 84 +++++++++++++++++++++++++------- 2 files changed, 68 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 91c71c000..0a66b7f39 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1054,7 +1054,7 @@ romanNumeralArg = spaces *> (parser <|> inBraces) symbol '}' return res parser = do - Tok _ Word s <- satisfyTok isWordTok + s <- untokenize <$> many1 (satisfyTok isWordTok) let (digits, rest) = T.span isDigit s unless (T.null rest) $ Prelude.fail "Non-digits in argument to \\Rn or \\RN" @@ -2208,9 +2208,7 @@ parseTableRow envname prefsufs = do option [] (count 1 amp) return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff rawcells <- mapM celltoks prefsufs - oldInput <- getInput - cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells - setInput oldInput + cells <- mapM (parseFromToks parseTableCell) rawcells spaces return $ Row nullAttr cells diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 12a3ba2f6..313aa6c51 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -32,6 +32,8 @@ module Text.Pandoc.Readers.LaTeX.Parsing , totoks , toksToString , satisfyTok + , parseFromToks + , disablingWithRaw , doMacros , doMacros' , setpos @@ -87,13 +89,15 @@ import Control.Monad.Trans (lift) import Data.Char (chr, isAlphaNum, isDigit, isLetter, ord) import Data.Default import Data.List (intercalate) +import qualified Data.IntMap as IntMap import qualified Data.Map as M import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) -import Text.Pandoc.Error (PandocError (PandocMacroLoop)) +import Text.Pandoc.Error + (PandocError (PandocMacroLoop,PandocShouldNeverHappenError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, @@ -153,6 +157,8 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sToggles :: M.Map Text Bool , sExpanded :: Bool , sFileContents :: M.Map Text Text + , sEnableWithRaw :: Bool + , sRawTokens :: IntMap.IntMap [Tok] } deriving Show @@ -179,6 +185,8 @@ defaultLaTeXState = LaTeXState{ sOptions = def , sToggles = M.empty , sExpanded = False , sFileContents = M.empty + , sEnableWithRaw = True + , sRawTokens = IntMap.empty } instance PandocMonad m => HasQuoteContext LaTeXState m where @@ -404,11 +412,31 @@ untoken t = untokenAccum t mempty toksToString :: [Tok] -> String toksToString = T.unpack . untokenize +parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a +parseFromToks parser toks = do + oldInput <- getInput + setInput toks + result <- disablingWithRaw parser + setInput oldInput + return result + +disablingWithRaw :: PandocMonad m => LP m a -> LP m a +disablingWithRaw parser = do + oldEnableWithRaw <- sEnableWithRaw <$> getState + updateState $ \st -> st{ sEnableWithRaw = False } + result <- parser + updateState $ \st -> st{ sEnableWithRaw = oldEnableWithRaw } + return result + satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok satisfyTok f = do doMacros -- apply macros on remaining input stream res <- tokenPrim (T.unpack . untoken) updatePos matcher - updateState $ \st -> st{ sExpanded = False } + updateState $ \st -> st{ sExpanded = False + , sRawTokens = + if sEnableWithRaw st + then IntMap.map (res:) $ sRawTokens st + else sRawTokens st } return res where matcher t | f t = Just t | otherwise = Nothing @@ -594,18 +622,22 @@ isCommentTok _ = False anyTok :: PandocMonad m => LP m Tok anyTok = satisfyTok (const True) +singleCharTok :: PandocMonad m => LP m Tok +singleCharTok = + satisfyTok $ \case + Tok _ Word t -> T.length t == 1 + Tok _ Symbol t -> not (T.any (`Set.member` specialChars) t) + _ -> False + singleChar :: PandocMonad m => LP m Tok -singleChar = try $ do - Tok pos toktype t <- satisfyTok (tokTypeIn [Word, Symbol]) - guard $ not $ toktype == Symbol && - T.any (`Set.member` specialChars) t - if T.length t > 1 - then do - let (t1, t2) = (T.take 1 t, T.drop 1 t) - inp <- getInput - setInput $ Tok (incSourceColumn pos 1) toktype t2 : inp - return $ Tok pos toktype t1 - else return $ Tok pos toktype t +singleChar = singleCharTok <|> singleCharFromWord + where + singleCharFromWord = do + Tok pos toktype t <- disablingWithRaw $ satisfyTok isWordTok + let (t1, t2) = (T.take 1 t, T.drop 1 t) + inp <- getInput + setInput $ Tok pos toktype t1 : Tok (incSourceColumn pos 1) toktype t2 : inp + anyTok specialChars :: Set.Set Char specialChars = Set.fromList "#$%&~_^\\{}" @@ -725,11 +757,23 @@ ignore raw = do withRaw :: PandocMonad m => LP m a -> LP m (a, [Tok]) withRaw parser = do - inp <- getInput + rawTokensMap <- sRawTokens <$> getState + let key = case IntMap.lookupMax rawTokensMap of + Nothing -> 0 + Just (n,_) -> n + 1 + -- insert empty list at key + updateState $ \st -> st{ sRawTokens = + IntMap.insert key [] $ sRawTokens st } result <- parser - nxtpos <- option Nothing ((\(Tok pos' _ _) -> Just pos') <$> lookAhead anyTok) - let raw = takeWhile (\(Tok pos _ _) -> maybe True - (\p -> sourceName p /= sourceName pos || pos < p) nxtpos) inp + mbRevToks <- IntMap.lookup key . sRawTokens <$> getState + raw <- case mbRevToks of + Just revtoks -> do + updateState $ \st -> st{ sRawTokens = + IntMap.delete key $ sRawTokens st} + return $ reverse revtoks + Nothing -> + throwError $ PandocShouldNeverHappenError $ + "sRawTokens has nothing at key " <> T.pack (show key) return (result, raw) keyval :: PandocMonad m => LP m (Text, Text) @@ -794,7 +838,7 @@ getRawCommand name txt = do (_, rawargs) <- withRaw $ case name of "write" -> do - void $ satisfyTok isWordTok -- digits + void $ many $ satisfyTok isDigitTok -- digits void braced "titleformat" -> do void braced @@ -809,6 +853,10 @@ getRawCommand name txt = do void $ many braced return $ txt <> untokenize rawargs +isDigitTok :: Tok -> Bool +isDigitTok (Tok _ Word t) = T.all isDigit t +isDigitTok _ = False + skipopts :: PandocMonad m => LP m () skipopts = skipMany (void overlaySpecification <|> void rawopt) -- cgit v1.2.3 From eb0c63b00263263e3dc0f74c0bf19e93c7afeb13 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 12 Feb 2021 19:29:48 -0800 Subject: Avoid an unnecessary withRaw. --- src/Text/Pandoc/Readers/LaTeX.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 0a66b7f39..2a949cbdc 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1869,9 +1869,12 @@ rawEnv name = do pos1 <- getPosition (bs, raw) <- withRaw $ env name blocks if parseRaw - then return $ rawBlock "latex" + then do + (_, raw) <- withRaw $ env name blocks + return $ rawBlock "latex" $ beginCommand <> untokenize raw else do + bs <- env name blocks report $ SkippedContent beginCommand pos1 pos2 <- getPosition report $ SkippedContent ("\\end{" <> name <> "}") pos2 -- cgit v1.2.3 From 25b7df7c2a1798b5861b7ddc084e74b926299630 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 13 Feb 2021 00:18:40 -0800 Subject: Remove Ext_fenced_code_attributes from allowed commonmark attributes. This attribute was listed as allowed, but it didn't actually do anything. Use `attributes` for code attributes and more. Closes #7097. --- src/Text/Pandoc/Extensions.hs | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 7aa32c52c..82eb0e957 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -378,7 +378,6 @@ getDefaultExtensions "commonmark_x" = extensionsFromList , Ext_raw_attribute , Ext_implicit_header_references , Ext_attributes - , Ext_fenced_code_attributes ] getDefaultExtensions "org" = extensionsFromList [Ext_citations, @@ -509,7 +508,6 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_raw_attribute , Ext_implicit_header_references , Ext_attributes - , Ext_fenced_code_attributes , Ext_sourcepos ] getAll "commonmark_x" = getAll "commonmark" -- cgit v1.2.3 From 6323250bad5e0bf9ef0ec007e16c869d99828080 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 13 Feb 2021 00:22:22 -0800 Subject: LaTeX reader: remove unnecessary line --- src/Text/Pandoc/Readers/LaTeX.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 2a949cbdc..7d8dfab0e 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1867,7 +1867,6 @@ rawEnv name = do rawOptions <- mconcat <$> many rawopt let beginCommand = "\\begin{" <> name <> "}" <> rawOptions pos1 <- getPosition - (bs, raw) <- withRaw $ env name blocks if parseRaw then do (_, raw) <- withRaw $ env name blocks -- cgit v1.2.3 From 2d60a5127cc28bb6b55c19309d6e8fb6e81fbe66 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 13 Feb 2021 09:36:19 +0100 Subject: T.P.Shared: export `handleTaskListItem`. [API change] --- src/Text/Pandoc/Shared.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index b908a0172..6d5d4c97d 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -70,6 +70,7 @@ module Text.Pandoc.Shared ( isTightList, taskListItemFromAscii, taskListItemToAscii, + handleTaskListItem, addMetaField, makeMeta, eastAsianLineBreakFilter, -- cgit v1.2.3 From a3beed9db874517fa57b55380658f4e019e809b2 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 13 Feb 2021 09:37:43 +0100 Subject: Org: support task_lists extension The tasks lists extension is now supported by the org reader and writer; the extension is turned on by default. Closes: #6336 --- src/Text/Pandoc/Extensions.hs | 2 ++ src/Text/Pandoc/Readers/Org/Blocks.hs | 41 +++++++++++++++++++++++-- src/Text/Pandoc/Writers/Org.hs | 16 ++++++++-- test/Tests/Readers/Org/Block/List.hs | 13 ++++++++ test/Tests/Writers/Org.hs | 57 ++++++++++++++++++++++++++++------- 5 files changed, 113 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 82eb0e957..3b96f9e04 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -381,6 +381,7 @@ getDefaultExtensions "commonmark_x" = extensionsFromList ] getDefaultExtensions "org" = extensionsFromList [Ext_citations, + Ext_task_lists, Ext_auto_identifiers] getDefaultExtensions "html" = extensionsFromList [Ext_auto_identifiers, @@ -515,6 +516,7 @@ getAllExtensions f = universalExtensions <> getAll f extensionsFromList [ Ext_citations , Ext_smart + , Ext_task_lists ] getAll "html" = autoIdExtensions <> extensionsFromList diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 6bd046e04..d1aff701e 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,7 @@ {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2021 Albert Krewinkel @@ -850,16 +851,52 @@ definitionListItem parseIndentedMarker = try $ do definitionMarker = spaceChar *> string "::" <* (spaceChar <|> lookAhead newline) +-- | Checkbox for tasks. +data Checkbox + = UncheckedBox + | CheckedBox + | SemicheckedBox + +-- | Parses a checkbox in a plain list. +checkbox :: PandocMonad m + => OrgParser m Checkbox +checkbox = do + guardEnabled Ext_task_lists + try (char '[' *> status <* char ']') <?> "checkbox" + where + status = choice + [ UncheckedBox <$ char ' ' + , CheckedBox <$ char 'X' + , SemicheckedBox <$ char '-' + ] + +checkboxToInlines :: Checkbox -> Inline +checkboxToInlines = B.Str . \case + UncheckedBox -> "☐" + SemicheckedBox -> "☐" + CheckedBox -> "☒" + -- | parse raw text for one list item listItem :: PandocMonad m => OrgParser m Int -> OrgParser m (F Blocks) listItem parseIndentedMarker = try . withContext ListItemState $ do markerLength <- try parseIndentedMarker + box <- optionMaybe checkbox firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- T.concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine <> blank <> rest + contents <- parseFromString blocks $ firstLine <> blank <> rest + return (maybe id (prependInlines . checkboxToInlines) box <$> contents) + +-- | Prepend inlines to blocks, adding them to the first paragraph or +-- creating a new Plain element if necessary. +prependInlines :: Inline -> Blocks -> Blocks +prependInlines inlns = B.fromList . prepend . B.toList + where + prepend (Plain is : bs) = Plain (inlns : Space : is) : bs + prepend (Para is : bs) = Para (inlns : Space : is) : bs + prepend bs = Plain [inlns, Space] : bs -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 43ebf1807..8dfc2749c 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -213,25 +213,35 @@ blockToOrg (DefinitionList items) = do -- | Convert bullet list item (list of blocks) to Org. bulletListItemToOrg :: PandocMonad m => [Block] -> Org m (Doc Text) bulletListItemToOrg items = do - contents <- blockListToOrg items + exts <- gets $ writerExtensions . stOptions + contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang 2 "- " contents $$ if endsWithPlain items then cr else blankline - -- | Convert ordered list item (a list of blocks) to Org. orderedListItemToOrg :: PandocMonad m => Text -- ^ marker for list item -> [Block] -- ^ list item (list of blocks) -> Org m (Doc Text) orderedListItemToOrg marker items = do - contents <- blockListToOrg items + exts <- gets $ writerExtensions . stOptions + contents <- blockListToOrg (taskListItemToOrg exts items) return $ hang (T.length marker + 1) (literal marker <> space) contents $$ if endsWithPlain items then cr else blankline +-- | Convert a list item containing text starting with @U+2610 BALLOT BOX@ +-- or @U+2612 BALLOT BOX WITH X@ to org checkbox syntax (e.g. @[X]@). +taskListItemToOrg :: Extensions -> [Block] -> [Block] +taskListItemToOrg = handleTaskListItem toOrg + where + toOrg (Str "☐" : Space : is) = Str "[ ]" : Space : is + toOrg (Str "☒" : Space : is) = Str "[X]" : Space : is + toOrg is = is + -- | Convert definition list item (label, list of blocks) to Org. definitionListItemToOrg :: PandocMonad m => ([Inline], [[Block]]) -> Org m (Doc Text) diff --git a/test/Tests/Readers/Org/Block/List.hs b/test/Tests/Readers/Org/Block/List.hs index 9686b5148..2ee37081e 100644 --- a/test/Tests/Readers/Org/Block/List.hs +++ b/test/Tests/Readers/Org/Block/List.hs @@ -118,6 +118,19 @@ tests = ] =?> bulletList [ plain "", plain "" ] + , "Task list" =: + T.unlines [ "- [ ] nope" + , "- [X] yup" + , "- [-] started" + , " 1. [X] sure" + , " 2. [ ] nuh-uh" + ] =?> + bulletList [ plain "☐ nope", plain "☒ yup" + , mconcat [ plain "☐ started" + , orderedList [plain "☒ sure", plain "☐ nuh-uh"] + ] + ] + , "Simple Ordered List" =: ("1. Item1\n" <> "2. Item2\n") =?> diff --git a/test/Tests/Writers/Org.hs b/test/Tests/Writers/Org.hs index 9cbe360da..bd6c9b7ab 100644 --- a/test/Tests/Writers/Org.hs +++ b/test/Tests/Writers/Org.hs @@ -1,6 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.Org (tests) where +import Data.Text as T import Test.Tasty import Tests.Helpers import Text.Pandoc @@ -9,17 +10,51 @@ import Text.Pandoc.Builder infix 4 =: (=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> TestTree -(=:) = test (purely (writeOrg def . toPandoc)) + => String -> (a, Text) -> TestTree +(=:) = test org + +defopts :: WriterOptions +defopts = def + { writerExtensions = getDefaultExtensions "org" + } + +org :: (ToPandoc a) => a -> Text +org = orgWithOpts defopts + +orgWithOpts :: (ToPandoc a) => WriterOptions -> a -> Text +orgWithOpts opts x = purely (writeOrg opts) $ toPandoc x + tests :: [TestTree] -tests = [ testGroup "links" - -- See http://orgmode.org/manual/Internal-links.html#Internal-links - [ "simple link" - =: link "/url" "" "foo" - =?> "[[/url][foo]]" - , "internal link to anchor" - =: link "#my-custom-id" "" "#my-custom-id" - =?> "[[#my-custom-id]]" +tests = + [ testGroup "links" + -- See http://orgmode.org/manual/Internal-links.html#Internal-links + [ "simple link" + =: link "/url" "" "foo" + =?> "[[/url][foo]]" + , "internal link to anchor" + =: link "#my-custom-id" "" "#my-custom-id" + =?> "[[#my-custom-id]]" + ] + + , testGroup "lists" + [ "bullet task list" + =: bulletList [plain "☐ a", plain "☒ b"] + =?> T.unlines + [ "- [ ] a" + , "- [X] b" + ] + , "ordered task list" + =: orderedList [plain ("☐" <> space <> "a"), plain "☒ b"] + =?> T.unlines + [ "1. [ ] a" + , "2. [X] b" + ] + , test (orgWithOpts def) "bullet without task_lists" $ + bulletList [plain "☐ a", plain "☒ b"] + =?> T.unlines + [ "- ☐ a" + , "- ☒ b" ] - ] + ] + ] -- cgit v1.2.3 From 6e73273916a55448c1a12ece343454ef139648a8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 13 Feb 2021 13:05:17 -0800 Subject: T.P.Error: export `renderError`. Refactor `handleError` to use `renderError`. This allows us render error messages without exiting. --- src/Text/Pandoc/Error.hs | 105 ++++++++++++++++++++++++++++++++--------------- test/command/7099.md | 8 ++++ 2 files changed, 80 insertions(+), 33 deletions(-) create mode 100644 test/command/7099.md (limited to 'src') diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 831405f42..50ad3c0e3 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -16,9 +16,10 @@ encountered during parsing. -} module Text.Pandoc.Error ( PandocError(..), + renderError, handleError) where -import Control.Exception (Exception) +import Control.Exception (Exception, displayException) import Data.Typeable (Typeable) import Data.Word (Word8) import Data.Text (Text) @@ -68,19 +69,17 @@ data PandocError = PandocIOError Text IOError instance Exception PandocError --- | Handle PandocError by exiting with an error message. -handleError :: Either PandocError a -> IO a -handleError (Right r) = return r -handleError (Left e) = +renderError :: PandocError -> Text +renderError e = case e of - PandocIOError _ err' -> ioError err' - PandocHttpError u err' -> err 61 $ + PandocIOError _ err' -> T.pack $ displayException err' + PandocHttpError u err' -> "Could not fetch " <> u <> "\n" <> tshow err' - PandocShouldNeverHappenError s -> err 62 $ + PandocShouldNeverHappenError s -> "Something we thought was impossible happened!\n" <> "Please report this to pandoc's developers: " <> s - PandocSomeError s -> err 63 s - PandocParseError s -> err 64 s + PandocSomeError s -> s + PandocParseError s -> s PandocParsecError input err' -> let errPos = errorPos err' errLine = sourceLine errPos @@ -91,41 +90,41 @@ handleError (Left e) = ,"\n", T.replicate (errColumn - 1) " " ,"^"] else "" - in err 65 $ "\nError at " <> tshow err' <> + in "\nError at " <> tshow err' <> -- if error comes from a chunk or included file, -- then we won't get the right text this way: if sourceName errPos == "source" then errorInFile else "" - PandocMakePDFError s -> err 66 s - PandocOptionError s -> err 6 s - PandocSyntaxMapError s -> err 67 s - PandocFailOnWarningError -> err 3 "Failing because there were warnings." - PandocPDFProgramNotFoundError pdfprog -> err 47 $ + PandocMakePDFError s -> s + PandocOptionError s -> s + PandocSyntaxMapError s -> s + PandocFailOnWarningError -> "Failing because there were warnings." + PandocPDFProgramNotFoundError pdfprog -> pdfprog <> " not found. Please select a different --pdf-engine or install " <> pdfprog - PandocPDFError logmsg -> err 43 $ "Error producing PDF.\n" <> logmsg - PandocXMLError fp logmsg -> err 44 $ "Invalid XML" <> + PandocPDFError logmsg -> "Error producing PDF.\n" <> logmsg + PandocXMLError fp logmsg -> "Invalid XML" <> (if T.null fp then "" else " in " <> fp) <> ":\n" <> logmsg - PandocFilterError filtername msg -> err 83 $ "Error running filter " <> + PandocFilterError filtername msg -> "Error running filter " <> filtername <> ":\n" <> msg - PandocLuaError msg -> err 84 $ "Error running Lua:\n" <> msg - PandocCouldNotFindDataFileError fn -> err 97 $ + PandocLuaError msg -> "Error running Lua:\n" <> msg + PandocCouldNotFindDataFileError fn -> "Could not find data file " <> fn - PandocResourceNotFound fn -> err 99 $ + PandocResourceNotFound fn -> "File " <> fn <> " not found in resource path" - PandocTemplateError s -> err 5 $ "Error compiling template " <> s - PandocAppError s -> err 4 s - PandocEpubSubdirectoryError s -> err 31 $ + PandocTemplateError s -> "Error compiling template " <> s + PandocAppError s -> s + PandocEpubSubdirectoryError s -> "EPUB subdirectory name '" <> s <> "' contains illegal characters" - PandocMacroLoop s -> err 91 $ + PandocMacroLoop s -> "Loop encountered in expanding macro " <> s - PandocUTF8DecodingError f offset w -> err 92 $ + PandocUTF8DecodingError f offset w -> "UTF-8 decoding error in " <> f <> " at byte offset " <> tshow offset <> " (" <> T.pack (printf "%2x" w) <> ").\n" <> "The input must be a UTF-8 encoded text." - PandocIpynbDecodingError w -> err 93 $ + PandocIpynbDecodingError w -> "ipynb decoding error: " <> w - PandocUnknownReaderError r -> err 21 $ + PandocUnknownReaderError r -> "Unknown input format " <> r <> case r of "doc" -> "\nPandoc can convert from DOCX, but not from DOC." <> @@ -133,7 +132,7 @@ handleError (Left e) = " and convert that with pandoc." "pdf" -> "\nPandoc can convert to PDF, but not from PDF." _ -> "" - PandocUnknownWriterError w -> err 22 $ + PandocUnknownWriterError w -> "Unknown output format " <> w <> case w of "pdf" -> "To create a pdf using pandoc, use" <> @@ -142,14 +141,54 @@ handleError (Left e) = ".pdf extension (-o filename.pdf)." "doc" -> "\nPandoc can convert to DOCX, but not to DOC." _ -> "" - PandocUnsupportedExtensionError ext f -> err 23 $ + PandocUnsupportedExtensionError ext f -> "The extension " <> ext <> " is not supported " <> "for " <> f - PandocCiteprocError e' -> err 24 $ + PandocCiteprocError e' -> prettyCiteprocError e' - PandocBibliographyError fp msg -> err 25 $ + PandocBibliographyError fp msg -> "Error reading bibliography file " <> fp <> ":\n" <> msg + +-- | Handle PandocError by exiting with an error message. +handleError :: Either PandocError a -> IO a +handleError (Right r) = return r +handleError (Left e) = + case e of + PandocIOError _ err' -> ioError err' + _ -> err exitCode (renderError e) + where + exitCode = + case e of + PandocIOError{} -> 1 + PandocHttpError{} -> 61 + PandocShouldNeverHappenError{} -> 62 + PandocSomeError{} -> 63 + PandocParseError{} -> 64 + PandocParsecError{} -> 65 + PandocMakePDFError{} -> 66 + PandocOptionError{} -> 6 + PandocSyntaxMapError{} -> 67 + PandocFailOnWarningError{} -> 3 + PandocPDFProgramNotFoundError{} -> 47 + PandocPDFError{} -> 43 + PandocXMLError{} -> 44 + PandocFilterError{} -> 83 + PandocLuaError{} -> 84 + PandocCouldNotFindDataFileError{} -> 97 + PandocResourceNotFound fn -> 99 + PandocTemplateError{} -> 5 + PandocAppError{} -> 4 + PandocEpubSubdirectoryError{} -> 31 + PandocMacroLoop s -> 91 + PandocUTF8DecodingError{} -> 92 + PandocIpynbDecodingError{} -> 93 + PandocUnknownReaderError{} -> 21 + PandocUnknownWriterError{} -> 22 + PandocUnsupportedExtensionError{} -> 23 + PandocCiteprocError{} -> 24 + PandocBibliographyError{} -> 25 + err :: Int -> Text -> IO a err exitCode msg = do UTF8.hPutStrLn stderr (T.unpack msg) diff --git a/test/command/7099.md b/test/command/7099.md new file mode 100644 index 000000000..d9ff8e5ff --- /dev/null +++ b/test/command/7099.md @@ -0,0 +1,8 @@ +``` +% pandoc -f html -t native --verbose +<iframe src=""></iframe> +^D +[INFO] Fetching ... +[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1 +[] +``` -- cgit v1.2.3 From d84a6041e12547331f3a252ea4e8b4d229ba1159 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 13 Feb 2021 13:06:22 -0800 Subject: HTML reader: fix bad handling of empty src attribute in iframe. - If src is empty, we simply skip the iframe. - If src is invalid or cannot be fetched, we issue a warning and skip instead of failing with an error. - Closes #7099. --- src/Text/Pandoc/Readers/HTML.hs | 18 ++++++++++++------ test/command/7099.md | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index afc7a3e25..cc60b5501 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -26,7 +26,7 @@ module Text.Pandoc.Readers.HTML ( readHtml import Control.Applicative ((<|>)) import Control.Monad (guard, msum, mzero, unless, void) -import Control.Monad.Except (throwError) +import Control.Monad.Except (throwError, catchError) import Control.Monad.Reader (ask, asks, lift, local, runReaderT) import Data.ByteString.Base64 (encode) import Data.Char (isAlphaNum, isLetter) @@ -393,11 +393,17 @@ pIframe = try $ do tag <- pSatisfy (tagOpen (=="iframe") (isJust . lookup "src")) pCloses "iframe" <|> eof url <- canonicalizeUrl $ fromAttrib "src" tag - (bs, _) <- openURL url - let inp = UTF8.toText bs - opts <- readerOpts <$> getState - Pandoc _ contents <- readHtml opts inp - return $ B.divWith ("",["iframe"],[]) $ B.fromList contents + if T.null url + then ignore $ renderTags' [tag, TagClose "iframe"] + else catchError + (do (bs, _) <- openURL url + let inp = UTF8.toText bs + opts <- readerOpts <$> getState + Pandoc _ contents <- readHtml opts inp + return $ B.divWith ("",["iframe"],[]) $ B.fromList contents) + (\e -> do + logMessage $ CouldNotFetchResource url (renderError e) + ignore $ renderTags' [tag, TagClose "iframe"]) pRawHtmlBlock :: PandocMonad m => TagParser m Blocks pRawHtmlBlock = do diff --git a/test/command/7099.md b/test/command/7099.md index d9ff8e5ff..33ac8aea1 100644 --- a/test/command/7099.md +++ b/test/command/7099.md @@ -2,7 +2,17 @@ % pandoc -f html -t native --verbose <iframe src=""></iframe> ^D -[INFO] Fetching ... -[INFO] Skipped '<iframe src></iframe>' at input line 1 column 1 +[INFO] Skipped '<iframe src></iframe>' at input line 1 column 16 +[] +``` + +``` +% pandoc -f html -t native --verbose +<iframe src="h:invalid@url"></iframe> +^D +[INFO] Fetching h:invalid@url... +[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url + InvalidUrlException "h:invalid@url" "Invalid scheme" +[INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29 [] ``` -- cgit v1.2.3 From 8621ed600af6bdc984fe2095dac400887c3cda78 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sun, 14 Feb 2021 15:49:12 +0100 Subject: T.P.Error: remove unused variables --- src/Text/Pandoc/Error.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 50ad3c0e3..94c013cdb 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -176,11 +176,11 @@ handleError (Left e) = PandocFilterError{} -> 83 PandocLuaError{} -> 84 PandocCouldNotFindDataFileError{} -> 97 - PandocResourceNotFound fn -> 99 + PandocResourceNotFound{} -> 99 PandocTemplateError{} -> 5 PandocAppError{} -> 4 PandocEpubSubdirectoryError{} -> 31 - PandocMacroLoop s -> 91 + PandocMacroLoop{} -> 91 PandocUTF8DecodingError{} -> 92 PandocIpynbDecodingError{} -> 93 PandocUnknownReaderError{} -> 21 -- cgit v1.2.3 From 967e7f5fb990b29de48b37be1db40fb149a8cf55 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 14 Feb 2021 22:29:21 -0800 Subject: Rename Text.Pandoc.XMLParser -> Text.Pandoc.XML.Light... ..and add new definitions isomorphic to xml-light's, but with Text instead of String. This allows us to keep most of the code in existing readers that use xml-light, but avoid lots of unnecessary allocation. We also add versions of the functions from xml-light's Text.XML.Light.Output and Text.XML.Light.Proc that operate on our modified XML types, and functions that convert xml-light types to our types (since some of our dependencies, like texmath, use xml-light). Update golden tests for docx and pptx. OOXML test: Use `showContent` instead of `ppContent` in `displayDiff`. Docx: Do a manual traversal to unwrap sdt and smartTag. This is faster, and needed to pass the tests. Benchmarks: A = prior to 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) B = as of 8ca191604dcd13af27c11d2da225da646ebce6fc (Feb 8) C = this commit | Reader | A | B | C | | ------- | ----- | ------ | ----- | | docbook | 18 ms | 12 ms | 10 ms | | opml | 65 ms | 62 ms | 35 ms | | jats | 15 ms | 11 ms | 9 ms | | docx | 72 ms | 69 ms | 44 ms | | odt | 78 ms | 41 ms | 28 ms | | epub | 64 ms | 61 ms | 56 ms | | fb2 | 14 ms | 5 ms | 4 ms | --- .hlint.yaml | 1 + pandoc.cabal | 2 +- src/Text/Pandoc/ImageSize.hs | 9 +- src/Text/Pandoc/Readers/DocBook.hs | 76 +-- src/Text/Pandoc/Readers/Docx/Parse.hs | 163 +++--- src/Text/Pandoc/Readers/Docx/Parse/Styles.hs | 31 +- src/Text/Pandoc/Readers/Docx/Util.hs | 27 +- src/Text/Pandoc/Readers/EPUB.hs | 65 +-- src/Text/Pandoc/Readers/FB2.hs | 93 ++-- src/Text/Pandoc/Readers/JATS.hs | 58 +- src/Text/Pandoc/Readers/OPML.hs | 29 +- src/Text/Pandoc/Readers/Odt.hs | 5 +- src/Text/Pandoc/Readers/Odt/ContentReader.hs | 13 +- src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs | 3 +- src/Text/Pandoc/Readers/Odt/Generic/Utils.hs | 33 +- .../Pandoc/Readers/Odt/Generic/XMLConverter.hs | 23 +- src/Text/Pandoc/Readers/Odt/Namespaces.hs | 11 +- src/Text/Pandoc/Readers/Odt/StyleReader.hs | 23 +- src/Text/Pandoc/Writers/Docx.hs | 263 +++++---- src/Text/Pandoc/Writers/EPUB.hs | 356 ++++++------- src/Text/Pandoc/Writers/FB2.hs | 127 ++--- src/Text/Pandoc/Writers/ODT.hs | 16 +- src/Text/Pandoc/Writers/OOXML.hs | 35 +- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 201 +++---- src/Text/Pandoc/XML/Light.hs | 586 +++++++++++++++++++++ src/Text/Pandoc/XMLParser.hs | 66 --- test/Tests/Writers/OOXML.hs | 3 +- test/docx/golden/block_quotes.docx | Bin 10098 -> 10071 bytes test/docx/golden/codeblock.docx | Bin 9950 -> 9920 bytes test/docx/golden/comments.docx | Bin 10285 -> 10258 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10048 -> 10021 bytes test/docx/golden/custom_style_preserve.docx | Bin 10673 -> 10650 bytes test/docx/golden/custom_style_reference.docx | Bin 12434 -> 12403 bytes test/docx/golden/definition_list.docx | Bin 9947 -> 9920 bytes .../golden/document-properties-short-desc.docx | Bin 9953 -> 9925 bytes test/docx/golden/document-properties.docx | Bin 10429 -> 10404 bytes test/docx/golden/headers.docx | Bin 10086 -> 10059 bytes test/docx/golden/image.docx | Bin 26764 -> 26736 bytes test/docx/golden/inline_code.docx | Bin 9886 -> 9859 bytes test/docx/golden/inline_formatting.docx | Bin 10066 -> 10038 bytes test/docx/golden/inline_images.docx | Bin 26822 -> 26793 bytes test/docx/golden/link_in_notes.docx | Bin 10107 -> 10081 bytes test/docx/golden/links.docx | Bin 10282 -> 10251 bytes test/docx/golden/lists.docx | Bin 10358 -> 10332 bytes test/docx/golden/lists_continuing.docx | Bin 10149 -> 10123 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10238 -> 10210 bytes test/docx/golden/lists_restarting.docx | Bin 10150 -> 10122 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10245 -> 10216 bytes test/docx/golden/notes.docx | Bin 10052 -> 10028 bytes test/docx/golden/raw-blocks.docx | Bin 9986 -> 9960 bytes test/docx/golden/raw-bookmarks.docx | Bin 10121 -> 10094 bytes test/docx/golden/table_one_row.docx | Bin 9938 -> 9908 bytes test/docx/golden/table_with_list_cell.docx | Bin 10255 -> 10227 bytes test/docx/golden/tables.docx | Bin 10272 -> 10244 bytes test/docx/golden/track_changes_deletion.docx | Bin 9930 -> 9903 bytes test/docx/golden/track_changes_insertion.docx | Bin 9913 -> 9886 bytes test/docx/golden/track_changes_move.docx | Bin 9947 -> 9920 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10059 -> 10032 bytes test/docx/golden/unicode.docx | Bin 9871 -> 9845 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9919 -> 9892 bytes test/pptx/code-custom.pptx | Bin 28221 -> 28184 bytes test/pptx/code-custom_templated.pptx | Bin 395516 -> 395477 bytes test/pptx/code.pptx | Bin 28220 -> 28183 bytes test/pptx/code_templated.pptx | Bin 395514 -> 395477 bytes test/pptx/document-properties-short-desc.pptx | Bin 27004 -> 26967 bytes .../document-properties-short-desc_templated.pptx | Bin 394288 -> 394253 bytes test/pptx/document-properties.pptx | Bin 27408 -> 27375 bytes test/pptx/document-properties_templated.pptx | Bin 394691 -> 394656 bytes test/pptx/endnotes.pptx | Bin 26962 -> 26928 bytes test/pptx/endnotes_templated.pptx | Bin 394253 -> 394219 bytes test/pptx/endnotes_toc.pptx | Bin 27789 -> 27747 bytes test/pptx/endnotes_toc_templated.pptx | Bin 395083 -> 395041 bytes test/pptx/images.pptx | Bin 44619 -> 44579 bytes test/pptx/images_templated.pptx | Bin 411909 -> 411870 bytes test/pptx/inline_formatting.pptx | Bin 26148 -> 26121 bytes test/pptx/inline_formatting_templated.pptx | Bin 393438 -> 393412 bytes test/pptx/lists.pptx | Bin 27049 -> 27015 bytes test/pptx/lists_templated.pptx | Bin 394340 -> 394307 bytes test/pptx/raw_ooxml.pptx | Bin 26940 -> 26908 bytes test/pptx/raw_ooxml_templated.pptx | Bin 394231 -> 394198 bytes test/pptx/remove_empty_slides.pptx | Bin 44065 -> 44025 bytes test/pptx/remove_empty_slides_templated.pptx | Bin 411352 -> 411311 bytes test/pptx/slide_breaks.pptx | Bin 28575 -> 28531 bytes test/pptx/slide_breaks_slide_level_1.pptx | Bin 27744 -> 27705 bytes .../pptx/slide_breaks_slide_level_1_templated.pptx | Bin 395038 -> 395000 bytes test/pptx/slide_breaks_templated.pptx | Bin 395868 -> 395825 bytes test/pptx/slide_breaks_toc.pptx | Bin 29532 -> 29481 bytes test/pptx/slide_breaks_toc_templated.pptx | Bin 396826 -> 396776 bytes test/pptx/speaker_notes.pptx | Bin 35436 -> 35360 bytes test/pptx/speaker_notes_after_metadata.pptx | Bin 31675 -> 31636 bytes .../speaker_notes_after_metadata_templated.pptx | Bin 398955 -> 398915 bytes test/pptx/speaker_notes_afterheader.pptx | Bin 30691 -> 30657 bytes test/pptx/speaker_notes_afterheader_templated.pptx | Bin 397979 -> 397943 bytes test/pptx/speaker_notes_afterseps.pptx | Bin 51604 -> 51548 bytes test/pptx/speaker_notes_afterseps_templated.pptx | Bin 418896 -> 418834 bytes test/pptx/speaker_notes_templated.pptx | Bin 402728 -> 402650 bytes test/pptx/start_numbering_at.pptx | Bin 27023 -> 26991 bytes test/pptx/start_numbering_at_templated.pptx | Bin 394314 -> 394283 bytes test/pptx/tables.pptx | Bin 27566 -> 27532 bytes test/pptx/tables_templated.pptx | Bin 394859 -> 394827 bytes test/pptx/two_column.pptx | Bin 26065 -> 26038 bytes test/pptx/two_column_templated.pptx | Bin 393355 -> 393327 bytes 102 files changed, 1388 insertions(+), 930 deletions(-) create mode 100644 src/Text/Pandoc/XML/Light.hs delete mode 100644 src/Text/Pandoc/XMLParser.hs (limited to 'src') diff --git a/.hlint.yaml b/.hlint.yaml index d5ebffd34..e482b2b37 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -14,6 +14,7 @@ - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained - ignore: {name: "Use &&&"} - ignore: {name: "Use String"} +- ignore: {name: "Use camelCase"} - ignore: {name: "Use fmap"} # specific for GHC 7.8 compat - ignore: {name: "Use isDigit"} diff --git a/pandoc.cabal b/pandoc.cabal index 3c7063f6c..22aebd55e 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -687,7 +687,7 @@ library Text.Pandoc.Lua.PandocLua, Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Walk, - Text.Pandoc.XMLParser, + Text.Pandoc.XML.Light, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index e0a1af8e8..bb1aa6351 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -44,8 +44,7 @@ import Numeric (showFFloat) import Text.Pandoc.Definition import Text.Pandoc.Options import qualified Text.Pandoc.UTF8 as UTF8 -import qualified Text.XML.Light as Xml -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light hiding (Attr) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE @@ -332,12 +331,12 @@ svgSize opts img = do doc <- either (const mzero) return $ parseXMLElement $ TL.fromStrict $ UTF8.toText img let viewboxSize = do - vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc - [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) + vb <- findAttrBy (== QName "viewBox" Nothing Nothing) doc + [_,_,w,h] <- mapM safeRead (T.words vb) return (w,h) let dpi = fromIntegral $ writerDpi opts let dirToInt dir = do - dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack + dim <- findAttrBy (== QName dir Nothing Nothing) doc >>= lengthToDim return $ inPixel opts dim w <- dirToInt "width" <|> (fst <$> viewboxSize) h <- dirToInt "height" <|> (snd <$> viewboxSize) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ad0108843..e201b54fe 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -31,8 +31,7 @@ import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light {- @@ -578,26 +577,27 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> + convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr elt = - maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- convenience function named :: Text -> Element -> Bool -named s e = qName (elName e) == T.unpack s +named s e = qName (elName e) == s -- @@ -634,7 +634,7 @@ isBlockElement :: Content -> Bool isBlockElement (Elem e) = qName (elName e) `elem` blockTags isBlockElement _ = False -blockTags :: [String] +blockTags :: [Text] blockTags = [ "abstract" , "ackno" @@ -721,7 +721,7 @@ blockTags = , "variablelist" ] ++ admonitionTags -admonitionTags :: [String] +admonitionTags :: [Text] admonitionTags = ["important","caution","note","tip","warning"] -- Trim leading and trailing newline characters @@ -779,10 +779,10 @@ getBlocks e = mconcat <$> parseBlock :: PandocMonad m => Content -> DB m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s +parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty - else return $ plain $ trimInlines $ text $ T.pack s -parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper x parseBlock (Elem e) = case qName (elName e) of "toc" -> skip -- skip TOC, since in pandoc it's autogenerated @@ -837,7 +837,7 @@ parseBlock (Elem e) = "refsect2" -> sect 2 "refsect3" -> sect 3 "refsection" -> gets dbSectionLevel >>= sect . (+1) - l | l `elem` admonitionTags -> parseAdmonition $ T.pack l + l | l `elem` admonitionTags -> parseAdmonition l "area" -> skip "areaset" -> skip "areaspec" -> skip @@ -899,7 +899,7 @@ parseBlock (Elem e) = "subtitle" -> return mempty -- handled in parent element _ -> skip >> getBlocks e where skip = do - let qn = T.pack $ qName $ elName e + let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn then "<?" <> qn <> "?>" else qn @@ -911,7 +911,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ T.pack $ strContentRecursive e + $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -965,7 +965,7 @@ parseBlock (Elem e) = w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> (x >= '0' && x <= '9') - || x == '.') (T.pack w) + || x == '.') w if n > 0 then Just n else Nothing let numrows = case bodyrows of [] -> 0 @@ -1048,12 +1048,12 @@ parseMixed container conts = do x <- parseMixed container rs return $ p <> b <> x -parseRow :: PandocMonad m => [String] -> Element -> DB m [Cell] +parseRow :: PandocMonad m => [Text] -> Element -> DB m [Cell] parseRow cn = do let isEntry x = named "entry" x || named "td" x || named "th" x mapM (parseEntry cn) . filterChildren isEntry -parseEntry :: PandocMonad m => [String] -> Element -> DB m Cell +parseEntry :: PandocMonad m => [Text] -> Element -> DB m Cell parseEntry cn el = do let colDistance sa ea = do let iStrt = elemIndex sa cn @@ -1075,7 +1075,7 @@ getInlines :: PandocMonad m => Element -> DB m Inlines getInlines e' = trimInlines . mconcat <$> mapM parseInline (elContent e') -strContentRecursive :: Element -> String +strContentRecursive :: Element -> Text strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -1084,9 +1084,9 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> DB m Inlines -parseInline (Text (CData _ s _)) = return $ text $ T.pack s +parseInline (Text (CData _ s _)) = return $ text s parseInline (CRef ref) = - return $ text $ maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref + return $ text $ maybe (T.toUpper ref) T.pack $ lookupEntity (T.unpack ref) parseInline (Elem e) = case qName (elName e) of "anchor" -> do @@ -1138,7 +1138,7 @@ parseInline (Elem e) = "userinput" -> codeWithLang "systemitem" -> codeWithLang "varargs" -> return $ code "(...)" - "keycap" -> return (str $ T.pack $ strContent e) + "keycap" -> return (str $ strContent e) "keycombo" -> keycombo <$> mapM parseInline (elContent e) "menuchoice" -> menuchoice <$> @@ -1150,17 +1150,17 @@ parseInline (Elem e) = let title = case attrValue "endterm" e of "" -> maybe "???" xrefTitleByElem (findElementById linkend content) - endterm -> maybe "???" (T.pack . strContent) + endterm -> maybe "???" strContent (findElementById endterm content) return $ link ("#" <> linkend) "" (text title) - "email" -> return $ link ("mailto:" <> T.pack (strContent e)) "" - $ str $ T.pack $ strContent e - "uri" -> return $ link (T.pack $ strContent e) "" $ str $ T.pack $ strContent e + "email" -> return $ link ("mailto:" <> strContent e) "" + $ str $ strContent e + "uri" -> return $ link (strContent e) "" $ str $ strContent e "ulink" -> innerInlines (link (attrValue "url" e) "") "link" -> do ils <- innerInlines id let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> T.pack h + Just h -> h _ -> "#" <> attrValue "linkend" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, T.words $ attrValue "role" e, []) @@ -1180,7 +1180,7 @@ parseInline (Elem e) = "pi-asciidoc-br" -> return linebreak _ -> skip >> innerInlines id where skip = do - let qn = T.pack $ qName $ elName e + let qn = qName $ elName e let name = if "pi-" `T.isPrefixOf` qn then "<?" <> qn <> "?>" else qn @@ -1193,7 +1193,7 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ T.pack $ strContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e simpleList = mconcat . intersperse (str "," <> space) <$> mapM getInlines (filterChildren (named "member") e) segmentedList = do @@ -1234,10 +1234,10 @@ parseInline (Elem e) = "sect5" -> descendantContent "title" el "cmdsynopsis" -> descendantContent "command" el "funcsynopsis" -> descendantContent "function" el - _ -> T.pack $ qName (elName el) ++ "_title" + _ -> qName (elName el) <> "_title" where xrefLabel = attrValue "xreflabel" el - descendantContent name = maybe "???" (T.pack . strContent) + descendantContent name = maybe "???" strContent . filterElementName (\n -> qName n == name) -- | Extract a math equation from an element @@ -1258,7 +1258,7 @@ equation e constructor = mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") - (readMathML . T.pack . showElement) + (readMathML . showElement) latexEquations :: [Text] latexEquations = readMath (\x -> qName (elName x) == "mathphrase") @@ -1272,8 +1272,8 @@ equation e constructor = -- | Get the actual text stored in a CData block. 'showContent' -- returns the text still surrounded by the [[CDATA]] tags. showVerbatimCData :: Content -> Text -showVerbatimCData (Text (CData _ d _)) = T.pack d -showVerbatimCData c = T.pack $ showContent c +showVerbatimCData (Text (CData _ d _)) = d +showVerbatimCData c = showContent c -- | Set the prefix of a name to 'Nothing' diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 056dab6c2..c76f3c171 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -63,6 +63,7 @@ import Data.Char (chr, ord, readLitChar) import Data.List import qualified Data.Map as M import qualified Data.Text as T +import Data.Text (Text) import Data.Maybe import System.FilePath import Text.Pandoc.Readers.Docx.Util @@ -72,9 +73,7 @@ import qualified Text.Pandoc.UTF8 as UTF8 import Text.TeXMath (Exp) import Text.TeXMath.Readers.OMML (readOMML) import Text.TeXMath.Unicode.Fonts (Font (..), getUnicode, textToFont) -import Text.XML.Light -import qualified Text.XML.Light.Cursor as XMLC -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light data ReaderEnv = ReaderEnv { envNotes :: Notes , envComments :: Comments @@ -128,37 +127,23 @@ mapD f xs = in concatMapM handler xs -unwrap :: NameSpaces -> Content -> [Content] -unwrap ns (Elem element) +unwrapElement :: NameSpaces -> Element -> [Element] +unwrapElement ns element | isElem ns "w" "sdt" element , Just sdtContent <- findChildByName ns "w" "sdtContent" element - = concatMap (unwrap ns . Elem) (elChildren sdtContent) + = concatMap (unwrapElement ns) (elChildren sdtContent) | isElem ns "w" "smartTag" element - = concatMap (unwrap ns . Elem) (elChildren element) -unwrap _ content = [content] + = concatMap (unwrapElement ns) (elChildren element) + | otherwise + = [element{ elContent = concatMap (unwrapContent ns) (elContent element) }] -unwrapChild :: NameSpaces -> Content -> Content -unwrapChild ns (Elem element) = - Elem $ element { elContent = concatMap (unwrap ns) (elContent element) } -unwrapChild _ content = content +unwrapContent :: NameSpaces -> Content -> [Content] +unwrapContent ns (Elem element) = map Elem $ unwrapElement ns element +unwrapContent _ content = [content] -walkDocument' :: NameSpaces -> XMLC.Cursor -> XMLC.Cursor -walkDocument' ns cur = - let modifiedCur = XMLC.modifyContent (unwrapChild ns) cur - in - case XMLC.nextDF modifiedCur of - Just cur' -> walkDocument' ns cur' - Nothing -> XMLC.root modifiedCur - -walkDocument :: NameSpaces -> Element -> Maybe Element +walkDocument :: NameSpaces -> Element -> Element walkDocument ns element = - let cur = XMLC.fromContent (Elem element) - cur' = walkDocument' ns cur - in - case XMLC.toTree cur' of - Elem element' -> Just element' - _ -> Nothing - + element{ elContent = concatMap (unwrapContent ns) (elContent element) } newtype Docx = Docx Document deriving Show @@ -361,9 +346,9 @@ getDocumentXmlPath zf = do fp <- findAttr (QName "Target" Nothing Nothing) rel -- sometimes there will be a leading slash, which windows seems to -- have trouble with. - return $ case fp of + return $ case T.unpack fp of '/' : fp' -> fp' - _ -> fp + fp' -> fp' archiveToDocument :: Archive -> D Document archiveToDocument zf = do @@ -372,7 +357,7 @@ archiveToDocument zf = do docElem <- maybeToD $ parseXMLFromEntry entry let namespaces = elemToNameSpaces docElem bodyElem <- maybeToD $ findChildByName namespaces "w" "body" docElem - let bodyElem' = fromMaybe bodyElem (walkDocument namespaces bodyElem) + let bodyElem' = walkDocument namespaces bodyElem body <- elemToBody namespaces bodyElem' return $ Document namespaces body @@ -414,8 +399,8 @@ archiveToNotes zf = fn_namespaces = maybe [] elemToNameSpaces fnElem en_namespaces = maybe [] elemToNameSpaces enElem ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces - fn = fnElem >>= walkDocument ns >>= elemToNotes ns "footnote" - en = enElem >>= walkDocument ns >>= elemToNotes ns "endnote" + fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns + en = enElem >>= elemToNotes ns "endnote" . walkDocument ns in Notes ns fn en @@ -424,7 +409,8 @@ archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf >>= parseXMLFromEntry cmts_namespaces = maybe [] elemToNameSpaces cmtsElem - cmts = elemToComments cmts_namespaces <$> (cmtsElem >>= walkDocument cmts_namespaces) + cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$> + cmtsElem in case cmts of Just c -> Comments cmts_namespaces c @@ -443,8 +429,8 @@ filePathToRelType path docXmlPath = relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship relElemToRelationship relType element | qName (elName element) == "Relationship" = do - relId <- findAttrText (QName "Id" Nothing Nothing) element - target <- findAttrText (QName "Target" Nothing Nothing) element + relId <- findAttr (QName "Id" Nothing Nothing) element + target <- findAttr (QName "Target" Nothing Nothing) element return $ Relationship relType relId target relElemToRelationship _ _ = Nothing @@ -485,10 +471,10 @@ lookupLevel numId ilvl (Numbering _ numbs absNumbs) = do loElemToLevelOverride :: NameSpaces -> Element -> Maybe LevelOverride loElemToLevelOverride ns element | isElem ns "w" "lvlOverride" element = do - ilvl <- findAttrTextByName ns "w" "ilvl" element + ilvl <- findAttrByName ns "w" "ilvl" element let startOverride = findChildByName ns "w" "startOverride" element >>= findAttrByName ns "w" "val" - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + >>= stringToInteger lvl = findChildByName ns "w" "lvl" element >>= levelElemToLevel ns return $ LevelOverride ilvl startOverride lvl @@ -497,9 +483,9 @@ loElemToLevelOverride _ _ = Nothing numElemToNum :: NameSpaces -> Element -> Maybe Numb numElemToNum ns element | isElem ns "w" "num" element = do - numId <- findAttrTextByName ns "w" "numId" element + numId <- findAttrByName ns "w" "numId" element absNumId <- findChildByName ns "w" "abstractNumId" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" let lvlOverrides = mapMaybe (loElemToLevelOverride ns) (findChildrenByName ns "w" "lvlOverride" element) @@ -509,7 +495,7 @@ numElemToNum _ _ = Nothing absNumElemToAbsNum :: NameSpaces -> Element -> Maybe AbstractNumb absNumElemToAbsNum ns element | isElem ns "w" "abstractNum" element = do - absNumId <- findAttrTextByName ns "w" "abstractNumId" element + absNumId <- findAttrByName ns "w" "abstractNumId" element let levelElems = findChildrenByName ns "w" "lvl" element levels = mapMaybe (levelElemToLevel ns) levelElems return $ AbstractNumb absNumId levels @@ -518,14 +504,14 @@ absNumElemToAbsNum _ _ = Nothing levelElemToLevel :: NameSpaces -> Element -> Maybe Level levelElemToLevel ns element | isElem ns "w" "lvl" element = do - ilvl <- findAttrTextByName ns "w" "ilvl" element + ilvl <- findAttrByName ns "w" "ilvl" element fmt <- findChildByName ns "w" "numFmt" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" txt <- findChildByName ns "w" "lvlText" element - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" let start = findChildByName ns "w" "start" element >>= findAttrByName ns "w" "val" - >>= (\s -> listToMaybe (map fst (reads s :: [(Integer, String)]))) + >>= stringToInteger return (Level ilvl fmt txt start) levelElemToLevel _ _ = Nothing @@ -546,11 +532,11 @@ archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) -elemToNotes :: NameSpaces -> String -> Element -> Maybe (M.Map T.Text Element) +elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element | isElem ns "w" (notetype <> "s") element = let pairs = mapMaybe - (\e -> findAttrTextByName ns "w" "id" e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" notetype element) in @@ -562,7 +548,7 @@ elemToComments :: NameSpaces -> Element -> M.Map T.Text Element elemToComments ns element | isElem ns "w" "comments" element = let pairs = mapMaybe - (\e -> findAttrTextByName ns "w" "id" e >>= + (\e -> findAttrByName ns "w" "id" e >>= (\a -> Just (a, e))) (findChildrenByName ns "w" "comment" element) in @@ -622,12 +608,12 @@ elemToParIndentation ns element | isElem ns "w" "ind" element = stringToInteger , hangingParIndent = findAttrByName ns "w" "hanging" element >>= - stringToInteger} + stringToInteger } elemToParIndentation _ _ = Nothing -testBitMask :: String -> Int -> Bool +testBitMask :: Text -> Int -> Bool testBitMask bitMaskS n = - case (reads ("0x" ++ bitMaskS) :: [(Int, String)]) of + case (reads ("0x" ++ T.unpack bitMaskS) :: [(Int, String)]) of [] -> False ((n', _) : _) -> (n' .|. n) /= 0 @@ -642,7 +628,7 @@ elemToBodyPart ns element | isElem ns "w" "p" element , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do - expsLst <- eitherToD $ readOMML $ T.pack $ showElement c + expsLst <- eitherToD $ readOMML $ showElement c return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element @@ -666,7 +652,7 @@ elemToBodyPart ns element | isElem ns "w" "tbl" element = do let caption' = findChildByName ns "w" "tblPr" element >>= findChildByName ns "w" "tblCaption" - >>= findAttrTextByName ns "w" "val" + >>= findAttrByName ns "w" "val" caption = fromMaybe "" caption' grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g @@ -705,8 +691,8 @@ getTitleAndAlt :: NameSpaces -> Element -> (T.Text, T.Text) getTitleAndAlt ns element = let mbDocPr = findChildByName ns "wp" "inline" element >>= findChildByName ns "wp" "docPr" - title = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "title") - alt = fromMaybe "" (mbDocPr >>= findAttrTextByName ns "" "descr") + title = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "title") + alt = fromMaybe "" (mbDocPr >>= findAttrByName ns "" "descr") in (title, alt) elemToParPart :: NameSpaces -> Element -> D ParPart @@ -718,7 +704,7 @@ elemToParPart ns element = let (title, alt) = getTitleAndAlt ns drawingElem a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrTextByName ns "r" "embed" + >>= findAttrByName ns "r" "embed" in case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) @@ -728,7 +714,7 @@ elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = let drawing = findElement (elemName ns "v" "imagedata") element - >>= findAttrTextByName ns "r" "id" + >>= findAttrByName ns "r" "id" in case drawing of -- Todo: check out title and attr for deprecated format. @@ -797,7 +783,7 @@ elemToParPart ns element fldCharState <- gets stateFldCharState case fldCharState of FldCharOpen -> do - info <- eitherToD $ parseFieldInfo $ T.pack $ strContent instrText + info <- eitherToD $ parseFieldInfo $ strContent instrText modify $ \st -> st{stateFldCharState = FldCharFieldInfo info} return NullParPart _ -> return NullParPart @@ -818,48 +804,48 @@ elemToParPart ns element return $ ChangedRuns change runs elemToParPart ns element | isElem ns "w" "bookmarkStart" element - , Just bmId <- findAttrTextByName ns "w" "id" element - , Just bmName <- findAttrTextByName ns "w" "name" element = + , Just bmId <- findAttrByName ns "w" "id" element + , Just bmName <- findAttrByName ns "w" "name" element = return $ BookMark bmId bmName elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just relId <- findAttrTextByName ns "r" "id" element = do + , Just relId <- findAttrByName ns "r" "id" element = do location <- asks envLocation runs <- mapD (elemToRun ns) (elChildren element) rels <- asks envRelationships case lookupRelationship location relId rels of Just target -> - case findAttrTextByName ns "w" "anchor" element of + case findAttrByName ns "w" "anchor" element of Just anchor -> return $ ExternalHyperLink (target <> "#" <> anchor) runs Nothing -> return $ ExternalHyperLink target runs Nothing -> return $ ExternalHyperLink "" runs elemToParPart ns element | isElem ns "w" "hyperlink" element - , Just anchor <- findAttrTextByName ns "w" "anchor" element = do + , Just anchor <- findAttrByName ns "w" "anchor" element = do runs <- mapD (elemToRun ns) (elChildren element) return $ InternalHyperLink anchor runs elemToParPart ns element | isElem ns "w" "commentRangeStart" element - , Just cmtId <- findAttrTextByName ns "w" "id" element = do + , Just cmtId <- findAttrByName ns "w" "id" element = do (Comments _ commentMap) <- asks envComments case M.lookup cmtId commentMap of Just cmtElem -> elemToCommentStart ns cmtElem Nothing -> throwError WrongElem elemToParPart ns element | isElem ns "w" "commentRangeEnd" element - , Just cmtId <- findAttrTextByName ns "w" "id" element = + , Just cmtId <- findAttrByName ns "w" "id" element = return $ CommentEnd cmtId elemToParPart ns element | isElem ns "m" "oMath" element = - fmap PlainOMath (eitherToD $ readOMML $ T.pack $ showElement element) + fmap PlainOMath (eitherToD $ readOMML $ showElement element) elemToParPart _ _ = throwError WrongElem elemToCommentStart :: NameSpaces -> Element -> D ParPart elemToCommentStart ns element | isElem ns "w" "comment" element - , Just cmtId <- findAttrTextByName ns "w" "id" element - , Just cmtAuthor <- findAttrTextByName ns "w" "author" element - , cmtDate <- findAttrTextByName ns "w" "date" element = do + , Just cmtId <- findAttrByName ns "w" "id" element + , Just cmtAuthor <- findAttrByName ns "w" "author" element + , cmtDate <- findAttrByName ns "w" "date" element = do bps <- mapD (elemToBodyPart ns) (elChildren element) return $ CommentStart cmtId cmtAuthor cmtDate bps elemToCommentStart _ _ = throwError WrongElem @@ -878,7 +864,7 @@ elemToExtent drawingElem = where wp_ns = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing" getDim at = findElement (QName "extent" (Just wp_ns) (Just "wp")) drawingElem - >>= findAttr (QName at Nothing Nothing) >>= safeRead . T.pack + >>= findAttr (QName at Nothing Nothing) >>= safeRead childElemToRun :: NameSpaces -> Element -> D Run @@ -889,7 +875,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttrText (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= @@ -902,7 +888,7 @@ childElemToRun ns element = return InlineChart childElemToRun ns element | isElem ns "w" "footnoteReference" element - , Just fnId <- findAttrTextByName ns "w" "id" element = do + , Just fnId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupFootnote fnId notes of Just e -> do bps <- local (\r -> r {envLocation=InFootnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -910,7 +896,7 @@ childElemToRun ns element Nothing -> return $ Footnote [] childElemToRun ns element | isElem ns "w" "endnoteReference" element - , Just enId <- findAttrTextByName ns "w" "id" element = do + , Just enId <- findAttrByName ns "w" "id" element = do notes <- asks envNotes case lookupEndnote enId notes of Just e -> do bps <- local (\r -> r {envLocation=InEndnote}) $ mapD (elemToBodyPart ns) (elChildren e) @@ -963,15 +949,15 @@ getParStyleField _ _ = Nothing getTrackedChange :: NameSpaces -> Element -> Maybe TrackedChange getTrackedChange ns element | isElem ns "w" "ins" element || isElem ns "w" "moveTo" element - , Just cId <- findAttrTextByName ns "w" "id" element - , Just cAuthor <- findAttrTextByName ns "w" "author" element - , mcDate <- findAttrTextByName ns "w" "date" element = + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , mcDate <- findAttrByName ns "w" "date" element = Just $ TrackedChange Insertion (ChangeInfo cId cAuthor mcDate) getTrackedChange ns element | isElem ns "w" "del" element || isElem ns "w" "moveFrom" element - , Just cId <- findAttrTextByName ns "w" "id" element - , Just cAuthor <- findAttrTextByName ns "w" "author" element - , mcDate <- findAttrTextByName ns "w" "date" element = + , Just cId <- findAttrByName ns "w" "id" element + , Just cAuthor <- findAttrByName ns "w" "author" element + , mcDate <- findAttrByName ns "w" "date" element = Just $ TrackedChange Deletion (ChangeInfo cId cAuthor mcDate) getTrackedChange _ _ = Nothing @@ -980,7 +966,7 @@ elemToParagraphStyle ns element sty | Just pPr <- findChildByName ns "w" "pPr" element = let style = mapMaybe - (fmap ParaStyleId . findAttrTextByName ns "w" "val") + (fmap ParaStyleId . findAttrByName ns "w" "val") (findChildrenByName ns "w" "pStyle" pPr) in ParagraphStyle {pStyle = mapMaybe (`M.lookup` sty) style @@ -1012,7 +998,7 @@ elemToRunStyleD ns element charStyles <- asks envCharStyles let parentSty = findChildByName ns "w" "rStyle" rPr >>= - findAttrTextByName ns "w" "val" >>= + findAttrByName ns "w" "val" >>= flip M.lookup charStyles . CharStyleId return $ elemToRunStyle ns element parentSty elemToRunStyleD _ _ = return defaultRunStyle @@ -1022,7 +1008,7 @@ elemToRunElem ns element | isElem ns "w" "t" element || isElem ns "w" "delText" element || isElem ns "m" "t" element = do - let str = T.pack $ strContent element + let str = strContent element font <- asks envFont case font of Nothing -> return $ TextRun str @@ -1044,14 +1030,14 @@ getSymChar :: NameSpaces -> Element -> RunElem getSymChar ns element | Just s <- lowerFromPrivate <$> getCodepoint , Just font <- getFont = - case readLitChar ("\\x" ++ s) of + case readLitChar ("\\x" ++ T.unpack s) of [(char, _)] -> TextRun . maybe "" T.singleton $ getUnicode font char _ -> TextRun "" where getCodepoint = findAttrByName ns "w" "char" element - getFont = textToFont . T.pack =<< findAttrByName ns "w" "font" element - lowerFromPrivate ('F':xs) = '0':xs - lowerFromPrivate xs = xs + getFont = textToFont =<< findAttrByName ns "w" "font" element + lowerFromPrivate t | "F" `T.isPrefixOf` t = "0" <> T.drop 1 t + | otherwise = t getSymChar _ _ = TextRun "" elemToRunElems :: NameSpaces -> Element -> D [RunElem] @@ -1061,8 +1047,9 @@ elemToRunElems ns element let qualName = elemName ns "w" let font = do fontElem <- findElement (qualName "rFonts") element - textToFont . T.pack =<< - foldr ((<|>) . (flip findAttr fontElem . qualName)) Nothing ["ascii", "hAnsi"] + textToFont =<< + foldr ((<|>) . (flip findAttr fontElem . qualName)) + Nothing ["ascii", "hAnsi"] local (setFont font) (mapD (elemToRunElem ns) (elChildren element)) elemToRunElems _ _ = throwError WrongElem diff --git a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs index edade8654..0d7271d6a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse/Styles.hs @@ -48,12 +48,13 @@ import Data.Function (on) import Data.String (IsString(..)) import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.Text.Read +import Data.Text (Text) import Data.Maybe import Data.Coerce import Text.Pandoc.Readers.Docx.Util import qualified Text.Pandoc.UTF8 as UTF8 -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light newtype CharStyleId = CharStyleId T.Text deriving (Show, Eq, Ord, IsString, FromStyleId) @@ -109,7 +110,7 @@ data RunStyle = RunStyle { isBold :: Maybe Bool , isRTL :: Maybe Bool , isForceCTL :: Maybe Bool , rVertAlign :: Maybe VertAlign - , rUnderline :: Maybe String + , rUnderline :: Maybe Text , rParentStyle :: Maybe CharStyle } deriving Show @@ -159,7 +160,7 @@ isBasedOnStyle ns element parentStyle , Just styleType <- findAttrByName ns "w" "type" element , styleType == cStyleType parentStyle , Just basedOnVal <- findChildByName ns "w" "basedOn" element >>= - findAttrTextByName ns "w" "val" + findAttrByName ns "w" "val" , Just ps <- parentStyle = basedOnVal == fromStyleId (getStyleId ps) | isElem ns "w" "style" element , Just styleType <- findAttrByName ns "w" "type" element @@ -169,7 +170,7 @@ isBasedOnStyle ns element parentStyle | otherwise = False class HasStyleId a => ElemToStyle a where - cStyleType :: Maybe a -> String + cStyleType :: Maybe a -> Text elemToStyle :: NameSpaces -> Element -> Maybe a -> Maybe a class FromStyleId (StyleId a) => HasStyleId a where @@ -226,8 +227,10 @@ buildBasedOnList ns element rootStyle = stys -> stys ++ concatMap (buildBasedOnList ns element . Just) stys -stringToInteger :: String -> Maybe Integer -stringToInteger s = listToMaybe $ map fst (reads s :: [(Integer, String)]) +stringToInteger :: Text -> Maybe Integer +stringToInteger s = case Data.Text.Read.decimal s of + Right (x,_) -> Just x + Left _ -> Nothing checkOnOff :: NameSpaces -> Element -> QName -> Maybe Bool checkOnOff ns rPr tag @@ -247,7 +250,7 @@ checkOnOff _ _ _ = Nothing elemToCharStyle :: NameSpaces -> Element -> Maybe CharStyle -> Maybe CharStyle elemToCharStyle ns element parentStyle - = CharStyle <$> (CharStyleId <$> findAttrTextByName ns "w" "styleId" element) + = CharStyle <$> (CharStyleId <$> findAttrByName ns "w" "styleId" element) <*> getElementStyleName ns element <*> Just (elemToRunStyle ns element parentStyle) @@ -281,7 +284,7 @@ elemToRunStyle _ _ _ = defaultRunStyle getHeaderLevel :: NameSpaces -> Element -> Maybe (ParaStyleName, Int) getHeaderLevel ns element | Just styleName <- getElementStyleName ns element - , Just n <- stringToInteger . T.unpack =<< + , Just n <- stringToInteger =<< (T.stripPrefix "heading " . T.toLower $ fromStyleName styleName) , n > 0 = Just (styleName, fromInteger n) @@ -289,8 +292,8 @@ getHeaderLevel _ _ = Nothing getElementStyleName :: Coercible T.Text a => NameSpaces -> Element -> Maybe a getElementStyleName ns el = coerce <$> - ((findChildByName ns "w" "name" el >>= findAttrTextByName ns "w" "val") - <|> findAttrTextByName ns "w" "styleId" el) + ((findChildByName ns "w" "name" el >>= findAttrByName ns "w" "val") + <|> findAttrByName ns "w" "styleId" el) getNumInfo :: NameSpaces -> Element -> Maybe (T.Text, T.Text) getNumInfo ns element = do @@ -298,15 +301,15 @@ getNumInfo ns element = do findChildByName ns "w" "numPr" lvl = fromMaybe "0" (numPr >>= findChildByName ns "w" "ilvl" >>= - findAttrTextByName ns "w" "val") + findAttrByName ns "w" "val") numId <- numPr >>= findChildByName ns "w" "numId" >>= - findAttrTextByName ns "w" "val" + findAttrByName ns "w" "val" return (numId, lvl) elemToParStyleData :: NameSpaces -> Element -> Maybe ParStyle -> Maybe ParStyle elemToParStyleData ns element parentStyle - | Just styleId <- findAttrTextByName ns "w" "styleId" element + | Just styleId <- findAttrByName ns "w" "styleId" element , Just styleName <- getElementStyleName ns element = Just $ ParStyle { diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index f9c9a8e26..21df03d9e 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Docx.StyleMaps Copyright : © 2014-2020 Jesse Rosenthal <jrosenthal@jhu.edu>, @@ -18,51 +19,45 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName - , findAttrText , findAttrByName - , findAttrTextByName ) where import Data.Maybe (mapMaybe) import qualified Data.Text as T -import Text.XML.Light +import Data.Text (Text) +import Text.Pandoc.XML.Light -type NameSpaces = [(String, String)] +type NameSpaces = [(Text, Text)] elemToNameSpaces :: Element -> NameSpaces elemToNameSpaces = mapMaybe attrToNSPair . elAttribs -attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair :: Attr -> Maybe (Text, Text) attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) attrToNSPair _ = Nothing -elemName :: NameSpaces -> String -> String -> QName +elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) -isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = let ns' = ns ++ elemToNameSpaces element in qName (elName element) == name && qURI (elName element) == lookup prefix ns' -findChildByName :: NameSpaces -> String -> String -> Element -> Maybe Element +findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element findChildByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChild (elemName ns' pref name) el -findChildrenByName :: NameSpaces -> String -> String -> Element -> [Element] +findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element] findChildrenByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findChildren (elemName ns' pref name) el -findAttrText :: QName -> Element -> Maybe T.Text -findAttrText x = fmap T.pack . findAttr x - -findAttrByName :: NameSpaces -> String -> String -> Element -> Maybe String +findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text findAttrByName ns pref name el = let ns' = ns ++ elemToNameSpaces el in findAttr (elemName ns' pref name) el -findAttrTextByName :: NameSpaces -> String -> String -> Element -> Maybe T.Text -findAttrTextByName a b c = fmap T.pack . findAttrByName a b c diff --git a/src/Text/Pandoc/Readers/EPUB.hs b/src/Text/Pandoc/Readers/EPUB.hs index 369c4f0c9..eb8d2405d 100644 --- a/src/Text/Pandoc/Readers/EPUB.hs +++ b/src/Text/Pandoc/Readers/EPUB.hs @@ -23,8 +23,8 @@ import Control.DeepSeq (NFData, deepseq) import Control.Monad (guard, liftM, liftM2, mplus) import Control.Monad.Except (throwError) import qualified Data.ByteString.Lazy as BL (ByteString) -import Data.List (isInfixOf) import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Map as M (Map, elems, fromList, lookup) import Data.Maybe (mapMaybe) import qualified Data.Text.Lazy as TL @@ -40,13 +40,12 @@ import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension) import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Options (ReaderOptions (..)) import Text.Pandoc.Readers.HTML (readHtml) -import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI) +import Text.Pandoc.Shared (addMetaField, collapseFilePath, escapeURI, tshow) import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy) import Text.Pandoc.Walk (query, walk) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light -type Items = M.Map String (FilePath, MimeType) +type Items = M.Map Text (FilePath, MimeType) readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc readEPUB opts bytes = case toArchiveOrFail bytes of @@ -126,26 +125,27 @@ imageToPandoc s = B.doc . B.para $ B.image (T.pack s) "" mempty imageMimes :: [MimeType] imageMimes = ["image/gif", "image/jpeg", "image/png"] -type CoverId = String +type CoverId = Text type CoverImage = FilePath -parseManifest :: (PandocMonad m) => Element -> Maybe CoverId -> m (Maybe CoverImage, Items) +parseManifest :: (PandocMonad m) + => Element -> Maybe CoverId -> m (Maybe CoverImage, Items) parseManifest content coverId = do manifest <- findElementE (dfName "manifest") content let items = findChildren (dfName "item") manifest r <- mapM parseItem items let cover = findAttr (emptyName "href") =<< filterChild findCover manifest - return (cover `mplus` coverId, M.fromList r) + return (T.unpack <$> (cover `mplus` coverId), M.fromList r) where - findCover e = maybe False (isInfixOf "cover-image") + findCover e = maybe False (T.isInfixOf "cover-image") (findAttr (emptyName "properties") e) || Just True == liftM2 (==) coverId (findAttr (emptyName "id") e) parseItem e = do uid <- findAttrE (emptyName "id") e href <- findAttrE (emptyName "href") e mime <- findAttrE (emptyName "media-type") e - return (uid, (href, T.pack mime)) + return (uid, (T.unpack href, mime)) parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)] parseSpine is e = do @@ -173,11 +173,11 @@ parseMeta content = do -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem parseMetaItem :: Element -> Meta -> Meta parseMetaItem e@(stripNamespace . elName -> field) meta = - addMetaField (renameMeta field) (B.str $ T.pack $ strContent e) meta + addMetaField (renameMeta field) (B.str $ strContent e) meta -renameMeta :: String -> T.Text +renameMeta :: Text -> Text renameMeta "creator" = "author" -renameMeta s = T.pack s +renameMeta s = s getManifest :: PandocMonad m => Archive -> m (String, Element) getManifest archive = do @@ -187,7 +187,7 @@ getManifest archive = do ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces) as <- fmap (map attrToPair . elAttribs) (findElementE (QName "rootfile" (Just ns) Nothing) docElem) - manifestFile <- mkE "Root not found" (lookup "full-path" as) + manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as) let rootdir = dropFileName manifestFile --mime <- lookup "media-type" as manifest <- findEntryByPathE manifestFile archive @@ -201,7 +201,8 @@ fixInternalReferences pathToFile = . walk (fixBlockIRs filename) . walk (fixInlineIRs filename) where - (root, T.unpack . escapeURI . T.pack -> filename) = splitFileName pathToFile + (root, T.unpack . escapeURI . T.pack -> filename) = + splitFileName pathToFile fixInlineIRs :: String -> Inline -> Inline fixInlineIRs s (Span as v) = @@ -214,7 +215,7 @@ fixInlineIRs s (Link as is t) = Link (fixAttrs s as) is t fixInlineIRs _ v = v -prependHash :: [T.Text] -> Inline -> Inline +prependHash :: [Text] -> Inline -> Inline prependHash ps l@(Link attr is (url, tit)) | or [s `T.isPrefixOf` url | s <- ps] = Link attr is ("#" <> url, tit) @@ -231,16 +232,17 @@ fixBlockIRs s (CodeBlock as code) = fixBlockIRs _ b = b fixAttrs :: FilePath -> B.Attr -> B.Attr -fixAttrs s (ident, cs, kvs) = (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) +fixAttrs s (ident, cs, kvs) = + (addHash s ident, filter (not . T.null) cs, removeEPUBAttrs kvs) -addHash :: String -> T.Text -> T.Text +addHash :: FilePath -> Text -> Text addHash _ "" = "" addHash s ident = T.pack (takeFileName s) <> "#" <> ident -removeEPUBAttrs :: [(T.Text, T.Text)] -> [(T.Text, T.Text)] +removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)] removeEPUBAttrs kvs = filter (not . isEPUBAttr) kvs -isEPUBAttr :: (T.Text, a) -> Bool +isEPUBAttr :: (Text, a) -> Bool isEPUBAttr (k, _) = "epub:" `T.isPrefixOf` k -- Library @@ -257,33 +259,33 @@ uncurry3 f (a, b, c) = f a b c -- Utility -stripNamespace :: QName -> String +stripNamespace :: QName -> Text stripNamespace (QName v _ _) = v -attrToNSPair :: Attr -> Maybe (String, String) +attrToNSPair :: Attr -> Maybe (Text, Text) attrToNSPair (Attr (QName "xmlns" _ _) val) = Just ("xmlns", val) attrToNSPair _ = Nothing -attrToPair :: Attr -> (String, String) +attrToPair :: Attr -> (Text, Text) attrToPair (Attr (QName name _ _) val) = (name, val) -defaultNameSpace :: Maybe String +defaultNameSpace :: Maybe Text defaultNameSpace = Just "http://www.idpf.org/2007/opf" -dfName :: String -> QName +dfName :: Text -> QName dfName s = QName s defaultNameSpace Nothing -emptyName :: String -> QName +emptyName :: Text -> QName emptyName s = QName s Nothing Nothing -- Convert Maybe interface to Either -findAttrE :: PandocMonad m => QName -> Element -> m String +findAttrE :: PandocMonad m => QName -> Element -> m Text findAttrE q e = mkE "findAttr" $ findAttr q e findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry findEntryByPathE (normalise . unEscapeString -> path) a = - mkE ("No entry on path: " ++ path) $ findEntryByPath path a + mkE ("No entry on path: " <> T.pack path) $ findEntryByPath path a parseXMLDocE :: PandocMonad m => Entry -> m Element parseXMLDocE entry = @@ -293,7 +295,8 @@ parseXMLDocE entry = fp = T.pack $ eRelativePath entry findElementE :: PandocMonad m => QName -> Element -> m Element -findElementE e x = mkE ("Unable to find element: " ++ show e) $ findElement e x +findElementE e x = + mkE ("Unable to find element: " <> tshow e) $ findElement e x -mkE :: PandocMonad m => String -> Maybe a -> m a -mkE s = maybe (throwError . PandocParseError $ T.pack s) return +mkE :: PandocMonad m => Text -> Maybe a -> m a +mkE s = maybe (throwError . PandocParseError $ s) return diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index b804eab4f..66e390bd7 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -25,7 +25,6 @@ TODO: module Text.Pandoc.Readers.FB2 ( readFB2 ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict -import Data.ByteString.Lazy.Char8 ( pack ) import Data.ByteString.Base64.Lazy import Data.Functor import Data.List (intersperse) @@ -42,8 +41,8 @@ import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light +import qualified Text.Pandoc.UTF8 as UTF8 type FB2 m = StateT FB2State m @@ -85,12 +84,12 @@ removeHash t = case T.uncons t of Just ('#', xs) -> xs _ -> t -convertEntity :: String -> Text -convertEntity e = maybe (T.toUpper $ T.pack e) T.pack $ lookupEntity e +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack $ lookupEntity (T.unpack e) parseInline :: PandocMonad m => Content -> FB2 m Inlines parseInline (Elem e) = - case T.pack $ qName $ elName e of + case qName $ elName e of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -98,12 +97,12 @@ parseInline (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ T.pack $ strContent e + "code" -> pure $ code $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement name pure mempty -parseInline (Text x) = pure $ text $ T.pack $ cdData x +parseInline (Text x) = pure $ text $ cdData x parseInline (CRef r) = pure $ str $ convertEntity r parseSubtitle :: PandocMonad m => Element -> FB2 m Blocks @@ -113,7 +112,7 @@ parseSubtitle e = headerWith ("", ["unnumbered"], []) <$> gets fb2SectionLevel < parseRootElement :: PandocMonad m => Element -> FB2 m Blocks parseRootElement e = - case T.pack $ qName $ elName e of + case qName $ elName e of "FictionBook" -> do -- Parse notes before parsing the rest of the content. case filterChild isNotesBody e of @@ -146,7 +145,7 @@ parseNote e = Just sectionId -> do content <- mconcat <$> mapM parseSectionChild (dropTitle $ elChildren e) oldNotes <- gets fb2Notes - modify $ \s -> s { fb2Notes = M.insert ("#" <> T.pack sectionId) content oldNotes } + modify $ \s -> s { fb2Notes = M.insert ("#" <> sectionId) content oldNotes } pure () where isTitle x = qName (elName x) == "title" @@ -158,7 +157,7 @@ parseNote e = -- | Parse a child of @\<FictionBook>@ element. parseFictionBookChild :: PandocMonad m => Element -> FB2 m Blocks parseFictionBookChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "stylesheet" -> pure mempty -- stylesheet is ignored "description" -> mempty <$ mapM_ parseDescriptionChild (elChildren e) "body" -> if isNotesBody e @@ -170,7 +169,7 @@ parseFictionBookChild e = -- | Parse a child of @\<description>@ element. parseDescriptionChild :: PandocMonad m => Element -> FB2 m () parseDescriptionChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title-info" -> mapM_ parseTitleInfoChild (elChildren e) "src-title-info" -> pure () -- ignore "document-info" -> pure () @@ -184,7 +183,7 @@ parseDescriptionChild e = -- | Parse a child of @\<body>@ element. parseBodyChild :: PandocMonad m => Element -> FB2 m Blocks parseBodyChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "image" -> parseImageElement e "title" -> header <$> gets fb2SectionLevel <*> parseTitleType (elContent e) "epigraph" -> parseEpigraph e @@ -198,7 +197,10 @@ parseBinaryElement e = (Nothing, _) -> report $ IgnoredElement "binary without id attribute" (Just _, Nothing) -> report $ IgnoredElement "binary without content-type attribute" - (Just filename, contentType) -> insertMedia filename (T.pack <$> contentType) (decodeLenient (pack (strContent e))) + (Just filename, contentType) -> + insertMedia (T.unpack filename) contentType + (decodeLenient + (UTF8.fromTextLazy . TL.fromStrict . strContent $ e)) -- * Type parsers @@ -208,13 +210,13 @@ parseAuthor e = T.unwords . catMaybes <$> mapM parseAuthorChild (elChildren e) parseAuthorChild :: PandocMonad m => Element -> FB2 m (Maybe Text) parseAuthorChild e = - case T.pack $ qName $ elName e of - "first-name" -> pure $ Just $ T.pack $ strContent e - "middle-name" -> pure $ Just $ T.pack $ strContent e - "last-name" -> pure $ Just $ T.pack $ strContent e - "nickname" -> pure $ Just $ T.pack $ strContent e - "home-page" -> pure $ Just $ T.pack $ strContent e - "email" -> pure $ Just $ T.pack $ strContent e + case qName $ elName e of + "first-name" -> pure $ Just $ strContent e + "middle-name" -> pure $ Just $ strContent e + "last-name" -> pure $ Just $ strContent e + "nickname" -> pure $ Just $ strContent e + "home-page" -> pure $ Just $ strContent e + "email" -> pure $ Just $ strContent e name -> do report $ IgnoredElement $ name <> " in author" pure Nothing @@ -238,13 +240,13 @@ parseTitleContent _ = pure Nothing parseImageElement :: PandocMonad m => Element -> FB2 m Blocks parseImageElement e = case href of - Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash $ T.pack src) title alt + Just src -> pure $ para $ imageWith (imgId, [], []) (removeHash src) title alt Nothing -> do report $ IgnoredElement " image without href" pure mempty - where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e - title = maybe "" T.pack $ findAttr (unqual "title") e - imgId = maybe "" T.pack $ findAttr (unqual "id") e + where alt = maybe mempty str $ findAttr (unqual "alt") e + title = fromMaybe "" $ findAttr (unqual "title") e + imgId = fromMaybe "" $ findAttr (unqual "id") e href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e -- | Parse @pType@ @@ -258,7 +260,7 @@ parseCite e = blockQuote . mconcat <$> mapM parseCiteChild (elChildren e) -- | Parse @citeType@ child parseCiteChild :: PandocMonad m => Element -> FB2 m Blocks parseCiteChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "empty-line" -> pure horizontalRule @@ -273,13 +275,13 @@ parsePoem e = mconcat <$> mapM parsePoemChild (elChildren e) parsePoemChild :: PandocMonad m => Element -> FB2 m Blocks parsePoemChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "epigraph" -> parseEpigraph e "stanza" -> parseStanza e "text-author" -> para <$> parsePType e - "date" -> pure $ para $ text $ T.pack $ strContent e + "date" -> pure $ para $ text $ strContent e name -> report (UnexpectedXmlElement name "poem") $> mempty parseStanza :: PandocMonad m => Element -> FB2 m Blocks @@ -292,7 +294,7 @@ joinLineBlocks [] = [] parseStanzaChild :: PandocMonad m => Element -> FB2 m Blocks parseStanzaChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseTitle e "subtitle" -> parseSubtitle e "v" -> lineBlock . (:[]) <$> parsePType e @@ -302,11 +304,11 @@ parseStanzaChild e = parseEpigraph :: PandocMonad m => Element -> FB2 m Blocks parseEpigraph e = divWith (divId, ["epigraph"], []) . mconcat <$> mapM parseEpigraphChild (elChildren e) - where divId = maybe "" T.pack $ findAttr (unqual "id") e + where divId = fromMaybe "" $ findAttr (unqual "id") e parseEpigraphChild :: PandocMonad m => Element -> FB2 m Blocks parseEpigraphChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -320,7 +322,7 @@ parseAnnotation e = mconcat <$> mapM parseAnnotationChild (elChildren e) parseAnnotationChild :: PandocMonad m => Element -> FB2 m Blocks parseAnnotationChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "p" -> para <$> parsePType e "poem" -> parsePoem e "cite" -> parseCite e @@ -334,14 +336,14 @@ parseSection :: PandocMonad m => Element -> FB2 m Blocks parseSection e = do n <- gets fb2SectionLevel modify $ \st -> st{ fb2SectionLevel = n + 1 } - let sectionId = maybe "" T.pack $ findAttr (unqual "id") e + let sectionId = fromMaybe "" $ findAttr (unqual "id") e bs <- divWith (sectionId, ["section"], []) . mconcat <$> mapM parseSectionChild (elChildren e) modify $ \st -> st{ fb2SectionLevel = n } pure bs parseSectionChild :: PandocMonad m => Element -> FB2 m Blocks parseSectionChild e = - case T.pack $ qName $ elName e of + case qName $ elName e of "title" -> parseBodyChild e "epigraph" -> parseEpigraph e "image" -> parseImageElement e @@ -363,16 +365,16 @@ parseStyleType e = mconcat <$> mapM parseInline (elContent e) parseNamedStyle :: PandocMonad m => Element -> FB2 m Inlines parseNamedStyle e = do content <- mconcat <$> mapM parseNamedStyleChild (elContent e) - let lang = maybeToList $ ("lang",) . T.pack <$> findAttr (QName "lang" Nothing (Just "xml")) e + let lang = maybeToList $ ("lang",) <$> findAttr (QName "lang" Nothing (Just "xml")) e case findAttr (unqual "name") e of - Just name -> pure $ spanWith ("", [T.pack name], lang) content + Just name -> pure $ spanWith ("", [name], lang) content Nothing -> do report $ IgnoredElement "link without required name" pure mempty parseNamedStyleChild :: PandocMonad m => Content -> FB2 m Inlines parseNamedStyleChild (Elem e) = - case T.pack $ qName (elName e) of + case qName (elName e) of "strong" -> strong <$> parseStyleType e "emphasis" -> emph <$> parseStyleType e "style" -> parseNamedStyle e @@ -380,7 +382,7 @@ parseNamedStyleChild (Elem e) = "strikethrough" -> strikeout <$> parseStyleType e "sub" -> subscript <$> parseStyleType e "sup" -> superscript <$> parseStyleType e - "code" -> pure $ code $ T.pack $ strContent e + "code" -> pure $ code $ strContent e "image" -> parseInlineImageElement e name -> do report $ IgnoredElement $ name <> " in style" @@ -392,7 +394,7 @@ parseLinkType :: PandocMonad m => Element -> FB2 m Inlines parseLinkType e = do content <- mconcat <$> mapM parseStyleLinkType (elContent e) notes <- gets fb2Notes - case T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of + case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of Just href -> case findAttr (QName "type" Nothing Nothing) e of Just "note" -> case M.lookup href notes of Nothing -> pure $ link href "" content @@ -419,15 +421,14 @@ parseTable _ = pure mempty -- TODO: tables are not supported yet -- | Parse @title-infoType@ parseTitleInfoChild :: PandocMonad m => Element -> FB2 m () parseTitleInfoChild e = - case T.pack $ qName (elName e) of + case qName (elName e) of "genre" -> pure () "author" -> parseAuthor e >>= \author -> modify (\st -> st {fb2Authors = author:fb2Authors st}) - "book-title" -> modify (setMeta "title" (text $ T.pack $ strContent e)) + "book-title" -> modify (setMeta "title" (text $ strContent e)) "annotation" -> parseAnnotation e >>= modify . setMeta "abstract" "keywords" -> modify (setMeta "keywords" (map (MetaString . trim) $ T.splitOn "," - $ T.pack $ strContent e)) - "date" -> modify (setMeta "date" (text $ T.pack $ strContent e)) + "date" -> modify (setMeta "date" (text $ strContent e)) "coverpage" -> parseCoverPage e "lang" -> pure () "src-lang" -> pure () @@ -441,7 +442,7 @@ parseCoverPage e = Just img -> case href of Just src -> modify (setMeta "cover-image" (MetaString $ removeHash src)) Nothing -> pure () - where href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img + where href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) img Nothing -> pure () -- | Parse @inlineImageType@ element @@ -454,5 +455,5 @@ parseInlineImageElement e = Nothing -> do report $ IgnoredElement "inline image without href" pure mempty - where alt = maybe mempty (str . T.pack) $ findAttr (unqual "alt") e - href = T.pack <$> findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e + where alt = maybe mempty str $ findAttr (unqual "alt") e + href = findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index dfd343b7a..5353f2001 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -16,7 +16,7 @@ module Text.Pandoc.Readers.JATS ( readJATS ) where import Control.Monad.State.Strict import Control.Monad.Except (throwError) import Text.Pandoc.Error (PandocError(..)) -import Data.Char (isDigit, isSpace, toUpper) +import Data.Char (isDigit, isSpace) import Data.Default import Data.Generics import Data.List (foldl', intersperse) @@ -31,8 +31,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) @@ -67,29 +66,29 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr = fromMaybe "" . maybeAttrValue attr -maybeAttrValue :: String -> Element -> Maybe Text +maybeAttrValue :: Text -> Element -> Maybe Text maybeAttrValue attr elt = - T.pack <$> lookupAttrBy (\x -> qName x == attr) (elAttribs elt) + lookupAttrBy (\x -> qName x == attr) (elAttribs elt) -- convenience function -named :: String -> Element -> Bool +named :: Text -> Element -> Bool named s e = qName (elName e) == s -- @@ -155,10 +154,10 @@ getBlocks e = mconcat <$> parseBlock :: PandocMonad m => Content -> JATS m Blocks parseBlock (Text (CData CDataRaw _ _)) = return mempty -- DOCTYPE -parseBlock (Text (CData _ s _)) = if all isSpace s +parseBlock (Text (CData _ s _)) = if T.all isSpace s then return mempty - else return $ plain $ trimInlines $ text $ T.pack s -parseBlock (CRef x) = return $ plain $ str $ T.toUpper $ T.pack x + else return $ plain $ trimInlines $ text s +parseBlock (CRef x) = return $ plain $ str $ T.toUpper x parseBlock (Elem e) = case qName (elName e) of "p" -> parseMixed para (elContent e) @@ -207,7 +206,7 @@ parseBlock (Elem e) = "" -> [] x -> [x] return $ codeBlockWith (attrValue "id" e, classes', []) - $ trimNl $ textContentRecursive e + $ trimNl $ strContentRecursive e parseBlockquote = do attrib <- case filterChild (named "attribution") e of Nothing -> return mempty @@ -271,7 +270,7 @@ parseBlock (Elem e) = Just "center" -> AlignCenter _ -> AlignDefault let toWidth c = do - w <- findAttrText (unqual "colwidth") c + w <- findAttr (unqual "colwidth") c n <- safeRead $ "0" <> T.filter (\x -> isDigit x || x == '.') w if n > 0 then Just n else Nothing let numrows = foldl' max 0 $ map length bodyrows @@ -442,16 +441,10 @@ parseRef e = do Nothing -> return $ Map.insert "id" (toMetaValue refId) mempty -- TODO handle mixed-citation -findAttrText :: QName -> Element -> Maybe Text -findAttrText x = fmap T.pack . findAttr x - textContent :: Element -> Text -textContent = T.pack . strContent - -textContentRecursive :: Element -> Text -textContentRecursive = T.pack . strContentRecursive +textContent = strContent -strContentRecursive :: Element -> String +strContentRecursive :: Element -> Text strContentRecursive = strContent . (\e' -> e'{ elContent = map elementToStr $ elContent e' }) @@ -460,9 +453,8 @@ elementToStr (Elem e') = Text $ CData CDataText (strContentRecursive e') Nothing elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines -parseInline (Text (CData _ s _)) = return $ text $ T.pack s -parseInline (CRef ref) = - return . text . maybe (T.toUpper $ T.pack ref) T.pack $ lookupEntity ref +parseInline (Text (CData _ s _)) = return $ text s +parseInline (CRef ref) = return . text . convertEntity $ ref parseInline (Elem e) = case qName (elName e) of "italic" -> innerInlines emph @@ -507,9 +499,9 @@ parseInline (Elem e) = else linkWith attr ("#" <> rid) "" ils "ext-link" -> do ils <- innerInlines id - let title = fromMaybe "" $ findAttrText (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e + let title = fromMaybe "" $ findAttr (QName "title" (Just "http://www.w3.org/1999/xlink") Nothing) e let href = case findAttr (QName "href" (Just "http://www.w3.org/1999/xlink") Nothing) e of - Just h -> T.pack h + Just h -> h _ -> "#" <> attrValue "rid" e let ils' = if ils == mempty then str href else ils let attr = (attrValue "id" e, [], []) @@ -529,7 +521,7 @@ parseInline (Elem e) = where innerInlines f = extractSpaces f . mconcat <$> mapM parseInline (elContent e) mathML x = - case readMathML . T.pack . showElement $ everywhere (mkT removePrefix) x of + case readMathML . showElement $ everywhere (mkT removePrefix) x of Left _ -> mempty Right m -> writeTeX m formula constructor = do @@ -547,4 +539,4 @@ parseInline (Elem e) = let classes' = case attrValue "language" e of "" -> [] l -> [l] - return $ codeWith (attrValue "id" e,classes',[]) $ textContentRecursive e + return $ codeWith (attrValue "id" e,classes',[]) $ strContentRecursive e diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index bdadc4dd9..184d5a63f 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -13,7 +13,6 @@ Conversion of OPML to 'Pandoc' document. module Text.Pandoc.Readers.OPML ( readOPML ) where import Control.Monad.State.Strict -import Data.Char (toUpper) import Data.Default import Data.Generics import Data.Maybe (fromMaybe) @@ -28,8 +27,7 @@ import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Shared (crFilter, blocksToInlines') -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import Control.Monad.Except (throwError) type OPML m = StateT OPMLState m @@ -69,25 +67,22 @@ normalizeTree = everywhere (mkT go) where go :: [Content] -> [Content] go (Text (CData CDataRaw _ _):xs) = xs go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 ++ s2) z):xs + Text (CData CDataText (s1 <> s2) z):xs go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 ++ convertEntity r) z):xs + Text (CData CDataText (s1 <> convertEntity r) z):xs go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r ++ s1) z):xs + Text (CData CDataText (convertEntity r <> s1) z):xs go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 ++ convertEntity r2) Nothing):xs + Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs go xs = xs -convertEntity :: String -> String -convertEntity e = Data.Maybe.fromMaybe (map toUpper e) (lookupEntity e) +convertEntity :: Text -> Text +convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e)) -- convenience function to get an attribute value, defaulting to "" -attrValue :: String -> Element -> Text +attrValue :: Text -> Element -> Text attrValue attr elt = - maybe "" T.pack (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) - -textContent :: Element -> Text -textContent = T.pack . strContent + fromMaybe "" (lookupAttrBy (\x -> qName x == attr) (elAttribs elt)) -- exceptT :: PandocMonad m => Either PandocError a -> OPML m a -- exceptT = either throwError return @@ -111,11 +106,11 @@ parseBlock :: PandocMonad m => Content -> OPML m Blocks parseBlock (Elem e) = case qName (elName e) of "ownerName" -> mempty <$ modify (\st -> - st{opmlDocAuthors = [text $ textContent e]}) + st{opmlDocAuthors = [text $ strContent e]}) "dateModified" -> mempty <$ modify (\st -> - st{opmlDocDate = text $ textContent e}) + st{opmlDocDate = text $ strContent e}) "title" -> mempty <$ modify (\st -> - st{opmlDocTitle = text $ textContent e}) + st{opmlDocTitle = text $ strContent e}) "outline" -> gets opmlSectionLevel >>= sect . (+1) "?xml" -> return mempty _ -> getBlocks e diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs index 85308deb1..c274b6fd4 100644 --- a/src/Text/Pandoc/Readers/Odt.hs +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -14,8 +14,7 @@ Entry point to the odt reader. module Text.Pandoc.Readers.Odt ( readOdt ) where import Codec.Archive.Zip -import qualified Text.XML.Light as XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light import qualified Data.ByteString.Lazy as B @@ -91,7 +90,7 @@ archiveToOdt archive = do -- -entryToXmlElem :: Entry -> Either PandocError XML.Element +entryToXmlElem :: Entry -> Either PandocError Element entryToXmlElem entry = case parseXMLElement . UTF8.toTextLazy . fromEntry $ entry of Right x -> Right x diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index 43c44e7e9..df90880fa 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -29,14 +29,14 @@ import Control.Monad ((<=<)) import qualified Data.ByteString.Lazy as B import Data.Foldable (fold) -import Data.List (find, stripPrefix) +import Data.List (find) import qualified Data.Map as M import qualified Data.Text as T import Data.Maybe import Data.Semigroup (First(..), Option(..)) import Text.TeXMath (readMathML, writeTeX) -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Builder hiding (underline) import Text.Pandoc.MediaBag (MediaBag, insertMedia) @@ -557,7 +557,7 @@ read_plain_text = fst ^&&& read_plain_text' >>% recover >>?% mappend -- extractText :: XML.Content -> Fallible T.Text - extractText (XML.Text cData) = succeedWith (T.pack $ XML.cdData cData) + extractText (XML.Text cData) = succeedWith (XML.cdData cData) extractText _ = failEmpty read_text_seq :: InlineMatcher @@ -777,14 +777,14 @@ read_frame_img = "" -> returnV mempty -< () src' -> do let exts = extensionsFromList [Ext_auto_identifiers] - resource <- lookupResource -< src' + resource <- lookupResource -< T.unpack src' _ <- updateMediaWithResource -< resource w <- findAttrText' NsSVG "width" -< () h <- findAttrText' NsSVG "height" -< () titleNodes <- matchChildContent' [ read_frame_title ] -< () alt <- matchChildContent [] read_plain_text -< () arr (firstMatch . uncurry4 imageWith) -< - (image_attributes w h, T.pack src', inlineListToIdentifier exts (toList titleNodes), alt) + (image_attributes w h, src', inlineListToIdentifier exts (toList titleNodes), alt) read_frame_title :: InlineMatcher read_frame_title = matchingElement NsSVG "title" (matchChildContent [] read_plain_text) @@ -804,7 +804,8 @@ read_frame_mathml = case fold src of "" -> returnV mempty -< () src' -> do - let path = fromMaybe src' (stripPrefix "./" src') ++ "/content.xml" + let path = T.unpack $ + fromMaybe src' (T.stripPrefix "./" src') <> "/content.xml" (_, mathml) <- lookupResource -< path case readMathML (UTF8.toText $ B.toStrict mathml) of Left _ -> returnV mempty -< () diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs index 77174c793..78a7fc0b2 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -14,9 +14,10 @@ typesafe Haskell namespace identifiers and unsafe "real world" namespaces. module Text.Pandoc.Readers.Odt.Generic.Namespaces where import qualified Data.Map as M +import Data.Text (Text) -- -type NameSpaceIRI = String +type NameSpaceIRI = Text -- type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs index 6dc56a0d9..edefe3c70 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Odt.Generic.Utils , reverseComposition , tryToRead , Lookupable(..) -, readLookupables , readLookupable , readPercent , findBy @@ -30,11 +29,11 @@ module Text.Pandoc.Readers.Odt.Generic.Utils import Control.Category (Category, (<<<), (>>>)) import qualified Control.Category as Cat (id) -import Control.Monad (msum) - +import Data.Char (isSpace) import qualified Data.Foldable as F (Foldable, foldr) import Data.Maybe - +import Data.Text (Text) +import qualified Data.Text as T -- | Equivalent to -- > foldr (.) id @@ -76,8 +75,8 @@ swing = flip.(.flip id) -- (nobody wants that) while the latter returns "to much" for simple purposes. -- This function instead applies 'reads' and returns the first match (if any) -- in a 'Maybe'. -tryToRead :: (Read r) => String -> Maybe r -tryToRead = reads >>> listToMaybe >>> fmap fst +tryToRead :: (Read r) => Text -> Maybe r +tryToRead = (reads . T.unpack) >>> listToMaybe >>> fmap fst -- | A version of 'reads' that requires a '%' sign after the number readPercent :: ReadS Int @@ -88,26 +87,12 @@ readPercent s = [ (i,s') | (i , r ) <- reads s -- | Data that can be looked up. -- This is mostly a utility to read data with kind *. class Lookupable a where - lookupTable :: [(String, a)] - --- | The idea is to use this function as if there was a declaration like --- --- > instance (Lookupable a) => (Read a) where --- > readsPrec _ = readLookupables --- . --- But including this code in this form would need UndecideableInstances. --- That is a bad idea. Luckily 'readLookupable' (without the s at the end) --- can be used directly in almost any case. -readLookupables :: (Lookupable a) => String -> [(a,String)] -readLookupables s = [ (a,rest) | (word,rest) <- lex s, - a <- maybeToList (lookup word lookupTable) - ] + lookupTable :: [(Text, a)] -- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. -readLookupable :: (Lookupable a) => String -> Maybe a -readLookupable s = msum - $ map ((`lookup` lookupTable).fst) - $ lex s +readLookupable :: (Lookupable a) => Text -> Maybe a +readLookupable s = + lookup (T.takeWhile (not . isSpace) $ T.dropWhile isSpace s) lookupTable uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 00c636a0d..0d921e23b 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} @@ -60,11 +61,11 @@ import Control.Arrow import Data.Bool ( bool ) import Data.Either ( rights ) import qualified Data.Map as M -import qualified Data.Text as T +import Data.Text (Text) import Data.Default import Data.Maybe -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -78,13 +79,13 @@ import Text.Pandoc.Readers.Odt.Generic.Fallible -------------------------------------------------------------------------------- -- -type ElementName = String -type AttributeName = String -type AttributeValue = String -type TextAttributeValue = T.Text +type ElementName = Text +type AttributeName = Text +type AttributeValue = Text +type TextAttributeValue = Text -- -type NameSpacePrefix = String +type NameSpacePrefix = Text -- type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix @@ -461,7 +462,7 @@ lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) lookupDefaultingAttr nsID attrName = lookupAttrWithDefault nsID attrName def --- | Return value as a (Maybe String) +-- | Return value as a (Maybe Text) findAttr' :: (NameSpaceID nsID) => nsID -> AttributeName -> XMLConverter nsID extraState x (Maybe AttributeValue) @@ -477,7 +478,6 @@ findAttrText' nsID attrName = qualifyName nsID attrName &&& getCurrentElement >>% XML.findAttr - >>^ fmap T.pack -- | Return value as string or fail findAttr :: (NameSpaceID nsID) @@ -492,7 +492,6 @@ findAttrText :: (NameSpaceID nsID) -> FallibleXMLConverter nsID extraState x TextAttributeValue findAttrText nsID attrName = findAttr' nsID attrName - >>^ fmap T.pack >>> maybeToChoice -- | Return value as string or return provided default value @@ -511,7 +510,7 @@ findAttrTextWithDefault :: (NameSpaceID nsID) -> XMLConverter nsID extraState x TextAttributeValue findAttrTextWithDefault nsID attrName deflt = findAttr' nsID attrName - >>^ maybe deflt T.pack + >>^ fromMaybe deflt -- | Read and return value or fail readAttr :: (NameSpaceID nsID, Read attrValue) @@ -748,7 +747,7 @@ matchContent lookups fallback -- Internals -------------------------------------------------------------------------------- -stringToBool' :: String -> Maybe Bool +stringToBool' :: Text -> Maybe Bool stringToBool' val | val `elem` trueValues = Just True | val `elem` falseValues = Just False | otherwise = Nothing diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs index 3a24a1162..70741c28d 100644 --- a/src/Text/Pandoc/Readers/Odt/Namespaces.hs +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Reader.Odt.Namespaces Copyright : Copyright (C) 2015 Martin Linnemann @@ -13,10 +14,10 @@ Namespaces used in odt files. module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) ) where -import Data.List (isPrefixOf) import qualified Data.Map as M (empty, insert) import Data.Maybe (fromMaybe, listToMaybe) - +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Odt.Generic.Namespaces @@ -30,7 +31,7 @@ instance NameSpaceID Namespace where findID :: NameSpaceIRI -> Maybe Namespace -findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `isPrefixOf` iri] +findID iri = listToMaybe [nsID | (iri',nsID) <- nsIDs, iri' `T.isPrefixOf` iri] nsIDmap :: NameSpaceIRIs Namespace nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs @@ -54,12 +55,12 @@ data Namespace = -- Open Document core -- Core XML (basically only for the 'id'-attribute) | NsXML -- Fallback - | NsOther String + | NsOther Text deriving ( Eq, Ord, Show ) -- | Not the actual iri's, but large prefixes of them - this way there are -- less versioning problems and the like. -nsIDs :: [(String,Namespace)] +nsIDs :: [(Text, Namespace)] nsIDs = [ ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 46a777df1..5e10f896c 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -2,6 +2,7 @@ {-# LANGUAGE Arrows #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Odt.StyleReader Copyright : Copyright (C) 2015 Martin Linnemann @@ -46,11 +47,13 @@ import qualified Data.Foldable as F import Data.List (unfoldr) import qualified Data.Map as M import Data.Maybe +import Data.Text (Text) +import qualified Data.Text as T import qualified Data.Set as S -import qualified Text.XML.Light as XML +import qualified Text.Pandoc.XML.Light as XML -import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Shared (safeRead, tshow) import Text.Pandoc.Readers.Odt.Arrows.Utils @@ -90,7 +93,7 @@ instance Default FontPitch where -- -- Thus, we want -type FontFaceName = String +type FontFaceName = Text type FontPitches = M.Map FontFaceName FontPitch @@ -151,7 +154,7 @@ findPitch = ( lookupAttr NsStyle "font-pitch" -- Definitions of main data -------------------------------------------------------------------------------- -type StyleName = String +type StyleName = Text -- | There are two types of styles: named styles with a style family and an -- optional style parent, and default styles for each style family, @@ -355,8 +358,8 @@ getListLevelStyle level ListStyle{..} = -- \^ simpler, but in general less efficient data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType - , listItemPrefix :: Maybe String - , listItemSuffix :: Maybe String + , listItemPrefix :: Maybe Text + , listItemSuffix :: Maybe Text , listItemFormat :: ListItemNumberFormat , listItemStart :: Int } @@ -366,9 +369,9 @@ instance Show ListLevelStyle where show ListLevelStyle{..} = "<LLS|" ++ show listLevelType ++ "|" - ++ maybeToString listItemPrefix + ++ maybeToString (T.unpack <$> listItemPrefix) ++ show listItemFormat - ++ maybeToString listItemSuffix + ++ maybeToString (T.unpack <$> listItemSuffix) ++ ">" where maybeToString = fromMaybe "" @@ -471,7 +474,7 @@ readTextProperties = ) where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] isFontBold = ("normal",False):("bold",True) - :map ((,True).show) ([100,200..900]::[Int]) + :map ((,True) . tshow) ([100,200..900]::[Int]) readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) readUnderlineMode = readLineMode "text-underline-mode" @@ -481,7 +484,7 @@ readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) readStrikeThroughMode = readLineMode "text-line-through-mode" "text-line-through-style" -readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode) +readLineMode :: Text -> Text -> StyleReaderSafe _x (Maybe UnderlineMode) readLineMode modeAttr styleAttr = proc x -> do isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x mode <- lookupAttr' NsStyle modeAttr -< x diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index da990e4d3..89c71d773 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -31,6 +31,7 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, maybeToList) import qualified Data.Set as Set import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) @@ -57,19 +58,19 @@ import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.Printf (printf) import Text.TeXMath -import Text.XML.Light as XML -import Text.XML.Light.Cursor as XMLC import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML +import Data.Generics (mkT, everywhere) data ListMarker = NoMarker | BulletMarker | NumberMarker ListNumberStyle ListNumberDelim Int deriving (Show, Read, Eq, Ord) -listMarkerToId :: ListMarker -> String +listMarkerToId :: ListMarker -> Text listMarkerToId NoMarker = "990" listMarkerToId BulletMarker = "991" -listMarkerToId (NumberMarker sty delim n) = +listMarkerToId (NumberMarker sty delim n) = T.pack $ '9' : '9' : styNum : delimNum : show n where styNum = case sty of DefaultStyle -> '2' @@ -106,8 +107,8 @@ data WriterEnv = WriterEnv{ envTextProperties :: EnvProps , envListLevel :: Int , envListNumId :: Int , envInDel :: Bool - , envChangesAuthor :: T.Text - , envChangesDate :: T.Text + , envChangesAuthor :: Text + , envChangesDate :: Text , envPrintWidth :: Integer } @@ -125,9 +126,9 @@ defaultWriterEnv = WriterEnv{ envTextProperties = mempty data WriterState = WriterState{ stFootnotes :: [Element] - , stComments :: [([(T.Text, T.Text)], [Inline])] - , stSectionIds :: Set.Set T.Text - , stExternalLinks :: M.Map String String + , stComments :: [([(Text, Text)], [Inline])] + , stSectionIds :: Set.Set Text + , stExternalLinks :: M.Map Text Text , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) , stLists :: [ListMarker] , stInsId :: Int @@ -164,18 +165,18 @@ defaultWriterState = WriterState{ type WS m = ReaderT WriterEnv (StateT WriterState m) -renumIdMap :: Int -> [Element] -> M.Map String String +renumIdMap :: Int -> [Element] -> M.Map Text Text renumIdMap _ [] = M.empty renumIdMap n (e:es) | Just oldId <- findAttr (QName "Id" Nothing Nothing) e = - M.insert oldId ("rId" ++ show n) (renumIdMap (n+1) es) + M.insert oldId ("rId" <> tshow n) (renumIdMap (n+1) es) | otherwise = renumIdMap n es -replaceAttr :: (QName -> Bool) -> String -> [XML.Attr] -> [XML.Attr] +replaceAttr :: (QName -> Bool) -> Text -> [XML.Attr] -> [XML.Attr] replaceAttr f val = map $ \a -> if f (attrKey a) then XML.Attr (attrKey a) val else a -renumId :: (QName -> Bool) -> M.Map String String -> Element -> Element +renumId :: (QName -> Bool) -> M.Map Text Text -> Element -> Element renumId f renumMap e | Just oldId <- findAttrBy f e , Just newId <- M.lookup oldId renumMap = @@ -184,18 +185,12 @@ renumId f renumMap e e { elAttribs = attrs' } | otherwise = e -renumIds :: (QName -> Bool) -> M.Map String String -> [Element] -> [Element] +renumIds :: (QName -> Bool) -> M.Map Text Text -> [Element] -> [Element] renumIds f renumMap = map (renumId f renumMap) -findAttrTextBy :: (QName -> Bool) -> Element -> Maybe T.Text -findAttrTextBy x = fmap T.pack . findAttrBy x - -lookupAttrTextBy :: (QName -> Bool) -> [XML.Attr] -> Maybe T.Text -lookupAttrTextBy x = fmap T.pack . lookupAttrBy x - -- | Certain characters are invalid in XML even if escaped. -- See #1992 -stripInvalidChars :: T.Text -> T.Text +stripInvalidChars :: Text -> Text stripInvalidChars = T.filter isValidChar -- | See XML reference @@ -234,11 +229,11 @@ writeDocx opts doc = do -- Gets the template size let mbpgsz = mbsectpr >>= filterElementName (wname (=="pgSz")) - let mbAttrSzWidth = mbpgsz >>= lookupAttrTextBy ((=="w") . qName) . elAttribs + let mbAttrSzWidth = mbpgsz >>= lookupAttrBy ((=="w") . qName) . elAttribs let mbpgmar = mbsectpr >>= filterElementName (wname (=="pgMar")) - let mbAttrMarLeft = mbpgmar >>= lookupAttrTextBy ((=="left") . qName) . elAttribs - let mbAttrMarRight = mbpgmar >>= lookupAttrTextBy ((=="right") . qName) . elAttribs + let mbAttrMarLeft = mbpgmar >>= lookupAttrBy ((=="left") . qName) . elAttribs + let mbAttrMarRight = mbpgmar >>= lookupAttrBy ((=="right") . qName) . elAttribs -- Get the available area (converting the size and the margins to int and -- doing the difference @@ -250,24 +245,21 @@ writeDocx opts doc = do -- styles mblang <- toLang $ getLang opts meta + -- TODO FIXME avoid this generic traversal! + -- lang is in w:docDefaults / w:rPr / w:lang let addLang :: Element -> Element - addLang e = case (\l -> XMLC.toTree . go (T.unpack $ renderLang l) $ - XMLC.fromElement e) <$> mblang of - Just (Elem e') -> e' - _ -> e -- return original - where go :: String -> Cursor -> Cursor - go l cursor = case XMLC.findRec (isLangElt . current) cursor of - Nothing -> cursor - Just t -> XMLC.modifyContent (setval l) t - setval :: String -> Content -> Content - setval l (Elem e') = Elem $ e'{ elAttribs = map (setvalattr l) $ - elAttribs e' } - setval _ x = x - setvalattr :: String -> XML.Attr -> XML.Attr - setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l - setvalattr _ x = x - isLangElt (Elem e') = qName (elName e') == "lang" - isLangElt _ = False + addLang = case mblang of + Nothing -> id + Just l -> everywhere (mkT (go (renderLang l))) + where + go :: Text -> Element -> Element + go l e' + | qName (elName e') == "lang" + = e'{ elAttribs = map (setvalattr l) $ elAttribs e' } + | otherwise = e' + + setvalattr l (XML.Attr qn@(QName "val" _ _) _) = XML.Attr qn l + setvalattr _ x = x let stylepath = "word/styles.xml" styledoc <- addLang <$> parseXml refArchive distArchive stylepath @@ -337,12 +329,13 @@ writeDocx opts doc = do -- [Content_Types].xml let mkOverrideNode (part', contentType') = mknode "Override" - [("PartName",part'),("ContentType",contentType')] () + [("PartName", T.pack part') + ,("ContentType", contentType')] () let mkImageOverride (_, imgpath, mbMimeType, _) = - mkOverrideNode ("/word/" ++ imgpath, - maybe "application/octet-stream" T.unpack mbMimeType) + mkOverrideNode ("/word/" <> imgpath, + fromMaybe "application/octet-stream" mbMimeType) let mkMediaOverride imgpath = - mkOverrideNode ('/':imgpath, T.unpack $ getMimeTypeDef imgpath) + mkOverrideNode ("/" <> imgpath, getMimeTypeDef imgpath) let overrides = map mkOverrideNode ( [("/word/webSettings.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml") @@ -369,13 +362,14 @@ writeDocx opts doc = do ,("/word/footnotes.xml", "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml") ] ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml")) headers ++ - map (\x -> (maybe "" ("/word/" ++) $ extractTarget x, + map (\x -> (maybe "" (T.unpack . ("/word/" <>)) (extractTarget x), "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml")) footers) ++ map mkImageOverride imgs ++ - [ mkMediaOverride (eRelativePath e) | e <- zEntries refArchive - , "word/media/" `isPrefixOf` eRelativePath e ] + [ mkMediaOverride (eRelativePath e) + | e <- zEntries refArchive + , "word/media/" `isPrefixOf` eRelativePath e ] let defaultnodes = [mknode "Default" [("Extension","xml"),("ContentType","application/xml")] (), @@ -421,7 +415,7 @@ writeDocx opts doc = do let renumHeaders = renumIds (\q -> qName q == "Id") idMap headers let renumFooters = renumIds (\q -> qName q == "Id") idMap footers let baserels = baserels' ++ renumHeaders ++ renumFooters - let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",ident),("Target",path)] () + let toImgRel (ident,path,_,_) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"),("Id",T.pack ident),("Target",T.pack path)] () let imgrels = map toImgRel imgs let toLinkRel (src,ident) = mknode "Relationship" [("Type","http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"),("Id",ident),("Target",src),("TargetMode","External") ] () let linkrels = map toLinkRel $ M.toList $ stExternalLinks st @@ -489,10 +483,10 @@ writeDocx opts doc = do numbering <- parseXml refArchive distArchive numpath let newNumElts = mkNumbering (stLists st) let pandocAdded e = - case findAttrTextBy ((== "abstractNumId") . qName) e >>= safeRead of + case findAttrBy ((== "abstractNumId") . qName) e >>= safeRead of Just numid -> numid >= (990 :: Int) Nothing -> - case findAttrTextBy ((== "numId") . qName) e >>= safeRead of + case findAttrBy ((== "numId") . qName) e >>= safeRead of Just numid -> numid >= (1000 :: Int) Nothing -> False let oldElts = filter (not . pandocAdded) $ onlyElems (elContent numbering) @@ -514,7 +508,7 @@ writeDocx opts doc = do let extraCoreProps = ["subject","lang","category","description"] let extraCorePropsMap = M.fromList $ zip extraCoreProps ["dc:subject","dc:language","cp:category","dc:description"] - let lookupMetaString' :: T.Text -> Meta -> T.Text + let lookupMetaString' :: Text -> Meta -> Text lookupMetaString' key' meta' = case key' of "description" -> T.intercalate "_x000d_\n" (map stringify $ lookupMetaBlocks "description" meta') @@ -530,21 +524,21 @@ writeDocx opts doc = do : mktnode "dc:creator" [] (T.intercalate "; " (map stringify $ docAuthors meta)) : [ mktnode (M.findWithDefault "" k extraCorePropsMap) [] (lookupMetaString' k meta) | k <- M.keys (unMeta meta), k `elem` extraCoreProps] - ++ mknode "cp:keywords" [] (T.unpack $ T.intercalate ", " keywords) + ++ mknode "cp:keywords" [] (T.intercalate ", " keywords) : (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) let docPropsEntry = toEntry docPropsPath epochtime $ renderXml docProps -- docProps/custom.xml - let customProperties :: [(String, String)] - customProperties = [ (T.unpack k, T.unpack $ lookupMetaString k meta) + let customProperties :: [(Text, Text)] + customProperties = [ (k, lookupMetaString k meta) | k <- M.keys (unMeta meta) , k `notElem` (["title", "author", "keywords"] ++ extraCoreProps)] let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) + ,("pid", tshow pid) ,("name", k)] $ mknode "vt:lpwstr" [] v let customPropsPath = "docProps/custom.xml" let customProps = mknode "Properties" @@ -594,7 +588,8 @@ writeDocx opts doc = do fontTableEntry <- entryFromArchive refArchive "word/fontTable.xml" webSettingsEntry <- entryFromArchive refArchive "word/webSettings.xml" headerFooterEntries <- mapM (entryFromArchive refArchive . ("word/" ++)) $ - mapMaybe extractTarget (headers ++ footers) + mapMaybe (fmap T.unpack . extractTarget) + (headers ++ footers) let miscRelEntries = [ e | e <- zEntries refArchive , "word/_rels/" `isPrefixOf` eRelativePath e , ".xml.rels" `isSuffixOf` eRelativePath e @@ -620,8 +615,8 @@ newParaPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "paragraph") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyText")] () , mknode "w:qFormat" [] () ] @@ -631,8 +626,8 @@ newTextPropToOpenXml (fromStyleName -> s) = let styleId = T.filter (not . isSpace) s in mknode "w:style" [ ("w:type", "character") , ("w:customStyle", "1") - , ("w:styleId", T.unpack styleId)] - [ mknode "w:name" [("w:val", T.unpack s)] () + , ("w:styleId", styleId)] + [ mknode "w:name" [("w:val", s)] () , mknode "w:basedOn" [("w:val","BodyTextChar")] () ] @@ -643,13 +638,14 @@ styleToOpenXml sm style = toStyle toktype | hasStyleName (fromString $ show toktype) (smCharStyle sm) = Nothing | otherwise = Just $ mknode "w:style" [("w:type","character"), - ("w:customStyle","1"),("w:styleId",show toktype)] - [ mknode "w:name" [("w:val",show toktype)] () + ("w:customStyle","1"),("w:styleId", tshow toktype)] + [ mknode "w:name" [("w:val", tshow toktype)] () , mknode "w:basedOn" [("w:val","VerbatimChar")] () , mknode "w:rPr" [] $ - [ mknode "w:color" [("w:val",tokCol toktype)] () + [ mknode "w:color" [("w:val", tokCol toktype)] () | tokCol toktype /= "auto" ] ++ - [ mknode "w:shd" [("w:val","clear"),("w:fill",tokBg toktype)] () + [ mknode "w:shd" [("w:val","clear") + ,("w:fill",tokBg toktype)] () | tokBg toktype /= "auto" ] ++ [ mknode "w:b" [] () | tokFeature tokenBold toktype ] ++ [ mknode "w:i" [] () | tokFeature tokenItalic toktype ] ++ @@ -657,10 +653,10 @@ styleToOpenXml sm style = ] tokStyles = tokenStyles style tokFeature f toktype = maybe False f $ M.lookup toktype tokStyles - tokCol toktype = maybe "auto" (drop 1 . fromColor) + tokCol toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenColor =<< M.lookup toktype tokStyles) `mplus` defaultColor style - tokBg toktype = maybe "auto" (drop 1 . fromColor) + tokBg toktype = maybe "auto" (T.pack . drop 1 . fromColor) $ (tokenBackground =<< M.lookup toktype tokStyles) `mplus` backgroundColor style parStyle | hasStyleName "Source Code" (smParaStyle sm) = Nothing @@ -673,10 +669,11 @@ styleToOpenXml sm style = , mknode "w:pPr" [] $ mknode "w:wordWrap" [("w:val","off")] () : - maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill",drop 1 $ fromColor col)] ()]) (backgroundColor style) + maybe [] (\col -> [mknode "w:shd" [("w:val","clear"),("w:fill", T.pack $ drop 1 $ fromColor col)] ()]) (backgroundColor style) ] -copyChildren :: (PandocMonad m) => Archive -> Archive -> String -> Integer -> [String] -> m Entry +copyChildren :: (PandocMonad m) + => Archive -> Archive -> String -> Integer -> [Text] -> m Entry copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path @@ -685,7 +682,7 @@ copyChildren refArchive distArchive path timestamp elNames = do } where strName QName{qName=name, qPrefix=prefix} - | Just p <- prefix = p++":"++name + | Just p <- prefix = p <> ":" <> name | otherwise = name shouldCopy = (`elem` elNames) . strName cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} @@ -706,35 +703,35 @@ maxListLevel = 8 mkNum :: ListMarker -> Int -> Element mkNum marker numid = - mknode "w:num" [("w:numId",show numid)] + mknode "w:num" [("w:numId",tshow numid)] $ mknode "w:abstractNumId" [("w:val",listMarkerToId marker)] () : case marker of NoMarker -> [] BulletMarker -> [] NumberMarker _ _ start -> - map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",show (lvl :: Int))] - $ mknode "w:startOverride" [("w:val",show start)] ()) + map (\lvl -> mknode "w:lvlOverride" [("w:ilvl",tshow (lvl :: Int))] + $ mknode "w:startOverride" [("w:val",tshow start)] ()) [0..maxListLevel] mkAbstractNum :: ListMarker -> Integer -> Element mkAbstractNum marker nsid = mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] - $ mknode "w:nsid" [("w:val", printf "%8x" nsid)] () + $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..maxListLevel] mkLvl :: ListMarker -> Int -> Element mkLvl marker lvl = - mknode "w:lvl" [("w:ilvl",show lvl)] $ + mknode "w:lvl" [("w:ilvl",tshow lvl)] $ [ mknode "w:start" [("w:val",start)] () | marker /= NoMarker && marker /= BulletMarker ] ++ [ mknode "w:numFmt" [("w:val",fmt)] () - , mknode "w:lvlText" [("w:val",lvltxt)] () + , mknode "w:lvlText" [("w:val", lvltxt)] () , mknode "w:lvlJc" [("w:val","left")] () , mknode "w:pPr" [] - [ mknode "w:ind" [ ("w:left",show $ lvl * step + step) - , ("w:hanging",show (hang :: Int)) + [ mknode "w:ind" [ ("w:left",tshow $ lvl * step + step) + , ("w:hanging",tshow (hang :: Int)) ] () ] ] @@ -743,8 +740,8 @@ mkLvl marker lvl = NoMarker -> ("bullet"," ","1") BulletMarker -> ("bullet",bulletFor lvl,"1") NumberMarker st de n -> (styleFor st lvl - ,patternFor de ("%" ++ show (lvl + 1)) - ,show n) + ,patternFor de ("%" <> tshow (lvl + 1)) + ,tshow n) step = 720 hang = 480 bulletFor 0 = "\x2022" -- filled circle @@ -767,9 +764,9 @@ mkLvl marker lvl = styleFor DefaultStyle 5 = "lowerRoman" styleFor DefaultStyle x = styleFor DefaultStyle (x `mod` 6) styleFor _ _ = "decimal" - patternFor OneParen s = s ++ ")" - patternFor TwoParens s = "(" ++ s ++ ")" - patternFor _ s = s ++ "." + patternFor OneParen s = s <> ")" + patternFor TwoParens s = "(" <> s <> ")" + patternFor _ s = s <> "." getNumId :: (PandocMonad m) => WS m Int getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists @@ -777,8 +774,8 @@ getNumId = (((baseListId - 1) +) . length) `fmap` gets stLists makeTOC :: (PandocMonad m) => WriterOptions -> WS m [Element] makeTOC opts = do - let depth = "1-"++show (writerTOCDepth opts) - let tocCmd = "TOC \\o \""++depth++"\" \\h \\z \\u" + let depth = "1-" <> tshow (writerTOCDepth opts) + let tocCmd = "TOC \\o \"" <> depth <> "\" \\h \\z \\u" tocTitle <- gets stTocTitle title <- withParaPropM (pStyleM "TOC Heading") (blocksToOpenXML opts [Para tocTitle]) return @@ -831,7 +828,7 @@ writeOpenXML opts (Pandoc meta blocks) = do let toComment (kvs, ils) = do annotation <- inlinesToOpenXML opts ils return $ - mknode "w:comment" [('w':':':T.unpack k,T.unpack v) | (k,v) <- kvs] + mknode "w:comment" [("w:" <> k, v) | (k,v) <- kvs] [ mknode "w:p" [] $ map Elem [ mknode "w:pPr" [] @@ -867,24 +864,24 @@ pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element pStyleM styleName = do pStyleMap <- gets (smParaStyle . stStyleMaps) let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", T.unpack $ fromStyleId sty')] () + return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) let sty' = getStyleIdFromName styleName cStyleMap - return $ mknode "w:rStyle" [("w:val", T.unpack $ fromStyleId sty')] () + return $ mknode "w:rStyle" [("w:val", fromStyleId sty')] () -getUniqueId :: (PandocMonad m) => WS m String +getUniqueId :: (PandocMonad m) => WS m Text -- the + 20 is to ensure that there are no clashes with the rIds -- already in word/document.xml.rel getUniqueId = do n <- gets stCurId modify $ \st -> st{stCurId = n + 1} - return $ show n + return $ tshow n -- | Key for specifying user-defined docx styles. -dynamicStyleKey :: T.Text +dynamicStyleKey :: Text dynamicStyleKey = "custom-style" -- | Convert a Pandoc block element to OpenXML. @@ -979,7 +976,7 @@ blockToOpenXML' opts (Para lst) blockToOpenXML' opts (LineBlock lns) = blockToOpenXML opts $ linesToPara lns blockToOpenXML' _ b@(RawBlock format str) | format == Format "openxml" = return [ - Text (CData CDataRaw (T.unpack str) Nothing) + Text (CData CDataRaw str Nothing) ] | otherwise = do report $ BlockNotRendered b @@ -1036,7 +1033,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do let fullrow = 5000 -- 100% specified in pct let rowwidth = fullrow * sum widths let mkgridcol w = mknode "w:gridCol" - [("w:w", show (floor (textwidth * w) :: Integer))] () + [("w:w", tshow (floor (textwidth * w) :: Integer))] () let hasHeader = not $ all null headers modify $ \s -> s { stInTable = False } -- for compatibility with Word <= 2007, we include a val with a bitmask @@ -1054,16 +1051,16 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", show rowwidth)] () : + mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ,("w:lastRow","0") ,("w:firstColumn","0") ,("w:lastColumn","0") ,("w:noHBand","0") ,("w:noVBand","0") - ,("w:val", printf "%04x" tblLookVal) + ,("w:val", T.pack $ printf "%04x" tblLookVal) ] () : - [ mknode "w:tblCaption" [("w:val", T.unpack captionStr)] () + [ mknode "w:tblCaption" [("w:val", captionStr)] () | not (null caption) ] ) : mknode "w:tblGrid" [] (if all (==0) widths @@ -1126,7 +1123,7 @@ listItemToOpenXML opts numid (first:rest) = do modify $ \st -> st{ stInList = oldInList } return $ first'' ++ rest'' -alignmentToString :: Alignment -> [Char] +alignmentToString :: Alignment -> Text alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" @@ -1169,8 +1166,8 @@ getParaProps displayMathPara = do listLevel <- asks envListLevel numid <- asks envListNumId let listPr = [mknode "w:numPr" [] - [ mknode "w:ilvl" [("w:val",show listLevel)] () - , mknode "w:numId" [("w:val",show numid)] () ] | listLevel >= 0 && not displayMathPara] + [ mknode "w:ilvl" [("w:val",tshow listLevel)] () + , mknode "w:numId" [("w:val",tshow numid)] () ] | listLevel >= 0 && not displayMathPara] return $ case listPr ++ squashProps props of [] -> [] ps -> [mknode "w:pPr" [] ps] @@ -1185,7 +1182,7 @@ withParaPropM md p = do d <- md withParaProp d p -formattedString :: PandocMonad m => T.Text -> WS m [Element] +formattedString :: PandocMonad m => Text -> WS m [Element] formattedString str = -- properly handle soft hyphens case splitTextBy (=='\173') str of @@ -1194,7 +1191,7 @@ formattedString str = sh <- formattedRun [mknode "w:softHyphen" [] ()] intercalate sh <$> mapM formattedString' ws -formattedString' :: PandocMonad m => T.Text -> WS m [Element] +formattedString' :: PandocMonad m => Text -> WS m [Element] formattedString' str = do inDel <- asks envInDel formattedRun [ mktnode (if inDel then "w:delText" else "w:t") @@ -1226,7 +1223,7 @@ inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) = mknode "w:r" [] (mknode "w:t" [("xml:space","preserve")] - ("\t" :: String))] ++) + ("\t" :: Text))] ++) <$> inlinesToOpenXML opts ils inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) = inlinesToOpenXML opts ils @@ -1236,17 +1233,17 @@ inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do let ident' = fromMaybe ident (lookup "id" kvs) kvs' = filter (("id" /=) . fst) kvs modify $ \st -> st{ stComments = (("id",ident'):kvs', ils) : stComments st } - return [ Elem $ mknode "w:commentRangeStart" [("w:id", T.unpack ident')] () ] + return [ Elem $ mknode "w:commentRangeStart" [("w:id", ident')] () ] inlineToOpenXML' _ (Span (ident,["comment-end"],kvs) _) = -- prefer the "id" in kvs, since that is the one produced by the docx -- reader. let ident' = fromMaybe ident (lookup "id" kvs) in return . map Elem $ - [ mknode "w:commentRangeEnd" [("w:id", T.unpack ident')] () + [ mknode "w:commentRangeEnd" [("w:id", ident')] () , mknode "w:r" [] [ mknode "w:rPr" [] [ mknode "w:rStyle" [("w:val", "CommentReference")] () ] - , mknode "w:commentReference" [("w:id", T.unpack ident')] () ] + , mknode "w:commentReference" [("w:id", ident')] () ] ] inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do stylemod <- case lookup dynamicStyleKey kvs of @@ -1270,8 +1267,8 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do defaultAuthor <- asks envChangesAuthor let author = fromMaybe defaultAuthor (lookup "author" kvs) let mdate = lookup "date" kvs - return $ ("w:author", T.unpack author) : - maybe [] (\date -> [("w:date", T.unpack date)]) mdate + return $ ("w:author", author) : + maybe [] (\date -> [("w:date", date)]) mdate insmod <- if "insertion" `elem` classes then do changeAuthorDate <- getChangeAuthorDate @@ -1281,7 +1278,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do x <- f return [Elem $ mknode "w:ins" - (("w:id", show insId) : changeAuthorDate) x] + (("w:id", tshow insId) : changeAuthorDate) x] else return id delmod <- if "deletion" `elem` classes then do @@ -1291,7 +1288,7 @@ inlineToOpenXML' opts (Span (ident,classes,kvs) ils) = do return $ \f -> local (\env->env{envInDel=True}) $ do x <- f return [Elem $ mknode "w:del" - (("w:id", show delId) : changeAuthorDate) x] + (("w:id", tshow delId) : changeAuthorDate) x] else return id contents <- insmod $ delmod $ dirmod $ stylemod $ pmod $ inlinesToOpenXML opts ils @@ -1322,7 +1319,7 @@ inlineToOpenXML' opts (Strikeout lst) = inlineToOpenXML' _ LineBreak = return [Elem br] inlineToOpenXML' _ il@(RawInline f str) | f == Format "openxml" = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] | otherwise = do report $ InlineNotRendered il return [] @@ -1335,7 +1332,7 @@ inlineToOpenXML' opts (Math mathType str) = do when (mathType == DisplayMath) setFirstPara res <- (lift . lift) (convertMath writeOMML mathType str) case res of - Right r -> return [Elem r] + Right r -> return [Elem $ fromXLElement r] Left il -> inlineToOpenXML' opts il inlineToOpenXML' opts (Cite _ lst) = inlinesToOpenXML opts lst inlineToOpenXML' opts (Code attrs str) = do @@ -1348,7 +1345,7 @@ inlineToOpenXML' opts (Code attrs str) = do mknode "w:r" [] [ mknode "w:rPr" [] $ maybeToList (lookup toktype tokTypesMap) - , mknode "w:t" [("xml:space","preserve")] (T.unpack tok) ] + , mknode "w:t" [("xml:space","preserve")] tok ] withTextPropM (rStyleM "Verbatim Char") $ if isNothing (writerHighlightStyle opts) then unhighlighted @@ -1365,7 +1362,7 @@ inlineToOpenXML' opts (Note bs) = do let notemarker = mknode "w:r" [] [ mknode "w:rPr" [] footnoteStyle , mknode "w:footnoteRef" [] () ] - let notemarkerXml = RawInline (Format "openxml") $ T.pack $ ppElement notemarker + let notemarkerXml = RawInline (Format "openxml") $ ppElement notemarker let insertNoteRef (Plain ils : xs) = Plain (notemarkerXml : Space : ils) : xs insertNoteRef (Para ils : xs) = Para (notemarkerXml : Space : ils) : xs insertNoteRef xs = Para [notemarkerXml] : xs @@ -1384,17 +1381,17 @@ inlineToOpenXML' opts (Note bs) = do inlineToOpenXML' opts (Link _ txt (T.uncons -> Just ('#', xs),_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt return - [ Elem $ mknode "w:hyperlink" [("w:anchor", T.unpack $ toBookmarkName xs)] contents ] + [ Elem $ mknode "w:hyperlink" [("w:anchor", toBookmarkName xs)] contents ] -- external link: inlineToOpenXML' opts (Link _ txt (src,_)) = do contents <- withTextPropM (rStyleM "Hyperlink") $ inlinesToOpenXML opts txt extlinks <- gets stExternalLinks - id' <- case M.lookup (T.unpack src) extlinks of + id' <- case M.lookup src extlinks of Just i -> return i Nothing -> do - i <- ("rId"++) `fmap` getUniqueId + i <- ("rId" <>) <$> getUniqueId modify $ \st -> st{ stExternalLinks = - M.insert (T.unpack src) i extlinks } + M.insert src i extlinks } return i return [ Elem $ mknode "w:hyperlink" [("r:id",id')] contents ] inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do @@ -1414,17 +1411,17 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do ,("noChangeAspect","1")] () nvPicPr = mknode "pic:nvPicPr" [] [ mknode "pic:cNvPr" - [("descr",T.unpack src),("id","0"),("name","Picture")] () + [("descr",src),("id","0"),("name","Picture")] () , cNvPicPr ] blipFill = mknode "pic:blipFill" [] - [ mknode "a:blip" [("r:embed",ident)] () + [ mknode "a:blip" [("r:embed",T.pack ident)] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] xfrm = mknode "a:xfrm" [] [ mknode "a:off" [("x","0"),("y","0")] () - , mknode "a:ext" [("cx",show xemu) - ,("cy",show yemu)] () ] + , mknode "a:ext" [("cx",tshow xemu) + ,("cy",tshow yemu)] () ] prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () ln = mknode "a:ln" [("w","9525")] @@ -1445,12 +1442,12 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do imgElt = mknode "w:r" [] $ mknode "w:drawing" [] $ mknode "wp:inline" [] - [ mknode "wp:extent" [("cx",show xemu),("cy",show yemu)] () + [ mknode "wp:extent" [("cx",tshow xemu),("cy",tshow yemu)] () , mknode "wp:effectExtent" [("b","0"),("l","0"),("r","0"),("t","0")] () , mknode "wp:docPr" - [ ("descr", T.unpack $ stringify alt) - , ("title", T.unpack title) + [ ("descr", stringify alt) + , ("title", title) , ("id","1") , ("name","Picture") ] () @@ -1463,7 +1460,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just imgData -> return [Elem $ generateImgElt imgData] Nothing -> ( do --try (img, mt) <- P.fetchItem src - ident <- ("rId"++) `fmap` getUniqueId + ident <- ("rId" <>) <$> getUniqueId let imgext = case mt >>= extensionFromMimeType of @@ -1477,10 +1474,10 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just Svg -> ".svg" Just Emf -> ".emf" Nothing -> "" - imgpath = "media/" <> ident <> T.unpack imgext - mbMimeType = mt <|> getMimeType imgpath + imgpath = "media/" <> ident <> imgext + mbMimeType = mt <|> getMimeType (T.unpack imgpath) - imgData = (ident, imgpath, mbMimeType, img) + imgData = (T.unpack ident, T.unpack imgpath, mbMimeType, img) if T.null imgext then -- without an extension there is no rule for content type @@ -1538,20 +1535,20 @@ withDirection x = do , envTextProperties = EnvProps textStyle textProps' } -wrapBookmark :: (PandocMonad m) => T.Text -> [Content] -> WS m [Content] +wrapBookmark :: (PandocMonad m) => Text -> [Content] -> WS m [Content] wrapBookmark "" contents = return contents wrapBookmark ident contents = do id' <- getUniqueId let bookmarkStart = mknode "w:bookmarkStart" [("w:id", id') - ,("w:name", T.unpack $ toBookmarkName ident)] () + ,("w:name", toBookmarkName ident)] () bookmarkEnd = mknode "w:bookmarkEnd" [("w:id", id')] () return $ Elem bookmarkStart : contents ++ [Elem bookmarkEnd] -- Word imposes a 40 character limit on bookmark names and requires -- that they begin with a letter. So we just use a hash of the -- identifier when otherwise we'd have an illegal bookmark name. -toBookmarkName :: T.Text -> T.Text +toBookmarkName :: Text -> Text toBookmarkName s | Just (c, _) <- T.uncons s , isLetter c diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 171ffe582..3f10cb437 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -24,12 +24,13 @@ import Control.Monad.State.Strict (StateT, evalState, evalStateT, get, gets, lift, modify) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy.Char8 as B8 -import Data.Char (isAlphaNum, isAscii, isDigit, toLower) +import Data.Char (isAlphaNum, isAscii, isDigit) import Data.List (isInfixOf, isPrefixOf) import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) import qualified Data.Set as Set -import qualified Data.Text as TS +import qualified Data.Text as T +import Data.Text (Text) import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) import System.FilePath (takeExtension, takeFileName, makeRelative) @@ -48,16 +49,13 @@ import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), ObfuscationMethod (NoObfuscation), WrapOption (..), WriterOptions (..)) import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags', - safeRead, stringify, trim, uniqueIdent, tshow) + stringify, uniqueIdent, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.UUID (getRandomUUID) import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) import Text.Printf (printf) -import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), - add_attrs, lookupAttr, node, onlyElems, - ppElement, showElement, strContent, unode, unqual) -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light import Text.Pandoc.XML (escapeStringForXML) import Text.DocTemplates (FromContext(lookupContext), Context(..), ToContext(toVal), Val(..)) @@ -69,7 +67,7 @@ newtype Chapter = Chapter [Block] data EPUBState = EPUBState { stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] , stMediaNextId :: Int - , stEpubSubdir :: String + , stEpubSubdir :: FilePath } type E m = StateT EPUBState m @@ -78,62 +76,63 @@ data EPUBMetadata = EPUBMetadata{ epubIdentifier :: [Identifier] , epubTitle :: [Title] , epubDate :: [Date] - , epubLanguage :: String + , epubLanguage :: Text , epubCreator :: [Creator] , epubContributor :: [Creator] - , epubSubject :: [String] - , epubDescription :: Maybe String - , epubType :: Maybe String - , epubFormat :: Maybe String - , epubPublisher :: Maybe String - , epubSource :: Maybe String - , epubRelation :: Maybe String - , epubCoverage :: Maybe String - , epubRights :: Maybe String - , epubBelongsToCollection :: Maybe String - , epubGroupPosition :: Maybe String - , epubCoverImage :: Maybe String + , epubSubject :: [Text] + , epubDescription :: Maybe Text + , epubType :: Maybe Text + , epubFormat :: Maybe Text + , epubPublisher :: Maybe Text + , epubSource :: Maybe Text + , epubRelation :: Maybe Text + , epubCoverage :: Maybe Text + , epubRights :: Maybe Text + , epubBelongsToCollection :: Maybe Text + , epubGroupPosition :: Maybe Text + , epubCoverImage :: Maybe FilePath , epubStylesheets :: [FilePath] , epubPageDirection :: Maybe ProgressionDirection - , epubIbooksFields :: [(String, String)] - , epubCalibreFields :: [(String, String)] + , epubIbooksFields :: [(Text, Text)] + , epubCalibreFields :: [(Text, Text)] } deriving Show data Date = Date{ - dateText :: String - , dateEvent :: Maybe String + dateText :: Text + , dateEvent :: Maybe Text } deriving Show data Creator = Creator{ - creatorText :: String - , creatorRole :: Maybe String - , creatorFileAs :: Maybe String + creatorText :: Text + , creatorRole :: Maybe Text + , creatorFileAs :: Maybe Text } deriving Show data Identifier = Identifier{ - identifierText :: String - , identifierScheme :: Maybe String + identifierText :: Text + , identifierScheme :: Maybe Text } deriving Show data Title = Title{ - titleText :: String - , titleFileAs :: Maybe String - , titleType :: Maybe String + titleText :: Text + , titleFileAs :: Maybe Text + , titleType :: Maybe Text } deriving Show data ProgressionDirection = LTR | RTL deriving Show -dcName :: String -> QName +dcName :: Text -> QName dcName n = QName n Nothing (Just "dc") -dcNode :: Node t => String -> t -> Element +dcNode :: Node t => Text -> t -> Element dcNode = node . dcName -opfName :: String -> QName +opfName :: Text -> QName opfName n = QName n Nothing (Just "opf") -toId :: FilePath -> String -toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' +toId :: FilePath -> Text +toId = T.pack . + map (\x -> if isAlphaNum x || x == '-' || x == '_' then x else '_') . takeFileName @@ -141,8 +140,8 @@ removeNote :: Inline -> Inline removeNote (Note _) = Str "" removeNote x = x -toVal' :: String -> Val TS.Text -toVal' = toVal . TS.pack +toVal' :: Text -> Val T.Text +toVal' = toVal mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry mkEntry path content = do @@ -172,21 +171,21 @@ getEPUBMetadata opts meta = do if null (epubIdentifier m) then do randomId <- getRandomUUID - return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] } + return $ m{ epubIdentifier = [Identifier (tshow randomId) Nothing] } else return m let addLanguage m = - if null (epubLanguage m) + if T.null (epubLanguage m) then case lookupContext "lang" (writerVariables opts) of - Just x -> return m{ epubLanguage = TS.unpack x } + Just x -> return m{ epubLanguage = x } Nothing -> do mLang <- lift $ P.lookupEnv "LANG" let localeLang = case mLang of Just lang -> - TS.map (\c -> if c == '_' then '-' else c) $ - TS.takeWhile (/='.') lang + T.map (\c -> if c == '_' then '-' else c) $ + T.takeWhile (/='.') lang Nothing -> "en-US" - return m{ epubLanguage = TS.unpack localeLang } + return m{ epubLanguage = localeLang } else return m let fixDate m = if null (epubDate m) @@ -201,7 +200,7 @@ getEPUBMetadata opts meta = do then return m else do let authors' = map stringify $ docAuthors meta - let toAuthor name = Creator{ creatorText = TS.unpack name + let toAuthor name = Creator{ creatorText = name , creatorRole = Just "aut" , creatorFileAs = Nothing } return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } @@ -249,31 +248,31 @@ addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md where getAttr n = lookupAttr (opfName n) attrs addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = case getAttr "property" of - Just s | "ibooks:" `isPrefixOf` s -> - md{ epubIbooksFields = (drop 7 s, strContent e) : + Just s | "ibooks:" `T.isPrefixOf` s -> + md{ epubIbooksFields = (T.drop 7 s, strContent e) : epubIbooksFields md } _ -> case getAttr "name" of - Just s | "calibre:" `isPrefixOf` s -> + Just s | "calibre:" `T.isPrefixOf` s -> md{ epubCalibreFields = - (drop 8 s, fromMaybe "" $ getAttr "content") : + (T.drop 8 s, fromMaybe "" $ getAttr "content") : epubCalibreFields md } _ -> md where getAttr n = lookupAttr (unqual n) attrs addMetadataFromXML _ md = md -metaValueToString :: MetaValue -> String -metaValueToString (MetaString s) = TS.unpack s -metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils -metaValueToString (MetaBlocks bs) = TS.unpack $ stringify bs +metaValueToString :: MetaValue -> Text +metaValueToString (MetaString s) = s +metaValueToString (MetaInlines ils) = stringify ils +metaValueToString (MetaBlocks bs) = stringify bs metaValueToString (MetaBool True) = "true" metaValueToString (MetaBool False) = "false" metaValueToString _ = "" metaValueToPaths :: MetaValue -> [FilePath] -metaValueToPaths (MetaList xs) = map metaValueToString xs -metaValueToPaths x = [metaValueToString x] +metaValueToPaths (MetaList xs) = map (T.unpack . metaValueToString) xs +metaValueToPaths x = [T.unpack $ metaValueToString x] -getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a] +getList :: T.Text -> Meta -> (MetaValue -> a) -> [a] getList s meta handleMetaValue = case lookupMeta s meta of Just (MetaList xs) -> map handleMetaValue xs @@ -297,7 +296,7 @@ getTitle meta = getList "title" meta handleMetaValue , titleType = metaValueToString <$> M.lookup "type" m } handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing -getCreator :: TS.Text -> Meta -> [Creator] +getCreator :: T.Text -> Meta -> [Creator] getCreator s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m @@ -305,7 +304,7 @@ getCreator s meta = getList s meta handleMetaValue , creatorRole = metaValueToString <$> M.lookup "role" m } handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing -getDate :: TS.Text -> Meta -> [Date] +getDate :: T.Text -> Meta -> [Date] getDate s meta = getList s meta handleMetaValue where handleMetaValue (MetaMap m) = Date{ dateText = fromMaybe "" $ @@ -314,7 +313,7 @@ getDate s meta = getList s meta handleMetaValue handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv , dateEvent = Nothing } -simpleList :: TS.Text -> Meta -> [String] +simpleList :: T.Text -> Meta -> [Text] simpleList s meta = case lookupMeta s meta of Just (MetaList xs) -> map metaValueToString xs @@ -339,7 +338,7 @@ metadataFromMeta opts meta = EPUBMetadata{ , epubCoverage = coverage , epubRights = rights , epubBelongsToCollection = belongsToCollection - , epubGroupPosition = groupPosition + , epubGroupPosition = groupPosition , epubCoverImage = coverImage , epubStylesheets = stylesheets , epubPageDirection = pageDirection @@ -363,31 +362,30 @@ metadataFromMeta opts meta = EPUBMetadata{ coverage = metaValueToString <$> lookupMeta "coverage" meta rights = metaValueToString <$> lookupMeta "rights" meta belongsToCollection = metaValueToString <$> lookupMeta "belongs-to-collection" meta - groupPosition = metaValueToString <$> lookupMeta "group-position" meta - coverImage = - (TS.unpack <$> lookupContext "epub-cover-image" - (writerVariables opts)) + groupPosition = metaValueToString <$> lookupMeta "group-position" meta + coverImage = T.unpack <$> + lookupContext "epub-cover-image" (writerVariables opts) `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta stylesheets = maybe [] metaValueToPaths mCss ++ case lookupContext "css" (writerVariables opts) of - Just xs -> map TS.unpack xs + Just xs -> map T.unpack xs Nothing -> case lookupContext "css" (writerVariables opts) of - Just x -> [TS.unpack x] + Just x -> [T.unpack x] Nothing -> [] - pageDirection = case map toLower . metaValueToString <$> + pageDirection = case T.toLower . metaValueToString <$> lookupMeta "page-progression-direction" meta of Just "ltr" -> Just LTR Just "rtl" -> Just RTL _ -> Nothing ibooksFields = case lookupMeta "ibooks" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] calibreFields = case lookupMeta "calibre" meta of Just (MetaMap mp) - -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp + -> M.toList $ M.map metaValueToString mp _ -> [] -- | Produce an EPUB2 file from a Pandoc document. @@ -413,9 +411,11 @@ writeEPUB :: PandocMonad m writeEPUB epubVersion opts doc = do let epubSubdir = writerEpubSubdirectory opts -- sanity check on epubSubdir - unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ + unless (T.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ throwError $ PandocEpubSubdirectoryError epubSubdir - let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir } + let initState = EPUBState { stMediaPaths = [] + , stMediaNextId = 0 + , stEpubSubdir = T.unpack epubSubdir } evalStateT (pandocToEPUB epubVersion opts doc) initState pandocToEPUB :: PandocMonad m @@ -439,7 +439,7 @@ pandocToEPUB version opts doc = do [] -> case epubTitle metadata of [] -> "UNTITLED" (x:_) -> titleText x - x -> TS.unpack $ stringify x + x -> stringify x -- stylesheet stylesheets <- case epubStylesheets metadata of @@ -461,7 +461,8 @@ pandocToEPUB version opts doc = do (ListVal $ map (\e -> toVal' $ (if useprefix then "../" else "") <> - makeRelative epubSubdir (eRelativePath e)) + T.pack + (makeRelative epubSubdir (eRelativePath e))) stylesheetEntries) mempty @@ -490,18 +491,19 @@ pandocToEPUB version opts doc = do case imageSize opts' (B.toStrict imgContent) of Right sz -> return $ sizeInPixels sz Left err' -> (0, 0) <$ report - (CouldNotDetermineImageSize (TS.pack img) err') + (CouldNotDetermineImageSize (T.pack img) err') cpContent <- lift $ writeHtml opts'{ writerVariables = Context (M.fromList [ ("coverpage", toVal' "true"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle), - ("cover-image", toVal' coverImageName), + escapeStringForXML plainTitle), + ("cover-image", + toVal' $ T.pack coverImageName), ("cover-image-width", toVal' $ - show coverImageWidth), + tshow coverImageWidth), ("cover-image-height", toVal' $ - show coverImageHeight)]) <> + tshow coverImageHeight)]) <> cssvars True <> vars } (Pandoc meta []) coverEntry <- mkEntry "text/cover.xhtml" cpContent @@ -517,7 +519,7 @@ pandocToEPUB version opts doc = do ("titlepage", toVal' "true"), ("body-type", toVal' "frontmatter"), ("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> cssvars True <> vars } (Pandoc meta []) tpEntry <- mkEntry "text/title_page.xhtml" tpContent @@ -526,7 +528,7 @@ pandocToEPUB version opts doc = do let matchingGlob f = do xs <- lift $ P.glob f when (null xs) $ - report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files" + report $ CouldNotFetchResource (T.pack f) "glob did not match any font files" return xs let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< lift (P.readFileLazy f) @@ -573,13 +575,13 @@ pandocToEPUB version opts doc = do let chapters' = secsToChapters secs - let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] + let extractLinkURL' :: Int -> Inline -> [(T.Text, T.Text)] extractLinkURL' num (Span (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (Link (ident, _, _) _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (Image (ident, _, _) _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL' num (RawInline fmt raw) | isHtmlFormat fmt = foldr (\tag -> @@ -587,18 +589,18 @@ pandocToEPUB version opts doc = do TagOpen{} -> case fromAttrib "id" tag of "" -> id - x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + x -> ((x, showChapter num <> "#" <> x):) _ -> id) [] (parseTags raw) extractLinkURL' _ _ = [] - let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] + let extractLinkURL :: Int -> Block -> [(T.Text, T.Text)] extractLinkURL num (Div (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (Header _ (ident, _, _) _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (Table (ident,_,_) _ _ _ _ _) - | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] + | not (T.null ident) = [(ident, showChapter num <> "#" <> ident)] extractLinkURL num (RawBlock fmt raw) | isHtmlFormat fmt = foldr (\tag -> @@ -606,7 +608,7 @@ pandocToEPUB version opts doc = do TagOpen{} -> case fromAttrib "id" tag of "" -> id - x -> ((x, TS.pack (showChapter num) <> "#" <> x):) + x -> ((x, showChapter num <> "#" <> x):) _ -> id) [] (parseTags raw) extractLinkURL num b = query (extractLinkURL' num) b @@ -617,7 +619,7 @@ pandocToEPUB version opts doc = do let fixInternalReferences :: Inline -> Inline fixInternalReferences (Link attr lab (src, tit)) - | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of + | Just ('#', xs) <- T.uncons src = case lookup xs reftable of Just ys -> Link attr lab (ys, tit) Nothing -> Link attr lab (src, tit) fixInternalReferences x = x @@ -630,7 +632,7 @@ pandocToEPUB version opts doc = do chapters' let chapToEntry num (Chapter bs) = - mkEntry ("text/" ++ showChapter num) =<< + mkEntry ("text/" ++ T.unpack (showChapter num)) =<< writeHtml opts'{ writerVariables = Context (M.fromList [("body-type", toVal' bodyType), @@ -677,12 +679,12 @@ pandocToEPUB version opts doc = do let chapterNode ent = unode "item" ! ([("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", "application/xhtml+xml")] ++ case props ent of [] -> [] - xs -> [("properties", unwords xs)]) + xs -> [("properties", T.unwords xs)]) $ () let chapterRefNode ent = unode "itemref" ! @@ -691,17 +693,17 @@ pandocToEPUB version opts doc = do let pictureNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), ("media-type", - maybe "application/octet-stream" TS.unpack + fromMaybe "application/octet-stream" $ mediaTypeOf $ eRelativePath ent)] $ () let fontNode ent = unode "item" ! [("id", toId $ makeRelative epubSubdir $ eRelativePath ent), - ("href", makeRelative epubSubdir + ("href", T.pack $ makeRelative epubSubdir $ eRelativePath ent), - ("media-type", maybe "" TS.unpack $ + ("media-type", fromMaybe "" $ getMimeType $ eRelativePath ent)] $ () let tocTitle = maybe plainTitle @@ -710,7 +712,7 @@ pandocToEPUB version opts doc = do (x:_) -> return $ identifierText x -- use first identifier as UUID [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen currentTime <- lift P.getTimestamp - let contentsData = UTF8.fromStringLazy $ ppTopElement $ + let contentsData = UTF8.fromTextLazy $ TL.fromStrict $ ppTopElement $ unode "package" ! ([("version", case version of EPUB2 -> "2.0" @@ -728,7 +730,8 @@ pandocToEPUB version opts doc = do ,("media-type","application/xhtml+xml")] ++ [("properties","nav") | epub3 ]) $ () ] ++ - [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp) + [ unode "item" ! [("id","stylesheet" <> tshow n) + , ("href", T.pack fp) ,("media-type","text/css")] $ () | (n :: Int, fp) <- zip [1..] (map (makeRelative epubSubdir . eRelativePath) @@ -773,7 +776,7 @@ pandocToEPUB version opts doc = do let tocLevel = writerTOCDepth opts let navPointNode :: PandocMonad m - => (Int -> [Inline] -> TS.Text -> [Element] -> Element) + => (Int -> [Inline] -> T.Text -> [Element] -> Element) -> Block -> StateT Int m [Element] navPointNode formatter (Div (ident,_,_) (Header lvl (_,_,kvs) ils : children)) = @@ -783,7 +786,7 @@ pandocToEPUB version opts doc = do n <- get modify (+1) let num = fromMaybe "" $ lookup "number" kvs - let tit = if writerNumberSections opts && not (TS.null num) + let tit = if writerNumberSections opts && not (T.null num) then Span ("", ["section-header-number"], []) [Str num] : Space : ils else ils @@ -797,21 +800,21 @@ pandocToEPUB version opts doc = do concat <$> mapM (navPointNode formatter) bs navPointNode _ _ = return [] - let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navMapFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navMapFormatter n tit src subs = unode "navPoint" ! - [("id", "navPoint-" ++ show n)] $ - [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit - , unode "content" ! [("src", "text/" <> TS.unpack src)] $ () + [("id", "navPoint-" <> tshow n)] $ + [ unode "navLabel" $ unode "text" $ stringify tit + , unode "content" ! [("src", "text/" <> src)] $ () ] ++ subs let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ - [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta) + [ unode "navLabel" $ unode "text" (stringify $ docTitle' meta) , unode "content" ! [("src", "text/title_page.xhtml")] $ () ] navMap <- lift $ evalStateT (concat <$> mapM (navPointNode navMapFormatter) secs) 1 - let tocData = UTF8.fromStringLazy $ ppTopElement $ + let tocData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" $ @@ -833,11 +836,11 @@ pandocToEPUB version opts doc = do ] tocEntry <- mkEntry "toc.ncx" tocData - let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element + let navXhtmlFormatter :: Int -> [Inline] -> T.Text -> [Element] -> Element navXhtmlFormatter n tit src subs = unode "li" ! - [("id", "toc-li-" ++ show n)] $ + [("id", "toc-li-" <> tshow n)] $ (unode "a" ! - [("href", "text/" <> TS.unpack src)] + [("href", "text/" <> src)] $ titElements) : case subs of [] -> [] @@ -850,7 +853,7 @@ pandocToEPUB version opts doc = do , writerVariables = Context (M.fromList [("pagetitle", toVal $ - escapeStringForXML $ TS.pack plainTitle)]) + escapeStringForXML plainTitle)]) <> writerVariables opts} (Pandoc nullMeta [Plain $ walk clean tit])) of @@ -865,7 +868,7 @@ pandocToEPUB version opts doc = do tocBlocks <- lift $ evalStateT (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 let navBlocks = [RawBlock (Format "html") - $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces + $ showElement $ -- prettyprinting introduces bad spaces unode navtag ! ([("epub:type","toc") | epub3] ++ [("id","toc")]) $ [ unode "h1" ! [("id","toc-title")] $ tocTitle @@ -875,21 +878,21 @@ pandocToEPUB version opts doc = do [ unode "a" ! [("href", "text/title_page.xhtml") ,("epub:type", "titlepage")] $ - ("Title Page" :: String) ] : + ("Title Page" :: Text) ] : [ unode "li" [ unode "a" ! [("href", "text/cover.xhtml") ,("epub:type", "cover")] $ - ("Cover" :: String)] | + ("Cover" :: Text)] | isJust (epubCoverImage metadata) ] ++ [ unode "li" [ unode "a" ! [("href", "#toc") ,("epub:type", "toc")] $ - ("Table of Contents" :: String) + ("Table of Contents" :: Text) ] | writerTableOfContents opts ] else [] - let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $ + let landmarks = [RawBlock (Format "html") $ ppElement $ unode "nav" ! [("epub:type","landmarks") ,("id","landmarks") ,("hidden","hidden")] $ @@ -910,22 +913,22 @@ pandocToEPUB version opts doc = do UTF8.fromStringLazy "application/epub+zip" -- container.xml - let containerData = UTF8.fromStringLazy $ ppTopElement $ + let containerData = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ unode "rootfile" ! [("full-path", (if null epubSubdir then "" - else epubSubdir ++ "/") ++ "content.opf") + else T.pack epubSubdir <> "/") <> "content.opf") ,("media-type","application/oebps-package+xml")] $ () containerEntry <- mkEntry "META-INF/container.xml" containerData -- com.apple.ibooks.display-options.xml - let apple = UTF8.fromStringLazy $ ppTopElement $ + let apple = B.fromStrict $ UTF8.fromText $ ppTopElement $ unode "display_options" $ unode "platform" ! [("name","*")] $ - unode "option" ! [("name","specified-fonts")] $ ("true" :: String) + unode "option" ! [("name","specified-fonts")] $ ("true" :: Text) appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple -- construct archive @@ -947,7 +950,8 @@ metadataElement version md currentTime = ++ publisherNodes ++ sourceNodes ++ relationNodes ++ coverageNodes ++ rightsNodes ++ coverImageNodes ++ modifiedNodes ++ belongsToCollectionNodes - withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) + withIds base f = concat . zipWith f (map (\x -> base <> + T.cons '-' (tshow x)) ([1..] :: [Int])) identifierNodes = withIds "epub-id" toIdentifierNode $ epubIdentifier md @@ -961,9 +965,9 @@ metadataElement version md currentTime = (x:_) -> [dcNode "date" ! [("id","epub-date")] $ dateText x] ibooksNodes = map ibooksNode (epubIbooksFields md) - ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v + ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" <> k)] $ v calibreNodes = map calibreNode (epubCalibreFields md) - calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k), + calibreNode (k, v) = unode "meta" ! [("name", "calibre:" <> k), ("content", v)] $ () languageNodes = [dcTag "language" $ epubLanguage md] creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ @@ -989,12 +993,12 @@ metadataElement version md currentTime = maybe [] (\belongsToCollection -> (unode "meta" ! [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection ) : - [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: String) ]) + [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ]) (epubBelongsToCollection md)++ maybe [] (\groupPosition -> [unode "meta" ! [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ]) (epubGroupPosition md) - dcTag n s = unode ("dc:" ++ n) s + dcTag n s = unode ("dc:" <> n) s dcTag' n s = [dcTag n s] toIdentifierNode id' (Identifier txt scheme) | version == EPUB2 = [dcNode "identifier" ! @@ -1002,7 +1006,7 @@ metadataElement version md currentTime = txt] | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) : maybe [] ((\x -> [unode "meta" ! - [ ("refines",'#':id') + [ ("refines","#" <> id') , ("property","identifier-type") , ("scheme","onix:codelist5") ] @@ -1018,10 +1022,10 @@ metadataElement version md currentTime = (creatorRole creator >>= toRelator)) $ creatorText creator] | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (creatorFileAs creator) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","role"), + [("refines","#" <> id'),("property","role"), ("scheme","marc:relators")] $ x]) (creatorRole creator >>= toRelator) toTitleNode id' title @@ -1033,16 +1037,16 @@ metadataElement version md currentTime = | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","file-as")] $ x]) + [("refines","#" <> id'),("property","file-as")] $ x]) (titleFileAs title) ++ maybe [] (\x -> [unode "meta" ! - [("refines",'#':id'),("property","title-type")] $ x]) + [("refines","#" <> id'),("property","title-type")] $ x]) (titleType title) toDateNode id' date = [dcNode "date" ! (("id",id') : maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ dateText date] - schemeToOnix :: String -> String + schemeToOnix :: Text -> Text schemeToOnix "ISBN-10" = "02" schemeToOnix "GTIN-13" = "03" schemeToOnix "UPC" = "04" @@ -1060,48 +1064,48 @@ metadataElement version md currentTime = schemeToOnix "OLCC" = "28" schemeToOnix _ = "01" -showDateTimeISO8601 :: UTCTime -> String -showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" +showDateTimeISO8601 :: UTCTime -> Text +showDateTimeISO8601 = T.pack . formatTime defaultTimeLocale "%FT%TZ" transformTag :: PandocMonad m - => Tag TS.Text - -> E m (Tag TS.Text) + => Tag T.Text + -> E m (Tag T.Text) transformTag tag@(TagOpen name attr) | name `elem` ["video", "source", "img", "audio"] && isNothing (lookup "data-external" attr) = do let src = fromAttrib "src" tag let poster = fromAttrib "poster" tag - newsrc <- modifyMediaRef $ TS.unpack src - newposter <- modifyMediaRef $ TS.unpack poster + newsrc <- modifyMediaRef $ T.unpack src + newposter <- modifyMediaRef $ T.unpack poster let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ - [("src", "../" <> newsrc) | not (TS.null newsrc)] ++ - [("poster", "../" <> newposter) | not (TS.null newposter)] + [("src", "../" <> newsrc) | not (T.null newsrc)] ++ + [("poster", "../" <> newposter) | not (T.null newposter)] return $ TagOpen name attr' transformTag tag = return tag modifyMediaRef :: PandocMonad m => FilePath - -> E m TS.Text + -> E m T.Text modifyMediaRef "" = return "" modifyMediaRef oldsrc = do media <- gets stMediaPaths case lookup oldsrc media of - Just (n,_) -> return $ TS.pack n + Just (n,_) -> return $ T.pack n Nothing -> catchError - (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc - let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack + (do (img, mbMime) <- P.fetchItem $ T.pack oldsrc + let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) T.unpack (("." <>) <$> (mbMime >>= extensionFromMimeType)) newName <- getMediaNextNewName ext let newPath = "media/" ++ newName entry <- mkEntry newPath (B.fromChunks . (:[]) $ img) modify $ \st -> st{ stMediaPaths = (oldsrc, (newPath, Just entry)):media} - return $ TS.pack newPath) + return $ T.pack newPath) (\e -> do - report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e) - return $ TS.pack oldsrc) + report $ CouldNotFetchResource (T.pack oldsrc) (tshow e) + return $ T.pack oldsrc) -getMediaNextNewName :: PandocMonad m => String -> E m String +getMediaNextNewName :: PandocMonad m => FilePath -> E m FilePath getMediaNextNewName ext = do nextId <- gets stMediaNextId modify $ \st -> st { stMediaNextId = nextId + 1 } @@ -1128,11 +1132,11 @@ transformInline :: PandocMonad m -> Inline -> E m Inline transformInline _opts (Image attr lab (src,tit)) = do - newsrc <- modifyMediaRef $ TS.unpack src + newsrc <- modifyMediaRef $ T.unpack src return $ Image attr lab ("../" <> newsrc, tit) transformInline opts x@(Math t m) | WebTeX url <- writerHTMLMathMethod opts = do - newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m)) + newsrc <- modifyMediaRef (T.unpack url <> urlEncode (T.unpack m)) let mathclass = if t == DisplayMath then "display" else "inline" return $ Span ("",["math",mathclass],[]) [Image nullAttr [x] ("../" <> newsrc, "")] @@ -1143,40 +1147,26 @@ transformInline _opts (RawInline fmt raw) return $ RawInline fmt (renderTags' tags') transformInline _ x = return x -(!) :: (t -> Element) -> [(String, String)] -> t -> Element +(!) :: (t -> Element) -> [(Text, Text)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) --- | Version of 'ppTopElement' that specifies UTF-8 encoding. -ppTopElement :: Element -> String -ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement - -- unEntity removes numeric entities introduced by ppElement - -- (kindlegen seems to choke on these). - where unEntity [] = "" - unEntity ('&':'#':xs) = - let (ds,ys) = break (==';') xs - rest = drop 1 ys - in case safeRead (TS.pack $ "'\\" <> ds <> "'") of - Just x -> x : unEntity rest - Nothing -> '&':'#':unEntity xs - unEntity (x:xs) = x : unEntity xs - mediaTypeOf :: FilePath -> Maybe MimeType mediaTypeOf x = let mediaPrefixes = ["image", "video", "audio"] in case getMimeType x of - Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y + Just y | any (`T.isPrefixOf` y) mediaPrefixes -> Just y _ -> Nothing -- Returns filename for chapter number. -showChapter :: Int -> String -showChapter = printf "ch%03d.xhtml" +showChapter :: Int -> Text +showChapter = T.pack . printf "ch%03d.xhtml" -- Add identifiers to any headers without them. addIdentifiers :: WriterOptions -> [Block] -> [Block] addIdentifiers opts bs = evalState (mapM go bs) Set.empty where go (Header n (ident,classes,kvs) ils) = do ids <- get - let ident' = if TS.null ident + let ident' = if T.null ident then uniqueIdent (writerExtensions opts) ils ids else ident modify $ Set.insert ident' @@ -1184,27 +1174,27 @@ addIdentifiers opts bs = evalState (mapM go bs) Set.empty go x = return x -- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM -normalizeDate' :: String -> Maybe String -normalizeDate' = fmap TS.unpack . go . trim . TS.pack +normalizeDate' :: Text -> Maybe Text +normalizeDate' = go . T.strip where go xs - | TS.length xs == 4 -- YYY - , TS.all isDigit xs = Just xs - | (y, s) <- TS.splitAt 4 xs -- YYY-MM - , Just ('-', m) <- TS.uncons s - , TS.length m == 2 - , TS.all isDigit y && TS.all isDigit m = Just xs + | T.length xs == 4 -- YYY + , T.all isDigit xs = Just xs + | (y, s) <- T.splitAt 4 xs -- YYY-MM + , Just ('-', m) <- T.uncons s + , T.length m == 2 + , T.all isDigit y && T.all isDigit m = Just xs | otherwise = normalizeDate xs -toRelator :: String -> Maybe String +toRelator :: Text -> Maybe Text toRelator x | x `elem` relators = Just x - | otherwise = lookup (map toLower x) relatorMap + | otherwise = lookup (T.toLower x) relatorMap -relators :: [String] +relators :: [Text] relators = map snd relatorMap -relatorMap :: [(String, String)] +relatorMap :: [(Text, Text)] relatorMap = [("abridger", "abr") ,("actor", "act") diff --git a/src/Text/Pandoc/Writers/FB2.hs b/src/Text/Pandoc/Writers/FB2.hs index 9334d6e9a..3b5d04427 100644 --- a/src/Text/Pandoc/Writers/FB2.hs +++ b/src/Text/Pandoc/Writers/FB2.hs @@ -25,15 +25,12 @@ import Data.ByteString.Base64 (encode) import Data.Char (isAscii, isControl, isSpace) import Data.Either (lefts, rights) import Data.List (intercalate) -import Data.Text (Text, pack) +import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Encoding as TE import Network.HTTP (urlEncode) -import Text.XML.Light -import qualified Text.XML.Light as X -import qualified Text.XML.Light.Cursor as XC -import Text.Pandoc.XMLParser (parseXMLContents) +import Text.Pandoc.XML.Light as X import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import qualified Text.Pandoc.Class.PandocMonad as P @@ -44,6 +41,7 @@ import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..), def) import Text.Pandoc.Shared (capitalize, isURI, orderedListMarkers, makeSections, tshow, stringify) import Text.Pandoc.Writers.Shared (lookupMetaString, toLegacyTable) +import Data.Generics (everywhere, mkT) -- | Data to be written at the end of the document: -- (foot)notes, URLs, references, images. @@ -88,7 +86,7 @@ pandocToFB2 opts (Pandoc meta blocks) = do (imgs,missing) <- get >>= (lift . fetchImages . imagesToFetch) let body' = replaceImagesWithAlt missing body let fb2_xml = el "FictionBook" (fb2_attrs, [desc, body'] ++ notes ++ imgs) - return $ pack $ xml_head ++ showContent fb2_xml ++ "\n" + return $ xml_head <> showContent fb2_xml <> "\n" where xml_head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" fb2_attrs = @@ -100,8 +98,8 @@ pandocToFB2 opts (Pandoc meta blocks) = do description :: PandocMonad m => Meta -> FBM m Content description meta' = do let genre = case lookupMetaString "genre" meta' of - "" -> el "genre" ("unrecognised" :: String) - s -> el "genre" (T.unpack s) + "" -> el "genre" ("unrecognised" :: Text) + s -> el "genre" s bt <- booktitle meta' let as = authors meta' dd <- docdate meta' @@ -112,7 +110,7 @@ description meta' = do Just (MetaInlines [Str s]) -> [el "lang" $ iso639 s] Just (MetaString s) -> [el "lang" $ iso639 s] _ -> [] - where iso639 = T.unpack . T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 + where iso639 = T.takeWhile (/= '-') -- Convert BCP 47 to ISO 639 let coverimage url = do let img = Image nullAttr mempty (url, "") im <- insertImage InlineImage img @@ -124,7 +122,7 @@ description meta' = do return $ el "description" [ el "title-info" (genre : (as ++ bt ++ annotation ++ dd ++ coverpage ++ lang)) - , el "document-info" [el "program-used" ("pandoc" :: String)] + , el "document-info" [el "program-used" ("pandoc" :: Text)] ] booktitle :: PandocMonad m => Meta -> FBM m [Content] @@ -137,15 +135,15 @@ authors meta' = cMap author (docAuthors meta') author :: [Inline] -> [Content] author ss = - let ws = words . cMap plain $ ss - email = el "email" <$> take 1 (filter ('@' `elem`) ws) - ws' = filter ('@' `notElem`) ws + let ws = T.words $ mconcat $ map plain ss + email = el "email" <$> take 1 (filter (T.any (=='@')) ws) + ws' = filter (not . T.any (== '@')) ws names = case ws' of [nickname] -> [ el "nickname" nickname ] [fname, lname] -> [ el "first-name" fname , el "last-name" lname ] (fname:rest) -> [ el "first-name" fname - , el "middle-name" (concat . init $ rest) + , el "middle-name" (T.concat . init $ rest) , el "last-name" (last rest) ] [] -> [] in list $ el "author" (names ++ email) @@ -206,7 +204,7 @@ renderFootnotes = do el "body" ([uattr "name" "notes"], map renderFN (reverse fns)) where renderFN (n, idstr, cs) = - let fn_texts = el "title" (el "p" (show n)) : cs + let fn_texts = el "title" (el "p" (tshow n)) : cs in el "section" ([uattr "id" idstr], fn_texts) -- | Fetch images and encode them for the FictionBook XML. @@ -282,7 +280,7 @@ isMimeType s = where types = ["text","image","audio","video","application","message","multipart"] valid c = isAscii c && not (isControl c) && not (isSpace c) && - c `notElem` ("()<>@,;:\\\"/[]?=" :: String) + c `notElem` ("()<>@,;:\\\"/[]?=" :: [Char]) footnoteID :: Int -> Text footnoteID i = "n" <> tshow i @@ -306,7 +304,7 @@ blockToXml (Para [Image atr alt (src,tgt)]) = insertImage NormalImage (Image atr alt (src,tit)) blockToXml (Para ss) = list . el "p" <$> cMapM toXml ss blockToXml (CodeBlock _ s) = return . spaceBeforeAfter . - map (el "p" . el "code" . T.unpack) . T.lines $ s + map (el "p" . el "code") . T.lines $ s blockToXml (RawBlock f str) = if f == Format "fb2" then @@ -346,11 +344,11 @@ blockToXml (Table _ blkCapt specs thead tbody tfoot) = do c <- el "emphasis" <$> cMapM toXml caption return [el "table" (hd <> bd), el "p" c] where - mkrow :: PandocMonad m => String -> [[Block]] -> [Alignment] -> FBM m Content + mkrow :: PandocMonad m => Text -> [[Block]] -> [Alignment] -> FBM m Content mkrow tag cells aligns' = el "tr" <$> mapM (mkcell tag) (zip cells aligns') -- - mkcell :: PandocMonad m => String -> ([Block], Alignment) -> FBM m Content + mkcell :: PandocMonad m => Text -> ([Block], Alignment) -> FBM m Content mkcell tag (cell, align) = do cblocks <- cMapM blockToXml cell return $ el tag ([align_attr align], cblocks) @@ -424,7 +422,7 @@ toXml (Quoted DoubleQuote ss) = do inner <- cMapM toXml ss return $ [txt "“"] ++ inner ++ [txt "”"] toXml (Cite _ ss) = cMapM toXml ss -- FIXME: support citation styles -toXml (Code _ s) = return [el "code" $ T.unpack s] +toXml (Code _ s) = return [el "code" s] toXml Space = return [txt " "] toXml SoftBreak = return [txt "\n"] toXml LineBreak = return [txt "\n"] @@ -456,7 +454,7 @@ insertMath immode formula = do let imgurl = url <> T.pack (urlEncode $ T.unpack formula) let img = Image nullAttr alt (imgurl, "") insertImage immode img - _ -> return [el "code" $ T.unpack formula] + _ -> return [el "code" formula] insertImage :: PandocMonad m => ImageMode -> Inline -> FBM m [Content] insertImage immode (Image _ alt (url,ttl)) = do @@ -471,31 +469,16 @@ insertImage immode (Image _ alt (url,ttl)) = do el "image" $ [ attr ("l","href") ("#" <> fname) , attr ("l","type") (tshow immode) - , uattr "alt" (T.pack $ cMap plain alt) ] + , uattr "alt" (mconcat $ map plain alt) ] ++ ttlattr insertImage _ _ = error "unexpected inline instead of image" replaceImagesWithAlt :: [Text] -> Content -> Content -replaceImagesWithAlt missingHrefs body = - let cur = XC.fromContent body - cur' = replaceAll cur - in XC.toTree . XC.root $ cur' +replaceImagesWithAlt missingHrefs = everywhere (mkT go) where - -- - replaceAll :: XC.Cursor -> XC.Cursor - replaceAll c = - let n = XC.current c - c' = if isImage n && isMissing n - then XC.modifyContent replaceNode c - else c - in case XC.nextDF c' of - (Just cnext) -> replaceAll cnext - Nothing -> c' -- end of document - -- - isImage :: Content -> Bool - isImage (Elem e) = elName e == uname "image" - isImage _ = False - -- + go c = if isMissing c + then replaceNode c + else c isMissing (Elem img@Element{}) = let imgAttrs = elAttribs img badAttrs = map (attr ("l","href")) missingHrefs @@ -505,18 +488,18 @@ replaceImagesWithAlt missingHrefs body = replaceNode :: Content -> Content replaceNode n@(Elem img@Element{}) = let attrs = elAttribs img - alt = getAttrVal attrs (uname "alt") + alt = getAttrVal attrs (unqual "alt") imtype = getAttrVal attrs (qname "l" "type") in case (alt, imtype) of (Just alt', Just imtype') -> - if imtype' == show NormalImage + if imtype' == tshow NormalImage then el "p" alt' - else txt $ T.pack alt' - (Just alt', Nothing) -> txt $ T.pack alt' -- no type attribute + else txt alt' + (Just alt', Nothing) -> txt alt' -- no type attribute _ -> n -- don't replace if alt text is not found replaceNode n = n -- - getAttrVal :: [X.Attr] -> QName -> Maybe String + getAttrVal :: [X.Attr] -> QName -> Maybe Text getAttrVal attrs name = case filter ((name ==) . attrKey) attrs of (a:_) -> Just (attrVal a) @@ -524,7 +507,7 @@ replaceImagesWithAlt missingHrefs body = -- | Wrap all inlines with an XML tag (given its unqualified name). -wrap :: PandocMonad m => String -> [Inline] -> FBM m Content +wrap :: PandocMonad m => Text -> [Inline] -> FBM m Content wrap tagname inlines = el tagname `liftM` cMapM toXml inlines -- " Create a singleton list. @@ -532,31 +515,31 @@ list :: a -> [a] list = (:[]) -- | Convert an 'Inline' to plaintext. -plain :: Inline -> String -plain (Str s) = T.unpack s -plain (Emph ss) = cMap plain ss -plain (Underline ss) = cMap plain ss -plain (Span _ ss) = cMap plain ss -plain (Strong ss) = cMap plain ss -plain (Strikeout ss) = cMap plain ss -plain (Superscript ss) = cMap plain ss -plain (Subscript ss) = cMap plain ss -plain (SmallCaps ss) = cMap plain ss -plain (Quoted _ ss) = cMap plain ss -plain (Cite _ ss) = cMap plain ss -- FIXME -plain (Code _ s) = T.unpack s +plain :: Inline -> Text +plain (Str s) = s +plain (Emph ss) = mconcat $ map plain ss +plain (Underline ss) = mconcat $ map plain ss +plain (Span _ ss) = mconcat $ map plain ss +plain (Strong ss) = mconcat $ map plain ss +plain (Strikeout ss) = mconcat $ map plain ss +plain (Superscript ss) = mconcat $ map plain ss +plain (Subscript ss) = mconcat $ map plain ss +plain (SmallCaps ss) = mconcat $ map plain ss +plain (Quoted _ ss) = mconcat $ map plain ss +plain (Cite _ ss) = mconcat $ map plain ss -- FIXME +plain (Code _ s) = s plain Space = " " plain SoftBreak = " " plain LineBreak = "\n" -plain (Math _ s) = T.unpack s +plain (Math _ s) = s plain (RawInline _ _) = "" -plain (Link _ text (url,_)) = concat (map plain text ++ [" <", T.unpack url, ">"]) -plain (Image _ alt _) = cMap plain alt +plain (Link _ text (url,_)) = mconcat (map plain text ++ [" <", url, ">"]) +plain (Image _ alt _) = mconcat $ map plain alt plain (Note _) = "" -- FIXME -- | Create an XML element. el :: (Node t) - => String -- ^ unqualified element name + => Text -- ^ unqualified element name -> t -- ^ node contents -> Content -- ^ XML content el name cs = Elem $ unode name cs @@ -569,22 +552,18 @@ spaceBeforeAfter cs = -- | Create a plain-text XML content. txt :: Text -> Content -txt s = Text $ CData CDataText (T.unpack s) Nothing +txt s = Text $ CData CDataText s Nothing -- | Create an XML attribute with an unqualified name. -uattr :: String -> Text -> Text.XML.Light.Attr -uattr name = Attr (uname name) . T.unpack +uattr :: Text -> Text -> X.Attr +uattr name = Attr (unqual name) -- | Create an XML attribute with a qualified name from given namespace. -attr :: (String, String) -> Text -> Text.XML.Light.Attr -attr (ns, name) = Attr (qname ns name) . T.unpack - --- | Unqualified name -uname :: String -> QName -uname name = QName name Nothing Nothing +attr :: (Text, Text) -> Text -> X.Attr +attr (ns, name) = Attr (qname ns name) -- | Qualified name -qname :: String -> String -> QName +qname :: Text -> Text -> QName qname ns name = QName name Nothing (Just ns) -- | Abbreviation for 'concatMap'. diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 06369b4db..101b236aa 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -40,9 +40,9 @@ import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) import Text.Pandoc.XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light import Text.TeXMath -import Text.XML.Light +import qualified Text.XML.Light as XL newtype ODTState = ODTState { stEntries :: [Entry] } @@ -181,18 +181,20 @@ updateStyleWithLang (Just lang) arch = do PandocXMLError "styles.xml" msg Right d -> return $ toEntry "styles.xml" epochtime - ( fromStringLazy + ( fromTextLazy + . TL.fromStrict . ppTopElement . addLang lang $ d ) else return e) (zEntries arch) return arch{ zEntries = entries } +-- TODO FIXME avoid this generic traversal! addLang :: Lang -> Element -> Element addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) - = Attr n (T.unpack $ langLanguage lang) + = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (T.unpack $ langRegion lang) + = Attr n (langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements @@ -238,8 +240,8 @@ transformPicMath _ (Math t math) = do case writeMathML dt <$> readTeX math of Left _ -> return $ Math t math Right r -> do - let conf = useShortEmptyTags (const False) defaultConfigPP - let mathml = ppcTopElement conf r + let conf = XL.useShortEmptyTags (const False) XL.defaultConfigPP + let mathml = XL.ppcTopElement conf r epochtime <- floor `fmap` lift P.getPOSIXTime let dirname = "Formula-" ++ show (length entries) ++ "/" let fname = dirname ++ "content.xml" diff --git a/src/Text/Pandoc/Writers/OOXML.hs b/src/Text/Pandoc/Writers/OOXML.hs index 8f60e70d5..0533d6c12 100644 --- a/src/Text/Pandoc/Writers/OOXML.hs +++ b/src/Text/Pandoc/Writers/OOXML.hs @@ -29,33 +29,32 @@ import Control.Monad.Except (throwError) import Text.Pandoc.Error import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL -import qualified Data.ByteString.Lazy.Char8 as BL8 import Data.Maybe (mapMaybe) import qualified Data.Text as T +import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.UTF8 as UTF8 -import Text.XML.Light as XML -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light -mknode :: Node t => String -> [(String,String)] -> t -> Element +mknode :: Node t => Text -> [(Text,Text)] -> t -> Element mknode s attrs = add_attrs (map (\(k,v) -> Attr (nodename k) v) attrs) . node (nodename s) -mktnode :: String -> [(String,String)] -> T.Text -> Element -mktnode s attrs = mknode s attrs . T.unpack +mktnode :: Text -> [(Text,Text)] -> T.Text -> Element +mktnode s attrs = mknode s attrs -nodename :: String -> QName +nodename :: Text -> QName nodename s = QName{ qName = name, qURI = Nothing, qPrefix = prefix } - where (name, prefix) = case break (==':') s of - (xs,[]) -> (xs, Nothing) - (ys, _:zs) -> (zs, Just ys) + where (name, prefix) = case T.break (==':') s of + (xs,ys) -> case T.uncons ys of + Nothing -> (xs, Nothing) + Just (_,zs) -> (zs, Just xs) toLazy :: B.ByteString -> BL.ByteString toLazy = BL.fromChunks . (:[]) renderXml :: Element -> BL.ByteString -renderXml elt = BL8.pack "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" <> - UTF8.fromStringLazy (showElement elt) +renderXml elt = BL.fromStrict (UTF8.fromText (showTopElement elt)) parseXml :: PandocMonad m => Archive -> Archive -> String -> m Element parseXml refArchive distArchive relpath = @@ -70,25 +69,25 @@ parseXml refArchive distArchive relpath = -- Copied from Util -attrToNSPair :: XML.Attr -> Maybe (String, String) -attrToNSPair (XML.Attr (QName s _ (Just "xmlns")) val) = Just (s, val) +attrToNSPair :: Attr -> Maybe (Text, Text) +attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) attrToNSPair _ = Nothing elemToNameSpaces :: Element -> NameSpaces elemToNameSpaces = mapMaybe attrToNSPair . elAttribs -elemName :: NameSpaces -> String -> String -> QName +elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if null prefix then Nothing else Just prefix) + QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) -isElem :: NameSpaces -> String -> String -> Element -> Bool +isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = let ns' = ns ++ elemToNameSpaces element in qName (elName element) == name && qURI (elName element) == lookup prefix ns' -type NameSpaces = [(String, String)] +type NameSpaces = [(Text, Text)] -- | Scales the image to fit the page -- sizes are passed in emu diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 0a7060895..5caeb0753 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -20,16 +20,16 @@ import Control.Monad.Except (throwError, catchError) import Control.Monad.Reader import Control.Monad.State import Codec.Archive.Zip -import Data.Char (toUpper) import Data.List (intercalate, stripPrefix, nub, union, isPrefixOf, intersperse) import Data.Default +import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.Read import Data.Time (formatTime, defaultTimeLocale) import Data.Time.Clock (UTCTime) import Data.Time.Clock.POSIX (utcTimeToPOSIXSeconds, posixSecondsToUTCTime) import System.FilePath.Posix (splitDirectories, splitExtension, takeExtension) -import Text.XML.Light -import Text.Pandoc.XMLParser (parseXMLElement) +import Text.Pandoc.XML.Light as XML import Text.Pandoc.Definition import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Class.PandocMonad (PandocMonad) @@ -48,6 +48,7 @@ import Text.DocTemplates (FromContext(lookupContext)) import Text.TeXMath import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation +import Text.Pandoc.Shared (tshow) import Skylighting (fromColor) -- |The 'EMU' type is used to specify sizes in English Metric Units. @@ -84,10 +85,13 @@ getPresentationSize refArchive distArchive = do sldSize <- findChild (elemName ns "p" "sldSz") presElement cxS <- findAttr (QName "cx" Nothing Nothing) sldSize cyS <- findAttr (QName "cy" Nothing Nothing) sldSize - (cx, _) <- listToMaybe $ reads cxS :: Maybe (Integer, String) - (cy, _) <- listToMaybe $ reads cyS :: Maybe (Integer, String) + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return (cx `div` 12700, cy `div` 12700) +readTextAsInteger :: Text -> Maybe Integer +readTextAsInteger = either (const Nothing) (Just . fst) . Data.Text.Read.decimal + data WriterEnv = WriterEnv { envRefArchive :: Archive , envDistArchive :: Archive , envUTCTime :: UTCTime @@ -161,9 +165,6 @@ runP env st p = evalStateT (runReaderT p env) st -------------------------------------------------------------------- -findAttrText :: QName -> Element -> Maybe T.Text -findAttrText n = fmap T.pack . findAttr n - monospaceFont :: Monad m => P m T.Text monospaceFont = do vars <- writerVariables <$> asks envOpts @@ -171,10 +172,9 @@ monospaceFont = do Just s -> return s Nothing -> return "Courier" --- Kept as string for XML.Light -fontSizeAttributes :: Monad m => RunProps -> P m [(String, String)] +fontSizeAttributes :: Monad m => RunProps -> P m [(Text, Text)] fontSizeAttributes RunProps { rPropForceSize = Just sz } = - return [("sz", show $ sz * 100)] + return [("sz", tshow $ sz * 100)] fontSizeAttributes _ = return [] copyFileToArchive :: PandocMonad m => Archive -> FilePath -> P m Archive @@ -365,7 +365,7 @@ shapeHasId :: NameSpaces -> T.Text -> Element -> Bool shapeHasId ns ident element | Just nvSpPr <- findChild (elemName ns "p" "nvSpPr") element , Just cNvPr <- findChild (elemName ns "p" "cNvPr") nvSpPr - , Just nm <- findAttrText (QName "id" Nothing Nothing) cNvPr = + , Just nm <- findAttr (QName "id" Nothing Nothing) cNvPr = nm == ident | otherwise = False @@ -396,10 +396,10 @@ getShapeDimensions ns element ext <- findChild (elemName ns "a" "ext") xfrm cxS <- findAttr (QName "cx" Nothing Nothing) ext cyS <- findAttr (QName "cy" Nothing Nothing) ext - (x, _) <- listToMaybe $ reads xS - (y, _) <- listToMaybe $ reads yS - (cx, _) <- listToMaybe $ reads cxS - (cy, _) <- listToMaybe $ reads cyS + x <- readTextAsInteger xS + y <- readTextAsInteger yS + cx <- readTextAsInteger cxS + cy <- readTextAsInteger cyS return ((x `div` 12700, y `div` 12700), (cx `div` 12700, cy `div` 12700)) | otherwise = Nothing @@ -430,7 +430,7 @@ getContentShapeSize ns layout master Nothing -> do let mbSz = findChild (elemName ns "p" "nvSpPr") sp >>= findChild (elemName ns "p" "cNvPr") >>= - findAttrText (QName "id" Nothing Nothing) >>= + findAttr (QName "id" Nothing Nothing) >>= flip getMasterShapeDimensionsById master case mbSz of Just sz' -> return sz' @@ -450,8 +450,8 @@ buildSpTree ns spTreeElem newShapes = fn _ = True replaceNamedChildren :: NameSpaces - -> String - -> String + -> Text + -> Text -> [Element] -> Element -> Element @@ -654,10 +654,10 @@ createCaption contentShapeDimensions paraElements = do ] , mknode "p:spPr" [] [ mknode "a:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), - ("y", show $ 12700 * (y + cy - captionHeight))] () - , mknode "a:ext" [("cx", show $ 12700 * cx), - ("cy", show $ 12700 * captionHeight)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * (y + cy - captionHeight))] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * captionHeight)] () ] , mknode "a:prstGeom" [("prst", "rect")] [ mknode "a:avLst" [] () @@ -706,11 +706,13 @@ makePicElements layout picProps mInfo alt = do ,("noChangeAspect","1")] () -- cNvPr will contain the link information so we do that separately, -- and register the link if necessary. - let cNvPrAttr = [("descr", mInfoFilePath mInfo), ("id","0"),("name","Picture 1")] + let cNvPrAttr = [("descr", T.pack $ mInfoFilePath mInfo), + ("id","0"), + ("name","Picture 1")] cNvPr <- case picPropLink picProps of Just link -> do idNum <- registerLink link return $ mknode "p:cNvPr" cNvPrAttr $ - mknode "a:hlinkClick" [("r:id", "rId" <> show idNum)] () + mknode "a:hlinkClick" [("r:id", "rId" <> tshow idNum)] () Nothing -> return $ mknode "p:cNvPr" cNvPrAttr () let nvPicPr = mknode "p:nvPicPr" [] [ cNvPr @@ -718,13 +720,13 @@ makePicElements layout picProps mInfo alt = do , mknode "p:nvPr" [] ()] let blipFill = mknode "p:blipFill" [] [ mknode "a:blip" [("r:embed", "rId" <> - show (mInfoLocalId mInfo))] () + tshow (mInfoLocalId mInfo))] () , mknode "a:stretch" [] $ mknode "a:fillRect" [] () ] let xfrm = mknode "a:xfrm" [] - [ mknode "a:off" [("x",show xoff'), ("y",show yoff')] () - , mknode "a:ext" [("cx",show dimX') - ,("cy",show dimY')] () ] + [ mknode "a:off" [("x", tshow xoff'), ("y", tshow yoff')] () + , mknode "a:ext" [("cx", tshow dimX') + ,("cy", tshow dimY')] () ] let prstGeom = mknode "a:prstGeom" [("prst","rect")] $ mknode "a:avLst" [] () let ln = mknode "a:ln" [("w","9525")] @@ -763,7 +765,7 @@ paraElemToElements (Run rpr s) = do Just DoubleStrike -> [("strike", "dblStrike")] Nothing -> []) <> (case rBaseline rpr of - Just n -> [("baseline", show n)] + Just n -> [("baseline", tshow n)] Nothing -> []) <> (case rCap rpr of Just NoCapitals -> [("cap", "none")] @@ -780,43 +782,44 @@ paraElemToElements (Run rpr s) = do return $ case link of InternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) , ("action", "ppaction://hlinksldjump") ] in [mknode "a:hlinkClick" linkAttrs ()] -- external ExternalTarget _ -> let linkAttrs = - [ ("r:id", "rId" <> show idNum) + [ ("r:id", "rId" <> tshow idNum) ] in [mknode "a:hlinkClick" linkAttrs ()] Nothing -> return [] let colorContents = case rSolidFill rpr of Just color -> case fromColor color of - '#':hx -> [mknode "a:solidFill" [] - [mknode "a:srgbClr" [("val", map toUpper hx)] ()] - ] + '#':hx -> + [mknode "a:solidFill" [] + [mknode "a:srgbClr" + [("val", T.toUpper $ T.pack hx)] ()]] _ -> [] Nothing -> [] codeFont <- monospaceFont let codeContents = - [mknode "a:latin" [("typeface", T.unpack codeFont)] () | rPropCode rpr] + [mknode "a:latin" [("typeface", codeFont)] () | rPropCode rpr] let propContents = linkProps <> colorContents <> codeContents return [Elem $ mknode "a:r" [] [ mknode "a:rPr" attrs propContents - , mknode "a:t" [] $ T.unpack s + , mknode "a:t" [] s ]] paraElemToElements (MathElem mathType texStr) = do isInSpkrNotes <- asks envInSpeakerNotes if isInSpkrNotes then paraElemToElements $ Run def $ unTeXString texStr else do res <- convertMath writeOMML mathType (unTeXString texStr) - case res of + case fromXLElement <$> res of Right r -> return [Elem $ mknode "a14:m" [] $ addMathInfo r] Left (Str s) -> paraElemToElements (Run def s) Left _ -> throwError $ PandocShouldNeverHappenError "non-string math fallback" paraElemToElements (RawOOXMLParaElem str) = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] -- This is a bit of a kludge -- really requires adding an option to @@ -824,9 +827,10 @@ paraElemToElements (RawOOXMLParaElem str) = return -- step at a time. addMathInfo :: Element -> Element addMathInfo element = - let mathspace = Attr { attrKey = QName "m" Nothing (Just "xmlns") - , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" - } + let mathspace = + Attr { attrKey = QName "m" Nothing (Just "xmlns") + , attrVal = "http://schemas.openxmlformats.org/officeDocument/2006/math" + } in add_attr mathspace element -- We look through the element to see if it contains an a14:m @@ -849,13 +853,13 @@ surroundWithMathAlternate element = paragraphToElement :: PandocMonad m => Paragraph -> P m Element paragraphToElement par = do let - attrs = [("lvl", show $ pPropLevel $ paraProps par)] <> + attrs = [("lvl", tshow $ pPropLevel $ paraProps par)] <> (case pPropMarginLeft (paraProps par) of - Just px -> [("marL", show $ pixelsToEmu px)] + Just px -> [("marL", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropIndent (paraProps par) of - Just px -> [("indent", show $ pixelsToEmu px)] + Just px -> [("indent", tshow $ pixelsToEmu px)] Nothing -> [] ) <> (case pPropAlign (paraProps par) of @@ -867,7 +871,7 @@ paragraphToElement par = do props = [] <> (case pPropSpaceBefore $ paraProps par of Just px -> [mknode "a:spcBef" [] [ - mknode "a:spcPts" [("val", show $ 100 * px)] () + mknode "a:spcPts" [("val", tshow $ 100 * px)] () ] ] Nothing -> [] @@ -910,7 +914,7 @@ shapeToElements layout (Pic picProps fp alt) = do shapeToElements layout (GraphicFrame tbls cptn) = map Elem <$> graphicFrameToElements layout tbls cptn shapeToElements _ (RawOOXMLShape str) = return - [Text (CData CDataRaw (T.unpack str) Nothing)] + [Text (CData CDataRaw str Nothing)] shapeToElements layout shp = do element <- shapeToElement layout shp return [Elem element] @@ -942,8 +946,10 @@ graphicFrameToElements layout tbls caption = do [mknode "p:ph" [("idx", "1")] ()] ] , mknode "p:xfrm" [] - [ mknode "a:off" [("x", show $ 12700 * x), ("y", show $ 12700 * y)] () - , mknode "a:ext" [("cx", show $ 12700 * cx), ("cy", show $ 12700 * cy)] () + [ mknode "a:off" [("x", tshow $ 12700 * x), + ("y", tshow $ 12700 * y)] () + , mknode "a:ext" [("cx", tshow $ 12700 * cx), + ("cy", tshow $ 12700 * cy)] () ] ] <> elements @@ -957,7 +963,7 @@ getDefaultTableStyle = do refArchive <- asks envRefArchive distArchive <- asks envDistArchive tblStyleLst <- parseXml refArchive distArchive "ppt/tableStyles.xml" - return $ findAttrText (QName "def" Nothing Nothing) tblStyleLst + return $ findAttr (QName "def" Nothing Nothing) tblStyleLst graphicToElement :: PandocMonad m => Integer -> Graphic -> P m Element graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do @@ -995,7 +1001,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do let mkrow border cells = mknode "a:tr" [("h", "0")] $ map (mkcell border) cells let mkgridcol w = mknode "a:gridCol" - [("w", show ((12700 * w) :: Integer))] () + [("w", tshow ((12700 * w) :: Integer))] () let hasHeader = not (all null hdrCells) mbDefTblStyle <- getDefaultTableStyle @@ -1004,7 +1010,7 @@ graphicToElement tableWidth (Tbl tblPr hdrCells rows) = do , ("bandRow", if tblPrBandRow tblPr then "1" else "0") ] (case mbDefTblStyle of Nothing -> [] - Just sty -> [mknode "a:tableStyleId" [] $ T.unpack sty]) + Just sty -> [mknode "a:tableStyleId" [] sty]) return $ mknode "a:graphic" [] [mknode "a:graphicData" [("uri", "http://schemas.openxmlformats.org/drawingml/2006/table")] @@ -1037,7 +1043,7 @@ findPHType ns spElem phType -- if it's a named PHType, we want to check that the attribute -- value matches. Just phElem | (PHType tp) <- phType -> - case findAttrText (QName "type" Nothing Nothing) phElem of + case findAttr (QName "type" Nothing Nothing) phElem of Just tp' -> tp == tp' Nothing -> False -- if it's an ObjType, we want to check that there is NO @@ -1204,7 +1210,7 @@ getSlideNumberFieldId notesMaster , Just txBody <- findChild (elemName ns "p" "txBody") sp , Just p <- findChild (elemName ns "a" "p") txBody , Just fld <- findChild (elemName ns "a" "fld") p - , Just fldId <- findAttrText (QName "id" Nothing Nothing) fld = + , Just fldId <- findAttr (QName "id" Nothing Nothing) fld = return fldId | otherwise = throwError $ PandocSomeError @@ -1283,11 +1289,11 @@ speakerNotesSlideNumber pgNum fieldId = [ mknode "a:bodyPr" [] () , mknode "a:lstStyle" [] () , mknode "a:p" [] - [ mknode "a:fld" [ ("id", T.unpack fieldId) + [ mknode "a:fld" [ ("id", fieldId) , ("type", "slidenum") ] [ mknode "a:rPr" [("lang", "en-US")] () - , mknode "a:t" [] (show pgNum) + , mknode "a:t" [] (tshow pgNum) ] , mknode "a:endParaRPr" [("lang", "en-US")] () ] @@ -1339,7 +1345,7 @@ getSlideIdNum sldId = do Just n -> return n Nothing -> throwError $ PandocShouldNeverHappenError $ - "Slide Id " <> T.pack (show sldId) <> " not found." + "Slide Id " <> tshow sldId <> " not found." slideNum :: PandocMonad m => Slide -> P m Int slideNum slide = getSlideIdNum $ slideId slide @@ -1356,7 +1362,7 @@ slideToRelId :: PandocMonad m => Slide -> P m T.Text slideToRelId slide = do n <- slideNum slide offset <- asks envSlideIdOffset - return $ "rId" <> T.pack (show $ n + offset) + return $ "rId" <> tshow (n + offset) data Relationship = Relationship { relId :: Int @@ -1368,13 +1374,11 @@ elementToRel :: Element -> Maybe Relationship elementToRel element | elName element == QName "Relationship" (Just "http://schemas.openxmlformats.org/package/2006/relationships") Nothing = do rId <- findAttr (QName "Id" Nothing Nothing) element - numStr <- stripPrefix "rId" rId - num <- case reads numStr :: [(Int, String)] of - (n, _) : _ -> Just n - [] -> Nothing - type' <- findAttrText (QName "Type" Nothing Nothing) element + numStr <- T.stripPrefix "rId" rId + num <- fromIntegral <$> readTextAsInteger numStr + type' <- findAttr (QName "Type" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship num type' target + return $ Relationship num type' (T.unpack target) | otherwise = Nothing slideToPresRel :: PandocMonad m => Slide -> P m Relationship @@ -1463,10 +1467,9 @@ topLevelRelsEntry :: PandocMonad m => P m Entry topLevelRelsEntry = elemToEntry "_rels/.rels" $ relsToElement topLevelRels relToElement :: Relationship -> Element -relToElement rel = mknode "Relationship" [ ("Id", "rId" <> - show (relId rel)) - , ("Type", T.unpack $ relType rel) - , ("Target", relTarget rel) ] () +relToElement rel = mknode "Relationship" [ ("Id", "rId" <> tshow (relId rel)) + , ("Type", relType rel) + , ("Target", T.pack $ relTarget rel) ] () relsToElement :: [Relationship] -> Element relsToElement rels = mknode "Relationships" @@ -1501,7 +1504,8 @@ slideToSpeakerNotesEntry slide = do Just element | Just notesIdNum <- mbNotesIdNum -> Just <$> elemToEntry - ("ppt/notesSlides/notesSlide" <> show notesIdNum <> ".xml") + ("ppt/notesSlides/notesSlide" <> show notesIdNum <> + ".xml") element _ -> return Nothing @@ -1514,7 +1518,7 @@ slideToSpeakerNotesRelElement slide@Slide{} = do [("xmlns", "http://schemas.openxmlformats.org/package/2006/relationships")] [ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "../slides/slide" <> show idNum <> ".xml") + , ("Target", "../slides/slide" <> tshow idNum <> ".xml") ] () , mknode "Relationship" [ ("Id", "rId1") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster") @@ -1547,15 +1551,15 @@ linkRelElement :: PandocMonad m => (Int, LinkTarget) -> P m Element linkRelElement (rIdNum, InternalTarget targetId) = do targetIdNum <- getSlideIdNum targetId return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide") - , ("Target", "slide" <> show targetIdNum <> ".xml") + , ("Target", "slide" <> tshow targetIdNum <> ".xml") ] () linkRelElement (rIdNum, ExternalTarget (url, _)) = return $ - mknode "Relationship" [ ("Id", "rId" <> show rIdNum) + mknode "Relationship" [ ("Id", "rId" <> tshow rIdNum) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink") - , ("Target", T.unpack url) + , ("Target", url) , ("TargetMode", "External") ] () @@ -1567,10 +1571,10 @@ mediaRelElement mInfo = let ext = fromMaybe "" (mInfoExt mInfo) in mknode "Relationship" [ ("Id", "rId" <> - show (mInfoLocalId mInfo)) + tshow (mInfoLocalId mInfo)) , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image") , ("Target", "../media/image" <> - show (mInfoGlobalId mInfo) <> T.unpack ext) + tshow (mInfoGlobalId mInfo) <> ext) ] () speakerNotesSlideRelElement :: PandocMonad m => Slide -> P m (Maybe Element) @@ -1580,7 +1584,7 @@ speakerNotesSlideRelElement slide = do return $ case M.lookup idNum mp of Nothing -> Nothing Just n -> - let target = "../notesSlides/notesSlide" <> show n <> ".xml" + let target = "../notesSlides/notesSlide" <> tshow n <> ".xml" in Just $ mknode "Relationship" [ ("Id", "rId2") , ("Type", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide") @@ -1619,9 +1623,9 @@ slideToSlideRelElement slide = do slideToSldIdElement :: PandocMonad m => Slide -> P m Element slideToSldIdElement slide = do n <- slideNum slide - let id' = show $ n + 255 + let id' = tshow $ n + 255 rId <- slideToRelId slide - return $ mknode "p:sldId" [("id", id'), ("r:id", T.unpack rId)] () + return $ mknode "p:sldId" [("id", id'), ("r:id", rId)] () presentationToSldIdLst :: PandocMonad m => Presentation -> P m Element presentationToSldIdLst (Presentation _ slides) = do @@ -1646,7 +1650,7 @@ presentationToPresentationElement pres@(Presentation _ slds) = do notesMasterElem = mknode "p:notesMasterIdLst" [] [ mknode "p:NotesMasterId" - [("r:id", "rId" <> show notesMasterRId)] + [("r:id", "rId" <> tshow notesMasterRId)] () ] @@ -1702,17 +1706,17 @@ docPropsElement docProps = do ,("xmlns:dcmitype","http://purl.org/dc/dcmitype/") ,("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")] $ - mknode "dc:title" [] (maybe "" T.unpack $ dcTitle docProps) + mknode "dc:title" [] (fromMaybe "" $ dcTitle docProps) : - mknode "dc:creator" [] (maybe "" T.unpack $ dcCreator docProps) + mknode "dc:creator" [] (fromMaybe "" $ dcCreator docProps) : - mknode "cp:keywords" [] (T.unpack keywords) - : ( [mknode "dc:subject" [] $ maybe "" T.unpack $ dcSubject docProps | isJust (dcSubject docProps)]) - <> ( [mknode "dc:description" [] $ maybe "" T.unpack $ dcDescription docProps | isJust (dcDescription docProps)]) - <> ( [mknode "cp:category" [] $ maybe "" T.unpack $ cpCategory docProps | isJust (cpCategory docProps)]) + mknode "cp:keywords" [] keywords + : ( [mknode "dc:subject" [] $ fromMaybe "" $ dcSubject docProps | isJust (dcSubject docProps)]) + <> ( [mknode "dc:description" [] $ fromMaybe "" $ dcDescription docProps | isJust (dcDescription docProps)]) + <> ( [mknode "cp:category" [] $ fromMaybe "" $ cpCategory docProps | isJust (cpCategory docProps)]) <> (\x -> [ mknode "dcterms:created" [("xsi:type","dcterms:W3CDTF")] x - , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x - ]) (formatTime defaultTimeLocale "%FT%XZ" utctime) + , mknode "dcterms:modified" [("xsi:type","dcterms:W3CDTF")] x + ]) (T.pack $ formatTime defaultTimeLocale "%FT%XZ" utctime) docPropsToEntry :: PandocMonad m => DocProps -> P m Entry docPropsToEntry docProps = docPropsElement docProps >>= @@ -1723,8 +1727,8 @@ docCustomPropsElement :: PandocMonad m => DocProps -> P m Element docCustomPropsElement docProps = do let mkCustomProp (k, v) pid = mknode "property" [("fmtid","{D5CDD505-2E9C-101B-9397-08002B2CF9AE}") - ,("pid", show pid) - ,("name", T.unpack k)] $ mknode "vt:lpwstr" [] (T.unpack v) + ,("pid", tshow pid) + ,("name", k)] $ mknode "vt:lpwstr" [] v return $ mknode "Properties" [("xmlns","http://schemas.openxmlformats.org/officeDocument/2006/custom-properties") ,("xmlns:vt","http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes") @@ -1743,7 +1747,7 @@ viewPropsElement = do distArchive <- asks envDistArchive viewPrElement <- parseXml refArchive distArchive "ppt/viewProps.xml" -- remove "lastView" if it exists: - let notLastView :: Text.XML.Light.Attr -> Bool + let notLastView :: XML.Attr -> Bool notLastView attr = qName (attrKey attr) /= "lastView" return $ @@ -1755,15 +1759,15 @@ makeViewPropsEntry = viewPropsElement >>= elemToEntry "ppt/viewProps.xml" defaultContentTypeToElem :: DefaultContentType -> Element defaultContentTypeToElem dct = mknode "Default" - [("Extension", T.unpack $ defContentTypesExt dct), - ("ContentType", T.unpack $ defContentTypesType dct)] + [("Extension", defContentTypesExt dct), + ("ContentType", defContentTypesType dct)] () overrideContentTypeToElem :: OverrideContentType -> Element overrideContentTypeToElem oct = mknode "Override" - [("PartName", overrideContentTypesPart oct), - ("ContentType", T.unpack $ overrideContentTypesType oct)] + [("PartName", T.pack $ overrideContentTypesPart oct), + ("ContentType", overrideContentTypesType oct)] () contentTypesToElement :: ContentTypes -> Element @@ -1821,7 +1825,8 @@ getSpeakerNotesFilePaths :: PandocMonad m => P m [FilePath] getSpeakerNotesFilePaths = do mp <- asks envSpeakerNotesIdMap let notesIdNums = M.elems mp - return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") notesIdNums + return $ map (\n -> "ppt/notesSlides/notesSlide" <> show n <> ".xml") + notesIdNums presentationToContentTypes :: PandocMonad m => Presentation -> P m ContentTypes presentationToContentTypes p@(Presentation _ slides) = do @@ -1885,11 +1890,11 @@ getContentType fp | otherwise = Nothing -- Kept as String for XML.Light -autoNumAttrs :: ListAttributes -> [(String, String)] +autoNumAttrs :: ListAttributes -> [(Text, Text)] autoNumAttrs (startNum, numStyle, numDelim) = numAttr <> typeAttr where - numAttr = [("startAt", show startNum) | startNum /= 1] + numAttr = [("startAt", tshow startNum) | startNum /= 1] typeAttr = [("type", typeString <> delimString)] typeString = case numStyle of Decimal -> "arabic" diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs new file mode 100644 index 000000000..38e4df218 --- /dev/null +++ b/src/Text/Pandoc/XML/Light.hs @@ -0,0 +1,586 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XML.Light + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +xml-light, which we used in pandoc's the XML-based readers, has +some limitations: in particular, it produces nodes with String +instead of Text, and the parser falls over on processing instructions +(see #7091). + +This module exports much of the API of xml-light, but using Text instead +of String. In addition, the xml-light parsers are replaced by xml-conduit's +well-tested parser. (The xml-conduit types are mapped to types +isomorphic to xml-light's, to avoid the need for massive code modifications +elsewhere.) Bridge functions to map xml-light types to this module's +types are also provided (since libraries like texmath still use xml-light). + +Another advantage of the xml-conduit parser is that it gives us +detailed information on xml parse errors. + +In the future we may want to move to using xml-conduit or another +xml library in the code base, but this change gives us +better performance and accuracy without much change in the +code that used xml-light. +-} +module Text.Pandoc.XML.Light + ( -- * Basic types, duplicating those from xml-light but with Text + -- instead of String + Line + , Content(..) + , Element(..) + , Attr(..) + , CData(..) + , CDataKind(..) + , QName(..) + , Node(..) + , unode + , unqual + , add_attr + , add_attrs + -- * Conversion functions from xml-light types + , fromXLQName + , fromXLCData + , fromXLElement + , fromXLAttr + , fromXLContent + -- * Replacement for xml-light's Text.XML.Proc + , strContent + , onlyElems + , elChildren + , onlyText + , findChildren + , filterChildren + , filterChildrenName + , findChild + , filterChild + , filterChildName + , findElement + , filterElement + , filterElementName + , findElements + , filterElements + , filterElementsName + , findAttr + , lookupAttr + , lookupAttrBy + , findAttrBy + -- * Replacement for xml-light's Text.XML.Output + , ppTopElement + , ppElement + , ppContent + , ppcElement + , ppcContent + , showTopElement + , showElement + , showContent + , useShortEmptyTags + , defaultConfigPP + , ConfigPP(..) + -- * Replacement for xml-light's Text.XML.Input + , parseXMLElement + , parseXMLContents + ) where + +import qualified Control.Exception as E +import qualified Text.XML as Conduit +import Text.XML.Unresolved (InvalidEventStream(..)) +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText) +import qualified Data.Map as M +import Data.Data (Data) +import Data.Typeable (Typeable) +import Data.Maybe (mapMaybe, listToMaybe) +import Data.List(find) +import qualified Text.XML.Light as XL + +-- Drop in replacement for parseXMLDoc in xml-light. +parseXMLElement :: TL.Text -> Either T.Text Element +parseXMLElement t = + elementToElement . Conduit.documentRoot <$> + either (Left . T.pack . E.displayException) Right + (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) + +parseXMLContents :: TL.Text -> Either T.Text [Content] +parseXMLContents t = + case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of + Left e -> + case E.fromException e of + Just (ContentAfterRoot _) -> + elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>") + _ -> Left . T.pack . E.displayException $ e + Right x -> Right [Elem . elementToElement . Conduit.documentRoot $ x] + +elementToElement :: Conduit.Element -> Element +elementToElement (Conduit.Element name attribMap nodes) = + Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing + where + attrs = map (\(n,v) -> Attr (nameToQname n) v) $ + M.toList attribMap + nameToQname (Conduit.Name localName mbns mbpref) = + case mbpref of + Nothing -> + case T.stripPrefix "xmlns:" localName of + Just rest -> QName rest mbns (Just "xmlns") + Nothing -> QName localName mbns mbpref + _ -> QName localName mbns mbpref + +nodeToContent :: Conduit.Node -> Maybe Content +nodeToContent (Conduit.NodeElement el) = + Just (Elem (elementToElement el)) +nodeToContent (Conduit.NodeContent t) = + Just (Text (CData CDataText t Nothing)) +nodeToContent _ = Nothing + +unqual :: Text -> QName +unqual x = QName x Nothing Nothing + +-- | Add an attribute to an element. +add_attr :: Attr -> Element -> Element +add_attr a e = add_attrs [a] e + +-- | Add some attributes to an element. +add_attrs :: [Attr] -> Element -> Element +add_attrs as e = e { elAttribs = as ++ elAttribs e } + +-- +-- type definitions lightly modified from xml-light +-- + +-- | A line is an Integer +type Line = Integer + +-- | XML content +data Content = Elem Element + | Text CData + | CRef Text + deriving (Show, Typeable, Data) + +-- | XML elements +data Element = Element { + elName :: QName, + elAttribs :: [Attr], + elContent :: [Content], + elLine :: Maybe Line + } deriving (Show, Typeable, Data) + +-- | XML attributes +data Attr = Attr { + attrKey :: QName, + attrVal :: Text + } deriving (Eq, Ord, Show, Typeable, Data) + +-- | XML CData +data CData = CData { + cdVerbatim :: CDataKind, + cdData :: Text, + cdLine :: Maybe Line + } deriving (Show, Typeable, Data) + +data CDataKind + = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. + | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[.. + | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up. + deriving ( Eq, Show, Typeable, Data ) + +-- | XML qualified names +data QName = QName { + qName :: Text, + qURI :: Maybe Text, + qPrefix :: Maybe Text + } deriving (Show, Typeable, Data) + + +instance Eq QName where + q1 == q2 = compare q1 q2 == EQ + +instance Ord QName where + compare q1 q2 = + case compare (qName q1) (qName q2) of + EQ -> case (qURI q1, qURI q2) of + (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) + (u1,u2) -> compare u1 u2 + x -> x + +class Node t where + node :: QName -> t -> Element + +instance Node ([Attr],[Content]) where + node n (attrs,cont) = Element { elName = n + , elAttribs = attrs + , elContent = cont + , elLine = Nothing + } + +instance Node [Attr] where node n as = node n (as,[]::[Content]) +instance Node Attr where node n a = node n [a] +instance Node () where node n () = node n ([]::[Attr]) + +instance Node [Content] where node n cs = node n ([]::[Attr],cs) +instance Node Content where node n c = node n [c] +instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) + +instance Node ([Attr],[Element]) where + node n (as,cs) = node n (as,map Elem cs) + +instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Element) where node n (a,c) = node n ([a],c) +instance Node [Element] where node n es = node n ([]::[Attr],es) +instance Node Element where node n e = node n [e] + +instance Node ([Attr],[CData]) where + node n (as,cs) = node n (as,map Text cs) + +instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) +instance Node (Attr,CData) where node n (a,c) = node n ([a],c) +instance Node [CData] where node n es = node n ([]::[Attr],es) +instance Node CData where node n e = node n [e] + +instance Node ([Attr],Text) where + node n (as,t) = node n (as, CData { cdVerbatim = CDataText + , cdData = t + , cdLine = Nothing }) + +instance Node (Attr,Text ) where node n (a,t) = node n ([a],t) +instance Node Text where node n t = node n ([]::[Attr],t) + +-- | Create node with unqualified name +unode :: Node t => Text -> t -> Element +unode = node . unqual + +-- +-- conversion from xml-light +-- + +fromXLQName :: XL.QName -> QName +fromXLQName qn = QName { qName = T.pack $ XL.qName qn + , qURI = T.pack <$> XL.qURI qn + , qPrefix = T.pack <$> XL.qPrefix qn } + +fromXLCData :: XL.CData -> CData +fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of + XL.CDataText -> CDataText + XL.CDataVerbatim -> CDataVerbatim + XL.CDataRaw -> CDataRaw + , cdData = T.pack $ XL.cdData cd + , cdLine = XL.cdLine cd } + +fromXLElement :: XL.Element -> Element +fromXLElement el = Element { elName = fromXLQName $ XL.elName el + , elAttribs = map fromXLAttr $ XL.elAttribs el + , elContent = map fromXLContent $ XL.elContent el + , elLine = XL.elLine el } + +fromXLAttr :: XL.Attr -> Attr +fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s) + +fromXLContent :: XL.Content -> Content +fromXLContent (XL.Elem el) = Elem $ fromXLElement el +fromXLContent (XL.Text cd) = Text $ fromXLCData cd +fromXLContent (XL.CRef s) = CRef (T.pack s) + +-- +-- copied from xml-light Text.XML.Proc +-- + +-- | Get the text value of an XML element. This function +-- ignores non-text elements, and concatenates all text elements. +strContent :: Element -> Text +strContent = mconcat . map cdData . onlyText . elContent + +-- | Select only the elements from a list of XML content. +onlyElems :: [Content] -> [Element] +onlyElems xs = [ x | Elem x <- xs ] + +-- | Select only the elements from a parent. +elChildren :: Element -> [Element] +elChildren e = [ x | Elem x <- elContent e ] + +-- | Select only the text from a list of XML content. +onlyText :: [Content] -> [CData] +onlyText xs = [ x | Text x <- xs ] + +-- | Find all immediate children with the given name. +findChildren :: QName -> Element -> [Element] +findChildren q e = filterChildren ((q ==) . elName) e + +-- | Filter all immediate children wrt a given predicate. +filterChildren :: (Element -> Bool) -> Element -> [Element] +filterChildren p e = filter p (onlyElems (elContent e)) + + +-- | Filter all immediate children wrt a given predicate over their names. +filterChildrenName :: (QName -> Bool) -> Element -> [Element] +filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) + + +-- | Find an immediate child with the given name. +findChild :: QName -> Element -> Maybe Element +findChild q e = listToMaybe (findChildren q e) + +-- | Find an immediate child with the given name. +filterChild :: (Element -> Bool) -> Element -> Maybe Element +filterChild p e = listToMaybe (filterChildren p e) + +-- | Find an immediate child with name matching a predicate. +filterChildName :: (QName -> Bool) -> Element -> Maybe Element +filterChildName p e = listToMaybe (filterChildrenName p e) + +-- | Find the left-most occurrence of an element matching given name. +findElement :: QName -> Element -> Maybe Element +findElement q e = listToMaybe (findElements q e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElement :: (Element -> Bool) -> Element -> Maybe Element +filterElement p e = listToMaybe (filterElements p e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElementName :: (QName -> Bool) -> Element -> Maybe Element +filterElementName p e = listToMaybe (filterElementsName p e) + +-- | Find all non-nested occurances of an element. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +findElements :: QName -> Element -> [Element] +findElements qn e = filterElementsName (qn==) e + +-- | Find all non-nested occurrences of an element wrt. given predicate. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElements :: (Element -> Bool) -> Element -> [Element] +filterElements p e + | p e = [e] + | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e + +-- | Find all non-nested occurences of an element wrt a predicate over element names. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElementsName :: (QName -> Bool) -> Element -> [Element] +filterElementsName p e = filterElements (p.elName) e + +-- | Lookup the value of an attribute. +findAttr :: QName -> Element -> Maybe Text +findAttr x e = lookupAttr x (elAttribs e) + +-- | Lookup attribute name from list. +lookupAttr :: QName -> [Attr] -> Maybe Text +lookupAttr x = lookupAttrBy (x ==) + +-- | Lookup the first attribute whose name satisfies the given predicate. +lookupAttrBy :: (QName -> Bool) -> [Attr] -> Maybe Text +lookupAttrBy p as = attrVal `fmap` find (p . attrKey) as + +-- | Lookup the value of the first attribute whose name +-- satisfies the given predicate. +findAttrBy :: (QName -> Bool) -> Element -> Maybe Text +findAttrBy p e = lookupAttrBy p (elAttribs e) + + +-- +-- duplicates functinos from Text.XML.Output +-- + +-- | The XML 1.0 header +xmlHeader :: Text +xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + +-------------------------------------------------------------------------------- +data ConfigPP = ConfigPP + { shortEmptyTag :: QName -> Bool + , prettify :: Bool + } + +-- | Default pretty orinting configuration. +-- * Always use abbreviate empty tags. +defaultConfigPP :: ConfigPP +defaultConfigPP = ConfigPP { shortEmptyTag = const True + , prettify = False + } + +-- | The predicate specifies for which empty tags we should use XML's +-- abbreviated notation <TAG />. This is useful if we are working with +-- some XML-ish standards (such as certain versions of HTML) where some +-- empty tags should always be displayed in the <TAG></TAG> form. +useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP +useShortEmptyTags p c = c { shortEmptyTag = p } + + +-- | Specify if we should use extra white-space to make document more readable. +-- WARNING: This adds additional white-space to text elements, +-- and so it may change the meaning of the document. +useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP +useExtraWhiteSpace p c = c { prettify = p } + +-- | A configuration that tries to make things pretty +-- (possibly at the cost of changing the semantics a bit +-- through adding white space.) +prettyConfigPP :: ConfigPP +prettyConfigPP = useExtraWhiteSpace True defaultConfigPP + + +-------------------------------------------------------------------------------- + + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppTopElement :: Element -> Text +ppTopElement = ppcTopElement prettyConfigPP + +-- | Pretty printing elements +ppElement :: Element -> Text +ppElement = ppcElement prettyConfigPP + +-- | Pretty printing content +ppContent :: Content -> Text +ppContent = ppcContent prettyConfigPP + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppcTopElement :: ConfigPP -> Element -> Text +ppcTopElement c e = T.unlines [xmlHeader,ppcElement c e] + +-- | Pretty printing elements +ppcElement :: ConfigPP -> Element -> Text +ppcElement c = TL.toStrict . toLazyText . ppElementS c mempty + +-- | Pretty printing content +ppcContent :: ConfigPP -> Content -> Text +ppcContent c = TL.toStrict . toLazyText . ppContentS c mempty + +ppcCData :: ConfigPP -> CData -> Text +ppcCData c = TL.toStrict . toLazyText . ppCDataS c mempty + +type Indent = Builder + +-- | Pretty printing content using ShowT +ppContentS :: ConfigPP -> Indent -> Content -> Builder +ppContentS c i x = case x of + Elem e -> ppElementS c i e + Text t -> ppCDataS c i t + CRef r -> showCRefS r + +ppElementS :: ConfigPP -> Indent -> Element -> Builder +ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <> + (case elContent e of + [] | "?" `T.isPrefixOf` qName name -> fromText " ?>" + | shortEmptyTag c name -> fromText " />" + [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name + cs -> singleton '>' <> nl <> + mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <> + i <> tagEnd name + where (nl,sp) = if prettify c then ("\n"," ") else ("","") + ) + where name = elName e + +ppCDataS :: ConfigPP -> Indent -> CData -> Builder +ppCDataS c i t = i <> if cdVerbatim t /= CDataText || not (prettify c) + then showCDataS t + else foldr cons mempty (T.unpack (showCData t)) + where cons :: Char -> Builder -> Builder + cons '\n' ys = singleton '\n' <> i <> ys + cons y ys = singleton y <> ys + + + +-------------------------------------------------------------------------------- + +-- | Adds the <?xml?> header. +showTopElement :: Element -> Text +showTopElement c = xmlHeader <> showElement c + +showContent :: Content -> Text +showContent = ppcContent defaultConfigPP + +showElement :: Element -> Text +showElement = ppcElement defaultConfigPP + +showCData :: CData -> Text +showCData = ppcCData defaultConfigPP + +-- Note: crefs should not contain '&', ';', etc. +showCRefS :: Text -> Builder +showCRefS r = singleton '&' <> fromText r <> singleton ';' + +-- | Convert a text element to characters. +showCDataS :: CData -> Builder +showCDataS cd = + case cdVerbatim cd of + CDataText -> escStr (cdData cd) + CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <> + fromText "]]>" + CDataRaw -> fromText (cdData cd) + +-------------------------------------------------------------------------------- +escCData :: Text -> Builder +escCData t + | "]]>" `T.isPrefixOf` t = + fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t) +escCData t + = case T.uncons t of + Nothing -> mempty + Just (c,t') -> singleton c <> escCData t' + +escChar :: Char -> Builder +escChar c = case c of + '<' -> fromText "<" + '>' -> fromText ">" + '&' -> fromText "&" + '"' -> fromText """ + -- we use ' instead of ' because IE apparently has difficulties + -- rendering ' in xhtml. + -- Reported by Rohan Drape <rohan.drape@gmail.com>. + '\'' -> fromText "'" + _ -> singleton c + + {- original xml-light version: + -- NOTE: We escape '\r' explicitly because otherwise they get lost + -- when parsed back in because of then end-of-line normalization rules. + _ | isPrint c || c == '\n' -> singleton c + | otherwise -> showText "&#" . showsT oc . singleton ';' + where oc = ord c + -} + +escStr :: Text -> Builder +escStr cs = if T.any needsEscape cs + then mconcat (map escChar (T.unpack cs)) + else fromText cs + where + needsEscape '<' = True + needsEscape '>' = True + needsEscape '&' = True + needsEscape '"' = True + needsEscape '\'' = True + needsEscape _ = False + +tagEnd :: QName -> Builder +tagEnd qn = fromText "</" <> showQName qn <> singleton '>' + +tagStart :: QName -> [Attr] -> Builder +tagStart qn as = singleton '<' <> showQName qn <> as_str + where as_str = if null as + then mempty + else mconcat (map showAttr as) + +showAttr :: Attr -> Builder +showAttr (Attr qn v) = singleton ' ' <> showQName qn <> + singleton '=' <> + singleton '"' <> escStr v <> singleton '"' + +showQName :: QName -> Builder +showQName q = + case qPrefix q of + Nothing -> fromText (qName q) + Just p -> fromText p <> singleton ':' <> fromText (qName q) diff --git a/src/Text/Pandoc/XMLParser.hs b/src/Text/Pandoc/XMLParser.hs deleted file mode 100644 index 8ad22a66a..000000000 --- a/src/Text/Pandoc/XMLParser.hs +++ /dev/null @@ -1,66 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.XMLParser - Copyright : Copyright (C) 2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Bridge to allow using xml-conduit's parser with xml-light's types. --} -module Text.Pandoc.XMLParser - ( parseXMLElement - , parseXMLContents - , module Text.XML.Light.Types - ) where - -import qualified Control.Exception as E -import qualified Text.XML as Conduit -import Text.XML.Unresolved (InvalidEventStream(..)) -import qualified Text.XML.Light as Light -import Text.XML.Light.Types -import qualified Data.Text as T -import qualified Data.Text.Lazy as TL -import qualified Data.Map as M -import Data.Maybe (mapMaybe) - --- Drop in replacement for parseXMLDoc in xml-light. -parseXMLElement :: TL.Text -> Either T.Text Light.Element -parseXMLElement t = - elementToElement . Conduit.documentRoot <$> - either (Left . T.pack . E.displayException) Right - (Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t) - -parseXMLContents :: TL.Text -> Either T.Text [Light.Content] -parseXMLContents t = - case Conduit.parseText Conduit.def{ Conduit.psRetainNamespaces = True } t of - Left e -> - case E.fromException e of - Just (ContentAfterRoot _) -> - elContent <$> parseXMLElement ("<wrapper>" <> t <> "</wrapper>") - _ -> Left . T.pack . E.displayException $ e - Right x -> Right [Light.Elem . elementToElement . Conduit.documentRoot $ x] - -elementToElement :: Conduit.Element -> Light.Element -elementToElement (Conduit.Element name attribMap nodes) = - Light.Element (nameToQname name) attrs (mapMaybe nodeToContent nodes) Nothing - where - attrs = map (\(n,v) -> Light.Attr (nameToQname n) (T.unpack v)) $ - M.toList attribMap - nameToQname (Conduit.Name localName mbns mbpref) = - case mbpref of - Nothing | "xmlns:" `T.isPrefixOf` localName -> - Light.QName (T.unpack $ T.drop 6 localName) (T.unpack <$> mbns) - (Just "xmlns") - _ -> Light.QName (T.unpack localName) (T.unpack <$> mbns) - (T.unpack <$> mbpref) - -nodeToContent :: Conduit.Node -> Maybe Light.Content -nodeToContent (Conduit.NodeElement el) = - Just (Light.Elem (elementToElement el)) -nodeToContent (Conduit.NodeContent t) = - Just (Light.Text (Light.CData Light.CDataText (T.unpack t) Nothing)) -nodeToContent _ = Nothing - diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index 376f02c55..c1e47622d 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -43,7 +43,8 @@ compareXMLBool _ _ = False displayDiff :: Content -> Content -> String displayDiff elemA elemB = - showDiff (1,1) $ getDiff (lines $ ppContent elemA) (lines $ ppContent elemB) + showDiff (1,1) + (getDiff (lines $ showContent elemA) (lines $ showContent elemB)) goldenArchive :: FilePath -> IO Archive goldenArchive fp = toArchive . BL.fromStrict <$> BS.readFile fp diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index d3b16d0f2..ed7d1165c 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 6293ef493..07ae75676 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index 4205a1516..e5f034378 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index adb3f23db..174942135 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 92c8137fe..b5c31a851 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index f53470617..c42ca1b05 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index d6af90a72..1cb4c1fd7 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index e18dbe853..7122456ea 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 820299043..616ba0f81 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index ae0f41d12..c30dcdee9 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 94cd35dfa..8a704b41e 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 879f2a25b..b1906c8c4 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 93f86478f..8adf1cf75 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 967d297f2..584117503 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index c5614e2fa..8859fe55c 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 0f39a831f..b80f3b3ba 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 07046f223..35beed68a 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 3656618e6..2c29fd674 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 8798253d5..10a948886 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 0a24d1840..5b90e74a0 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index 52bb7a217..cc81b46d1 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 182c06c64..1394dc442 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 7b69a56a3..0d1688694 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 3d3a35701..be1caef2d 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 5ae37b406..a1d2323c2 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index c29aa6716..2f3a831a7 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 664493246..af066107c 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index b6d15340e..9cc7a075f 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index f8e1092d2..f8b8dcfde 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index b4cda82f2..1c3baf0bf 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index ee222efa0..28686970d 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index c6f8d9c96..7051cefbd 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index ea8146690..9df631640 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ diff --git a/test/pptx/code-custom.pptx b/test/pptx/code-custom.pptx index 58070eb3f..5e9c2c630 100644 Binary files a/test/pptx/code-custom.pptx and b/test/pptx/code-custom.pptx differ diff --git a/test/pptx/code-custom_templated.pptx b/test/pptx/code-custom_templated.pptx index db9b7e371..15232c32d 100644 Binary files a/test/pptx/code-custom_templated.pptx and b/test/pptx/code-custom_templated.pptx differ diff --git a/test/pptx/code.pptx b/test/pptx/code.pptx index c7b1ed7d5..aab0cc6f5 100644 Binary files a/test/pptx/code.pptx and b/test/pptx/code.pptx differ diff --git a/test/pptx/code_templated.pptx b/test/pptx/code_templated.pptx index 6944d92bf..fe5b675f3 100644 Binary files a/test/pptx/code_templated.pptx and b/test/pptx/code_templated.pptx differ diff --git a/test/pptx/document-properties-short-desc.pptx b/test/pptx/document-properties-short-desc.pptx index ae0d28429..de5e68151 100644 Binary files a/test/pptx/document-properties-short-desc.pptx and b/test/pptx/document-properties-short-desc.pptx differ diff --git a/test/pptx/document-properties-short-desc_templated.pptx b/test/pptx/document-properties-short-desc_templated.pptx index 37c74c69a..89e6fbdf2 100644 Binary files a/test/pptx/document-properties-short-desc_templated.pptx and b/test/pptx/document-properties-short-desc_templated.pptx differ diff --git a/test/pptx/document-properties.pptx b/test/pptx/document-properties.pptx index 324e443a1..6bcbd1b9c 100644 Binary files a/test/pptx/document-properties.pptx and b/test/pptx/document-properties.pptx differ diff --git a/test/pptx/document-properties_templated.pptx b/test/pptx/document-properties_templated.pptx index c81b983e3..79d48560b 100644 Binary files a/test/pptx/document-properties_templated.pptx and b/test/pptx/document-properties_templated.pptx differ diff --git a/test/pptx/endnotes.pptx b/test/pptx/endnotes.pptx index 30ce33db6..9d46036fe 100644 Binary files a/test/pptx/endnotes.pptx and b/test/pptx/endnotes.pptx differ diff --git a/test/pptx/endnotes_templated.pptx b/test/pptx/endnotes_templated.pptx index d6c604968..54ec7f305 100644 Binary files a/test/pptx/endnotes_templated.pptx and b/test/pptx/endnotes_templated.pptx differ diff --git a/test/pptx/endnotes_toc.pptx b/test/pptx/endnotes_toc.pptx index 000e17ecd..a028b346f 100644 Binary files a/test/pptx/endnotes_toc.pptx and b/test/pptx/endnotes_toc.pptx differ diff --git a/test/pptx/endnotes_toc_templated.pptx b/test/pptx/endnotes_toc_templated.pptx index fdcd2e29b..1158c16fc 100644 Binary files a/test/pptx/endnotes_toc_templated.pptx and b/test/pptx/endnotes_toc_templated.pptx differ diff --git a/test/pptx/images.pptx b/test/pptx/images.pptx index e73126376..670a825de 100644 Binary files a/test/pptx/images.pptx and b/test/pptx/images.pptx differ diff --git a/test/pptx/images_templated.pptx b/test/pptx/images_templated.pptx index e3f968e9e..6d297ef11 100644 Binary files a/test/pptx/images_templated.pptx and b/test/pptx/images_templated.pptx differ diff --git a/test/pptx/inline_formatting.pptx b/test/pptx/inline_formatting.pptx index eadb9372e..473b9498d 100644 Binary files a/test/pptx/inline_formatting.pptx and b/test/pptx/inline_formatting.pptx differ diff --git a/test/pptx/inline_formatting_templated.pptx b/test/pptx/inline_formatting_templated.pptx index 8ca6bab2b..2cdf54474 100644 Binary files a/test/pptx/inline_formatting_templated.pptx and b/test/pptx/inline_formatting_templated.pptx differ diff --git a/test/pptx/lists.pptx b/test/pptx/lists.pptx index ae188ee68..ffc2eb9f7 100644 Binary files a/test/pptx/lists.pptx and b/test/pptx/lists.pptx differ diff --git a/test/pptx/lists_templated.pptx b/test/pptx/lists_templated.pptx index 60301fa50..676954cb8 100644 Binary files a/test/pptx/lists_templated.pptx and b/test/pptx/lists_templated.pptx differ diff --git a/test/pptx/raw_ooxml.pptx b/test/pptx/raw_ooxml.pptx index 17124a50d..29164af15 100644 Binary files a/test/pptx/raw_ooxml.pptx and b/test/pptx/raw_ooxml.pptx differ diff --git a/test/pptx/raw_ooxml_templated.pptx b/test/pptx/raw_ooxml_templated.pptx index 19ae7dd4e..1742b3296 100644 Binary files a/test/pptx/raw_ooxml_templated.pptx and b/test/pptx/raw_ooxml_templated.pptx differ diff --git a/test/pptx/remove_empty_slides.pptx b/test/pptx/remove_empty_slides.pptx index b650b7585..c6df8e18e 100644 Binary files a/test/pptx/remove_empty_slides.pptx and b/test/pptx/remove_empty_slides.pptx differ diff --git a/test/pptx/remove_empty_slides_templated.pptx b/test/pptx/remove_empty_slides_templated.pptx index 0ab029614..cf6e52eef 100644 Binary files a/test/pptx/remove_empty_slides_templated.pptx and b/test/pptx/remove_empty_slides_templated.pptx differ diff --git a/test/pptx/slide_breaks.pptx b/test/pptx/slide_breaks.pptx index 2a6e35080..e06d9079d 100644 Binary files a/test/pptx/slide_breaks.pptx and b/test/pptx/slide_breaks.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1.pptx b/test/pptx/slide_breaks_slide_level_1.pptx index a7bcf6a4b..449339778 100644 Binary files a/test/pptx/slide_breaks_slide_level_1.pptx and b/test/pptx/slide_breaks_slide_level_1.pptx differ diff --git a/test/pptx/slide_breaks_slide_level_1_templated.pptx b/test/pptx/slide_breaks_slide_level_1_templated.pptx index 21b018c25..258098082 100644 Binary files a/test/pptx/slide_breaks_slide_level_1_templated.pptx and b/test/pptx/slide_breaks_slide_level_1_templated.pptx differ diff --git a/test/pptx/slide_breaks_templated.pptx b/test/pptx/slide_breaks_templated.pptx index 4ec4772a4..2f0213919 100644 Binary files a/test/pptx/slide_breaks_templated.pptx and b/test/pptx/slide_breaks_templated.pptx differ diff --git a/test/pptx/slide_breaks_toc.pptx b/test/pptx/slide_breaks_toc.pptx index 5983657b6..9dbfa41a0 100644 Binary files a/test/pptx/slide_breaks_toc.pptx and b/test/pptx/slide_breaks_toc.pptx differ diff --git a/test/pptx/slide_breaks_toc_templated.pptx b/test/pptx/slide_breaks_toc_templated.pptx index dd54c7082..f288dde14 100644 Binary files a/test/pptx/slide_breaks_toc_templated.pptx and b/test/pptx/slide_breaks_toc_templated.pptx differ diff --git a/test/pptx/speaker_notes.pptx b/test/pptx/speaker_notes.pptx index b3e5ed5b9..0ab1302da 100644 Binary files a/test/pptx/speaker_notes.pptx and b/test/pptx/speaker_notes.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata.pptx b/test/pptx/speaker_notes_after_metadata.pptx index 1078854bb..6343bffe4 100644 Binary files a/test/pptx/speaker_notes_after_metadata.pptx and b/test/pptx/speaker_notes_after_metadata.pptx differ diff --git a/test/pptx/speaker_notes_after_metadata_templated.pptx b/test/pptx/speaker_notes_after_metadata_templated.pptx index 5116c6c4e..5d4465f64 100644 Binary files a/test/pptx/speaker_notes_after_metadata_templated.pptx and b/test/pptx/speaker_notes_after_metadata_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterheader.pptx b/test/pptx/speaker_notes_afterheader.pptx index 0c8e49bd9..d581681aa 100644 Binary files a/test/pptx/speaker_notes_afterheader.pptx and b/test/pptx/speaker_notes_afterheader.pptx differ diff --git a/test/pptx/speaker_notes_afterheader_templated.pptx b/test/pptx/speaker_notes_afterheader_templated.pptx index 68695939d..c922df3a8 100644 Binary files a/test/pptx/speaker_notes_afterheader_templated.pptx and b/test/pptx/speaker_notes_afterheader_templated.pptx differ diff --git a/test/pptx/speaker_notes_afterseps.pptx b/test/pptx/speaker_notes_afterseps.pptx index 7ed9b946d..13f564bf0 100644 Binary files a/test/pptx/speaker_notes_afterseps.pptx and b/test/pptx/speaker_notes_afterseps.pptx differ diff --git a/test/pptx/speaker_notes_afterseps_templated.pptx b/test/pptx/speaker_notes_afterseps_templated.pptx index 79fc82345..f0b302738 100644 Binary files a/test/pptx/speaker_notes_afterseps_templated.pptx and b/test/pptx/speaker_notes_afterseps_templated.pptx differ diff --git a/test/pptx/speaker_notes_templated.pptx b/test/pptx/speaker_notes_templated.pptx index 9f943c279..2d8d00242 100644 Binary files a/test/pptx/speaker_notes_templated.pptx and b/test/pptx/speaker_notes_templated.pptx differ diff --git a/test/pptx/start_numbering_at.pptx b/test/pptx/start_numbering_at.pptx index ac72d8ced..4320128b3 100644 Binary files a/test/pptx/start_numbering_at.pptx and b/test/pptx/start_numbering_at.pptx differ diff --git a/test/pptx/start_numbering_at_templated.pptx b/test/pptx/start_numbering_at_templated.pptx index 15c7b5469..0a3f6e56d 100644 Binary files a/test/pptx/start_numbering_at_templated.pptx and b/test/pptx/start_numbering_at_templated.pptx differ diff --git a/test/pptx/tables.pptx b/test/pptx/tables.pptx index 926c5e699..e41219844 100644 Binary files a/test/pptx/tables.pptx and b/test/pptx/tables.pptx differ diff --git a/test/pptx/tables_templated.pptx b/test/pptx/tables_templated.pptx index a37e72d2c..82b8aa13d 100644 Binary files a/test/pptx/tables_templated.pptx and b/test/pptx/tables_templated.pptx differ diff --git a/test/pptx/two_column.pptx b/test/pptx/two_column.pptx index 7f86533fe..270a7eeac 100644 Binary files a/test/pptx/two_column.pptx and b/test/pptx/two_column.pptx differ diff --git a/test/pptx/two_column_templated.pptx b/test/pptx/two_column_templated.pptx index 89e3db0ab..44985d701 100644 Binary files a/test/pptx/two_column_templated.pptx and b/test/pptx/two_column_templated.pptx differ -- cgit v1.2.3 From d7a4996b1e7e201095ce792375a0776984fa8fcd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 16 Feb 2021 18:40:06 -0800 Subject: Split up T.P.XML.Light into submodules. --- pandoc.cabal | 3 + src/Text/Pandoc/XML/Light.hs | 511 +----------------------------------- src/Text/Pandoc/XML/Light/Output.hs | 230 ++++++++++++++++ src/Text/Pandoc/XML/Light/Proc.hs | 138 ++++++++++ src/Text/Pandoc/XML/Light/Types.hs | 190 ++++++++++++++ 5 files changed, 568 insertions(+), 504 deletions(-) create mode 100644 src/Text/Pandoc/XML/Light/Output.hs create mode 100644 src/Text/Pandoc/XML/Light/Proc.hs create mode 100644 src/Text/Pandoc/XML/Light/Types.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 22aebd55e..d27520ba0 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -688,6 +688,9 @@ library Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Walk, Text.Pandoc.XML.Light, + Text.Pandoc.XML.Light.Types, + Text.Pandoc.XML.Light.Proc, + Text.Pandoc.XML.Light.Output, Text.Pandoc.CSS, Text.Pandoc.CSV, Text.Pandoc.RoffChar, diff --git a/src/Text/Pandoc/XML/Light.hs b/src/Text/Pandoc/XML/Light.hs index 38e4df218..07113ea92 100644 --- a/src/Text/Pandoc/XML/Light.hs +++ b/src/Text/Pandoc/XML/Light.hs @@ -1,5 +1,4 @@ {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.XML.Light @@ -31,59 +30,9 @@ better performance and accuracy without much change in the code that used xml-light. -} module Text.Pandoc.XML.Light - ( -- * Basic types, duplicating those from xml-light but with Text - -- instead of String - Line - , Content(..) - , Element(..) - , Attr(..) - , CData(..) - , CDataKind(..) - , QName(..) - , Node(..) - , unode - , unqual - , add_attr - , add_attrs - -- * Conversion functions from xml-light types - , fromXLQName - , fromXLCData - , fromXLElement - , fromXLAttr - , fromXLContent - -- * Replacement for xml-light's Text.XML.Proc - , strContent - , onlyElems - , elChildren - , onlyText - , findChildren - , filterChildren - , filterChildrenName - , findChild - , filterChild - , filterChildName - , findElement - , filterElement - , filterElementName - , findElements - , filterElements - , filterElementsName - , findAttr - , lookupAttr - , lookupAttrBy - , findAttrBy - -- * Replacement for xml-light's Text.XML.Output - , ppTopElement - , ppElement - , ppContent - , ppcElement - , ppcContent - , showTopElement - , showElement - , showContent - , useShortEmptyTags - , defaultConfigPP - , ConfigPP(..) + ( module Text.Pandoc.XML.Light.Types + , module Text.Pandoc.XML.Light.Proc + , module Text.Pandoc.XML.Light.Output -- * Replacement for xml-light's Text.XML.Input , parseXMLElement , parseXMLContents @@ -92,16 +41,13 @@ module Text.Pandoc.XML.Light import qualified Control.Exception as E import qualified Text.XML as Conduit import Text.XML.Unresolved (InvalidEventStream(..)) -import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL -import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText) import qualified Data.Map as M -import Data.Data (Data) -import Data.Typeable (Typeable) -import Data.Maybe (mapMaybe, listToMaybe) -import Data.List(find) -import qualified Text.XML.Light as XL +import Data.Maybe (mapMaybe) +import Text.Pandoc.XML.Light.Types +import Text.Pandoc.XML.Light.Proc +import Text.Pandoc.XML.Light.Output -- Drop in replacement for parseXMLDoc in xml-light. parseXMLElement :: TL.Text -> Either T.Text Element @@ -141,446 +87,3 @@ nodeToContent (Conduit.NodeContent t) = Just (Text (CData CDataText t Nothing)) nodeToContent _ = Nothing -unqual :: Text -> QName -unqual x = QName x Nothing Nothing - --- | Add an attribute to an element. -add_attr :: Attr -> Element -> Element -add_attr a e = add_attrs [a] e - --- | Add some attributes to an element. -add_attrs :: [Attr] -> Element -> Element -add_attrs as e = e { elAttribs = as ++ elAttribs e } - --- --- type definitions lightly modified from xml-light --- - --- | A line is an Integer -type Line = Integer - --- | XML content -data Content = Elem Element - | Text CData - | CRef Text - deriving (Show, Typeable, Data) - --- | XML elements -data Element = Element { - elName :: QName, - elAttribs :: [Attr], - elContent :: [Content], - elLine :: Maybe Line - } deriving (Show, Typeable, Data) - --- | XML attributes -data Attr = Attr { - attrKey :: QName, - attrVal :: Text - } deriving (Eq, Ord, Show, Typeable, Data) - --- | XML CData -data CData = CData { - cdVerbatim :: CDataKind, - cdData :: Text, - cdLine :: Maybe Line - } deriving (Show, Typeable, Data) - -data CDataKind - = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. - | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[.. - | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up. - deriving ( Eq, Show, Typeable, Data ) - --- | XML qualified names -data QName = QName { - qName :: Text, - qURI :: Maybe Text, - qPrefix :: Maybe Text - } deriving (Show, Typeable, Data) - - -instance Eq QName where - q1 == q2 = compare q1 q2 == EQ - -instance Ord QName where - compare q1 q2 = - case compare (qName q1) (qName q2) of - EQ -> case (qURI q1, qURI q2) of - (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) - (u1,u2) -> compare u1 u2 - x -> x - -class Node t where - node :: QName -> t -> Element - -instance Node ([Attr],[Content]) where - node n (attrs,cont) = Element { elName = n - , elAttribs = attrs - , elContent = cont - , elLine = Nothing - } - -instance Node [Attr] where node n as = node n (as,[]::[Content]) -instance Node Attr where node n a = node n [a] -instance Node () where node n () = node n ([]::[Attr]) - -instance Node [Content] where node n cs = node n ([]::[Attr],cs) -instance Node Content where node n c = node n [c] -instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) -instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) - -instance Node ([Attr],[Element]) where - node n (as,cs) = node n (as,map Elem cs) - -instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) -instance Node (Attr,Element) where node n (a,c) = node n ([a],c) -instance Node [Element] where node n es = node n ([]::[Attr],es) -instance Node Element where node n e = node n [e] - -instance Node ([Attr],[CData]) where - node n (as,cs) = node n (as,map Text cs) - -instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) -instance Node (Attr,CData) where node n (a,c) = node n ([a],c) -instance Node [CData] where node n es = node n ([]::[Attr],es) -instance Node CData where node n e = node n [e] - -instance Node ([Attr],Text) where - node n (as,t) = node n (as, CData { cdVerbatim = CDataText - , cdData = t - , cdLine = Nothing }) - -instance Node (Attr,Text ) where node n (a,t) = node n ([a],t) -instance Node Text where node n t = node n ([]::[Attr],t) - --- | Create node with unqualified name -unode :: Node t => Text -> t -> Element -unode = node . unqual - --- --- conversion from xml-light --- - -fromXLQName :: XL.QName -> QName -fromXLQName qn = QName { qName = T.pack $ XL.qName qn - , qURI = T.pack <$> XL.qURI qn - , qPrefix = T.pack <$> XL.qPrefix qn } - -fromXLCData :: XL.CData -> CData -fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of - XL.CDataText -> CDataText - XL.CDataVerbatim -> CDataVerbatim - XL.CDataRaw -> CDataRaw - , cdData = T.pack $ XL.cdData cd - , cdLine = XL.cdLine cd } - -fromXLElement :: XL.Element -> Element -fromXLElement el = Element { elName = fromXLQName $ XL.elName el - , elAttribs = map fromXLAttr $ XL.elAttribs el - , elContent = map fromXLContent $ XL.elContent el - , elLine = XL.elLine el } - -fromXLAttr :: XL.Attr -> Attr -fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s) - -fromXLContent :: XL.Content -> Content -fromXLContent (XL.Elem el) = Elem $ fromXLElement el -fromXLContent (XL.Text cd) = Text $ fromXLCData cd -fromXLContent (XL.CRef s) = CRef (T.pack s) - --- --- copied from xml-light Text.XML.Proc --- - --- | Get the text value of an XML element. This function --- ignores non-text elements, and concatenates all text elements. -strContent :: Element -> Text -strContent = mconcat . map cdData . onlyText . elContent - --- | Select only the elements from a list of XML content. -onlyElems :: [Content] -> [Element] -onlyElems xs = [ x | Elem x <- xs ] - --- | Select only the elements from a parent. -elChildren :: Element -> [Element] -elChildren e = [ x | Elem x <- elContent e ] - --- | Select only the text from a list of XML content. -onlyText :: [Content] -> [CData] -onlyText xs = [ x | Text x <- xs ] - --- | Find all immediate children with the given name. -findChildren :: QName -> Element -> [Element] -findChildren q e = filterChildren ((q ==) . elName) e - --- | Filter all immediate children wrt a given predicate. -filterChildren :: (Element -> Bool) -> Element -> [Element] -filterChildren p e = filter p (onlyElems (elContent e)) - - --- | Filter all immediate children wrt a given predicate over their names. -filterChildrenName :: (QName -> Bool) -> Element -> [Element] -filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) - - --- | Find an immediate child with the given name. -findChild :: QName -> Element -> Maybe Element -findChild q e = listToMaybe (findChildren q e) - --- | Find an immediate child with the given name. -filterChild :: (Element -> Bool) -> Element -> Maybe Element -filterChild p e = listToMaybe (filterChildren p e) - --- | Find an immediate child with name matching a predicate. -filterChildName :: (QName -> Bool) -> Element -> Maybe Element -filterChildName p e = listToMaybe (filterChildrenName p e) - --- | Find the left-most occurrence of an element matching given name. -findElement :: QName -> Element -> Maybe Element -findElement q e = listToMaybe (findElements q e) - --- | Filter the left-most occurrence of an element wrt. given predicate. -filterElement :: (Element -> Bool) -> Element -> Maybe Element -filterElement p e = listToMaybe (filterElements p e) - --- | Filter the left-most occurrence of an element wrt. given predicate. -filterElementName :: (QName -> Bool) -> Element -> Maybe Element -filterElementName p e = listToMaybe (filterElementsName p e) - --- | Find all non-nested occurances of an element. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -findElements :: QName -> Element -> [Element] -findElements qn e = filterElementsName (qn==) e - --- | Find all non-nested occurrences of an element wrt. given predicate. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -filterElements :: (Element -> Bool) -> Element -> [Element] -filterElements p e - | p e = [e] - | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e - --- | Find all non-nested occurences of an element wrt a predicate over element names. --- (i.e., once we have found an element, we do not search --- for more occurances among the element's children). -filterElementsName :: (QName -> Bool) -> Element -> [Element] -filterElementsName p e = filterElements (p.elName) e - --- | Lookup the value of an attribute. -findAttr :: QName -> Element -> Maybe Text -findAttr x e = lookupAttr x (elAttribs e) - --- | Lookup attribute name from list. -lookupAttr :: QName -> [Attr] -> Maybe Text -lookupAttr x = lookupAttrBy (x ==) - --- | Lookup the first attribute whose name satisfies the given predicate. -lookupAttrBy :: (QName -> Bool) -> [Attr] -> Maybe Text -lookupAttrBy p as = attrVal `fmap` find (p . attrKey) as - --- | Lookup the value of the first attribute whose name --- satisfies the given predicate. -findAttrBy :: (QName -> Bool) -> Element -> Maybe Text -findAttrBy p e = lookupAttrBy p (elAttribs e) - - --- --- duplicates functinos from Text.XML.Output --- - --- | The XML 1.0 header -xmlHeader :: Text -xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" - - --------------------------------------------------------------------------------- -data ConfigPP = ConfigPP - { shortEmptyTag :: QName -> Bool - , prettify :: Bool - } - --- | Default pretty orinting configuration. --- * Always use abbreviate empty tags. -defaultConfigPP :: ConfigPP -defaultConfigPP = ConfigPP { shortEmptyTag = const True - , prettify = False - } - --- | The predicate specifies for which empty tags we should use XML's --- abbreviated notation <TAG />. This is useful if we are working with --- some XML-ish standards (such as certain versions of HTML) where some --- empty tags should always be displayed in the <TAG></TAG> form. -useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP -useShortEmptyTags p c = c { shortEmptyTag = p } - - --- | Specify if we should use extra white-space to make document more readable. --- WARNING: This adds additional white-space to text elements, --- and so it may change the meaning of the document. -useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP -useExtraWhiteSpace p c = c { prettify = p } - --- | A configuration that tries to make things pretty --- (possibly at the cost of changing the semantics a bit --- through adding white space.) -prettyConfigPP :: ConfigPP -prettyConfigPP = useExtraWhiteSpace True defaultConfigPP - - --------------------------------------------------------------------------------- - - --- | Pretty printing renders XML documents faithfully, --- with the exception that whitespace may be added\/removed --- in non-verbatim character data. -ppTopElement :: Element -> Text -ppTopElement = ppcTopElement prettyConfigPP - --- | Pretty printing elements -ppElement :: Element -> Text -ppElement = ppcElement prettyConfigPP - --- | Pretty printing content -ppContent :: Content -> Text -ppContent = ppcContent prettyConfigPP - --- | Pretty printing renders XML documents faithfully, --- with the exception that whitespace may be added\/removed --- in non-verbatim character data. -ppcTopElement :: ConfigPP -> Element -> Text -ppcTopElement c e = T.unlines [xmlHeader,ppcElement c e] - --- | Pretty printing elements -ppcElement :: ConfigPP -> Element -> Text -ppcElement c = TL.toStrict . toLazyText . ppElementS c mempty - --- | Pretty printing content -ppcContent :: ConfigPP -> Content -> Text -ppcContent c = TL.toStrict . toLazyText . ppContentS c mempty - -ppcCData :: ConfigPP -> CData -> Text -ppcCData c = TL.toStrict . toLazyText . ppCDataS c mempty - -type Indent = Builder - --- | Pretty printing content using ShowT -ppContentS :: ConfigPP -> Indent -> Content -> Builder -ppContentS c i x = case x of - Elem e -> ppElementS c i e - Text t -> ppCDataS c i t - CRef r -> showCRefS r - -ppElementS :: ConfigPP -> Indent -> Element -> Builder -ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <> - (case elContent e of - [] | "?" `T.isPrefixOf` qName name -> fromText " ?>" - | shortEmptyTag c name -> fromText " />" - [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name - cs -> singleton '>' <> nl <> - mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <> - i <> tagEnd name - where (nl,sp) = if prettify c then ("\n"," ") else ("","") - ) - where name = elName e - -ppCDataS :: ConfigPP -> Indent -> CData -> Builder -ppCDataS c i t = i <> if cdVerbatim t /= CDataText || not (prettify c) - then showCDataS t - else foldr cons mempty (T.unpack (showCData t)) - where cons :: Char -> Builder -> Builder - cons '\n' ys = singleton '\n' <> i <> ys - cons y ys = singleton y <> ys - - - --------------------------------------------------------------------------------- - --- | Adds the <?xml?> header. -showTopElement :: Element -> Text -showTopElement c = xmlHeader <> showElement c - -showContent :: Content -> Text -showContent = ppcContent defaultConfigPP - -showElement :: Element -> Text -showElement = ppcElement defaultConfigPP - -showCData :: CData -> Text -showCData = ppcCData defaultConfigPP - --- Note: crefs should not contain '&', ';', etc. -showCRefS :: Text -> Builder -showCRefS r = singleton '&' <> fromText r <> singleton ';' - --- | Convert a text element to characters. -showCDataS :: CData -> Builder -showCDataS cd = - case cdVerbatim cd of - CDataText -> escStr (cdData cd) - CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <> - fromText "]]>" - CDataRaw -> fromText (cdData cd) - --------------------------------------------------------------------------------- -escCData :: Text -> Builder -escCData t - | "]]>" `T.isPrefixOf` t = - fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t) -escCData t - = case T.uncons t of - Nothing -> mempty - Just (c,t') -> singleton c <> escCData t' - -escChar :: Char -> Builder -escChar c = case c of - '<' -> fromText "<" - '>' -> fromText ">" - '&' -> fromText "&" - '"' -> fromText """ - -- we use ' instead of ' because IE apparently has difficulties - -- rendering ' in xhtml. - -- Reported by Rohan Drape <rohan.drape@gmail.com>. - '\'' -> fromText "'" - _ -> singleton c - - {- original xml-light version: - -- NOTE: We escape '\r' explicitly because otherwise they get lost - -- when parsed back in because of then end-of-line normalization rules. - _ | isPrint c || c == '\n' -> singleton c - | otherwise -> showText "&#" . showsT oc . singleton ';' - where oc = ord c - -} - -escStr :: Text -> Builder -escStr cs = if T.any needsEscape cs - then mconcat (map escChar (T.unpack cs)) - else fromText cs - where - needsEscape '<' = True - needsEscape '>' = True - needsEscape '&' = True - needsEscape '"' = True - needsEscape '\'' = True - needsEscape _ = False - -tagEnd :: QName -> Builder -tagEnd qn = fromText "</" <> showQName qn <> singleton '>' - -tagStart :: QName -> [Attr] -> Builder -tagStart qn as = singleton '<' <> showQName qn <> as_str - where as_str = if null as - then mempty - else mconcat (map showAttr as) - -showAttr :: Attr -> Builder -showAttr (Attr qn v) = singleton ' ' <> showQName qn <> - singleton '=' <> - singleton '"' <> escStr v <> singleton '"' - -showQName :: QName -> Builder -showQName q = - case qPrefix q of - Nothing -> fromText (qName q) - Just p -> fromText p <> singleton ':' <> fromText (qName q) diff --git a/src/Text/Pandoc/XML/Light/Output.hs b/src/Text/Pandoc/XML/Light/Output.hs new file mode 100644 index 000000000..dc94ce914 --- /dev/null +++ b/src/Text/Pandoc/XML/Light/Output.hs @@ -0,0 +1,230 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XML.Light.Output + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.XML.Light.Output + ( -- * Replacement for xml-light's Text.XML.Output + ppTopElement + , ppElement + , ppContent + , ppcElement + , ppcContent + , showTopElement + , showElement + , showContent + , useShortEmptyTags + , defaultConfigPP + , ConfigPP(..) + ) where + +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.Text.Lazy as TL +import Data.Text.Lazy.Builder (Builder, singleton, fromText, toLazyText) +import Text.Pandoc.XML.Light.Types + +-- +-- duplicates functinos from Text.XML.Output +-- + +-- | The XML 1.0 header +xmlHeader :: Text +xmlHeader = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + +-------------------------------------------------------------------------------- +data ConfigPP = ConfigPP + { shortEmptyTag :: QName -> Bool + , prettify :: Bool + } + +-- | Default pretty orinting configuration. +-- * Always use abbreviate empty tags. +defaultConfigPP :: ConfigPP +defaultConfigPP = ConfigPP { shortEmptyTag = const True + , prettify = False + } + +-- | The predicate specifies for which empty tags we should use XML's +-- abbreviated notation <TAG />. This is useful if we are working with +-- some XML-ish standards (such as certain versions of HTML) where some +-- empty tags should always be displayed in the <TAG></TAG> form. +useShortEmptyTags :: (QName -> Bool) -> ConfigPP -> ConfigPP +useShortEmptyTags p c = c { shortEmptyTag = p } + + +-- | Specify if we should use extra white-space to make document more readable. +-- WARNING: This adds additional white-space to text elements, +-- and so it may change the meaning of the document. +useExtraWhiteSpace :: Bool -> ConfigPP -> ConfigPP +useExtraWhiteSpace p c = c { prettify = p } + +-- | A configuration that tries to make things pretty +-- (possibly at the cost of changing the semantics a bit +-- through adding white space.) +prettyConfigPP :: ConfigPP +prettyConfigPP = useExtraWhiteSpace True defaultConfigPP + + +-------------------------------------------------------------------------------- + + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppTopElement :: Element -> Text +ppTopElement = ppcTopElement prettyConfigPP + +-- | Pretty printing elements +ppElement :: Element -> Text +ppElement = ppcElement prettyConfigPP + +-- | Pretty printing content +ppContent :: Content -> Text +ppContent = ppcContent prettyConfigPP + +-- | Pretty printing renders XML documents faithfully, +-- with the exception that whitespace may be added\/removed +-- in non-verbatim character data. +ppcTopElement :: ConfigPP -> Element -> Text +ppcTopElement c e = T.unlines [xmlHeader,ppcElement c e] + +-- | Pretty printing elements +ppcElement :: ConfigPP -> Element -> Text +ppcElement c = TL.toStrict . toLazyText . ppElementS c mempty + +-- | Pretty printing content +ppcContent :: ConfigPP -> Content -> Text +ppcContent c = TL.toStrict . toLazyText . ppContentS c mempty + +ppcCData :: ConfigPP -> CData -> Text +ppcCData c = TL.toStrict . toLazyText . ppCDataS c mempty + +type Indent = Builder + +-- | Pretty printing content using ShowT +ppContentS :: ConfigPP -> Indent -> Content -> Builder +ppContentS c i x = case x of + Elem e -> ppElementS c i e + Text t -> ppCDataS c i t + CRef r -> showCRefS r + +ppElementS :: ConfigPP -> Indent -> Element -> Builder +ppElementS c i e = i <> tagStart (elName e) (elAttribs e) <> + (case elContent e of + [] | "?" `T.isPrefixOf` qName name -> fromText " ?>" + | shortEmptyTag c name -> fromText " />" + [Text t] -> singleton '>' <> ppCDataS c mempty t <> tagEnd name + cs -> singleton '>' <> nl <> + mconcat (map ((<> nl) . ppContentS c (sp <> i)) cs) <> + i <> tagEnd name + where (nl,sp) = if prettify c then ("\n"," ") else ("","") + ) + where name = elName e + +ppCDataS :: ConfigPP -> Indent -> CData -> Builder +ppCDataS c i t = i <> if cdVerbatim t /= CDataText || not (prettify c) + then showCDataS t + else foldr cons mempty (T.unpack (showCData t)) + where cons :: Char -> Builder -> Builder + cons '\n' ys = singleton '\n' <> i <> ys + cons y ys = singleton y <> ys + + + +-------------------------------------------------------------------------------- + +-- | Adds the <?xml?> header. +showTopElement :: Element -> Text +showTopElement c = xmlHeader <> showElement c + +showContent :: Content -> Text +showContent = ppcContent defaultConfigPP + +showElement :: Element -> Text +showElement = ppcElement defaultConfigPP + +showCData :: CData -> Text +showCData = ppcCData defaultConfigPP + +-- Note: crefs should not contain '&', ';', etc. +showCRefS :: Text -> Builder +showCRefS r = singleton '&' <> fromText r <> singleton ';' + +-- | Convert a text element to characters. +showCDataS :: CData -> Builder +showCDataS cd = + case cdVerbatim cd of + CDataText -> escStr (cdData cd) + CDataVerbatim -> fromText "<![CDATA[" <> escCData (cdData cd) <> + fromText "]]>" + CDataRaw -> fromText (cdData cd) + +-------------------------------------------------------------------------------- +escCData :: Text -> Builder +escCData t + | "]]>" `T.isPrefixOf` t = + fromText "]]]]><![CDATA[>" <> fromText (T.drop 3 t) +escCData t + = case T.uncons t of + Nothing -> mempty + Just (c,t') -> singleton c <> escCData t' + +escChar :: Char -> Builder +escChar c = case c of + '<' -> fromText "<" + '>' -> fromText ">" + '&' -> fromText "&" + '"' -> fromText """ + -- we use ' instead of ' because IE apparently has difficulties + -- rendering ' in xhtml. + -- Reported by Rohan Drape <rohan.drape@gmail.com>. + '\'' -> fromText "'" + _ -> singleton c + + {- original xml-light version: + -- NOTE: We escape '\r' explicitly because otherwise they get lost + -- when parsed back in because of then end-of-line normalization rules. + _ | isPrint c || c == '\n' -> singleton c + | otherwise -> showText "&#" . showsT oc . singleton ';' + where oc = ord c + -} + +escStr :: Text -> Builder +escStr cs = if T.any needsEscape cs + then mconcat (map escChar (T.unpack cs)) + else fromText cs + where + needsEscape '<' = True + needsEscape '>' = True + needsEscape '&' = True + needsEscape '"' = True + needsEscape '\'' = True + needsEscape _ = False + +tagEnd :: QName -> Builder +tagEnd qn = fromText "</" <> showQName qn <> singleton '>' + +tagStart :: QName -> [Attr] -> Builder +tagStart qn as = singleton '<' <> showQName qn <> as_str + where as_str = if null as + then mempty + else mconcat (map showAttr as) + +showAttr :: Attr -> Builder +showAttr (Attr qn v) = singleton ' ' <> showQName qn <> + singleton '=' <> + singleton '"' <> escStr v <> singleton '"' + +showQName :: QName -> Builder +showQName q = + case qPrefix q of + Nothing -> fromText (qName q) + Just p -> fromText p <> singleton ':' <> fromText (qName q) diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/src/Text/Pandoc/XML/Light/Proc.hs new file mode 100644 index 000000000..838d5af74 --- /dev/null +++ b/src/Text/Pandoc/XML/Light/Proc.hs @@ -0,0 +1,138 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XML.Light.Proc + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.XML.Light.Proc + ( + -- * Replacement for xml-light's Text.XML.Proc + strContent + , onlyElems + , elChildren + , onlyText + , findChildren + , filterChildren + , filterChildrenName + , findChild + , filterChild + , filterChildName + , findElement + , filterElement + , filterElementName + , findElements + , filterElements + , filterElementsName + , findAttr + , lookupAttr + , lookupAttrBy + , findAttrBy + ) where + +import Data.Text (Text) +import Data.Maybe (listToMaybe) +import Data.List(find) +import Text.Pandoc.XML.Light.Types + +-- +-- copied from xml-light Text.XML.Proc +-- + +-- | Get the text value of an XML element. This function +-- ignores non-text elements, and concatenates all text elements. +strContent :: Element -> Text +strContent = mconcat . map cdData . onlyText . elContent + +-- | Select only the elements from a list of XML content. +onlyElems :: [Content] -> [Element] +onlyElems xs = [ x | Elem x <- xs ] + +-- | Select only the elements from a parent. +elChildren :: Element -> [Element] +elChildren e = [ x | Elem x <- elContent e ] + +-- | Select only the text from a list of XML content. +onlyText :: [Content] -> [CData] +onlyText xs = [ x | Text x <- xs ] + +-- | Find all immediate children with the given name. +findChildren :: QName -> Element -> [Element] +findChildren q e = filterChildren ((q ==) . elName) e + +-- | Filter all immediate children wrt a given predicate. +filterChildren :: (Element -> Bool) -> Element -> [Element] +filterChildren p e = filter p (onlyElems (elContent e)) + + +-- | Filter all immediate children wrt a given predicate over their names. +filterChildrenName :: (QName -> Bool) -> Element -> [Element] +filterChildrenName p e = filter (p.elName) (onlyElems (elContent e)) + + +-- | Find an immediate child with the given name. +findChild :: QName -> Element -> Maybe Element +findChild q e = listToMaybe (findChildren q e) + +-- | Find an immediate child with the given name. +filterChild :: (Element -> Bool) -> Element -> Maybe Element +filterChild p e = listToMaybe (filterChildren p e) + +-- | Find an immediate child with name matching a predicate. +filterChildName :: (QName -> Bool) -> Element -> Maybe Element +filterChildName p e = listToMaybe (filterChildrenName p e) + +-- | Find the left-most occurrence of an element matching given name. +findElement :: QName -> Element -> Maybe Element +findElement q e = listToMaybe (findElements q e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElement :: (Element -> Bool) -> Element -> Maybe Element +filterElement p e = listToMaybe (filterElements p e) + +-- | Filter the left-most occurrence of an element wrt. given predicate. +filterElementName :: (QName -> Bool) -> Element -> Maybe Element +filterElementName p e = listToMaybe (filterElementsName p e) + +-- | Find all non-nested occurances of an element. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +findElements :: QName -> Element -> [Element] +findElements qn e = filterElementsName (qn==) e + +-- | Find all non-nested occurrences of an element wrt. given predicate. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElements :: (Element -> Bool) -> Element -> [Element] +filterElements p e + | p e = [e] + | otherwise = concatMap (filterElements p) $ onlyElems $ elContent e + +-- | Find all non-nested occurences of an element wrt a predicate over element names. +-- (i.e., once we have found an element, we do not search +-- for more occurances among the element's children). +filterElementsName :: (QName -> Bool) -> Element -> [Element] +filterElementsName p e = filterElements (p.elName) e + +-- | Lookup the value of an attribute. +findAttr :: QName -> Element -> Maybe Text +findAttr x e = lookupAttr x (elAttribs e) + +-- | Lookup attribute name from list. +lookupAttr :: QName -> [Attr] -> Maybe Text +lookupAttr x = lookupAttrBy (x ==) + +-- | Lookup the first attribute whose name satisfies the given predicate. +lookupAttrBy :: (QName -> Bool) -> [Attr] -> Maybe Text +lookupAttrBy p as = attrVal `fmap` find (p . attrKey) as + +-- | Lookup the value of the first attribute whose name +-- satisfies the given predicate. +findAttrBy :: (QName -> Bool) -> Element -> Maybe Text +findAttrBy p e = lookupAttrBy p (elAttribs e) + + diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/src/Text/Pandoc/XML/Light/Types.hs new file mode 100644 index 000000000..f338da6ea --- /dev/null +++ b/src/Text/Pandoc/XML/Light/Types.hs @@ -0,0 +1,190 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE DeriveDataTypeable #-} +{- | + Module : Text.Pandoc.XML.Light.Types + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +-} +module Text.Pandoc.XML.Light.Types + ( -- * Basic types, duplicating those from xml-light but with Text + -- instead of String + Line + , Content(..) + , Element(..) + , Attr(..) + , CData(..) + , CDataKind(..) + , QName(..) + , Node(..) + , unode + , unqual + , add_attr + , add_attrs + -- * Conversion functions from xml-light types + , fromXLQName + , fromXLCData + , fromXLElement + , fromXLAttr + , fromXLContent + ) where + +import Data.Text (Text) +import qualified Data.Text as T +import Data.Data (Data) +import Data.Typeable (Typeable) +import qualified Text.XML.Light as XL + +-- +-- type definitions lightly modified from xml-light +-- + +-- | A line is an Integer +type Line = Integer + +-- | XML content +data Content = Elem Element + | Text CData + | CRef Text + deriving (Show, Typeable, Data) + +-- | XML elements +data Element = Element { + elName :: QName, + elAttribs :: [Attr], + elContent :: [Content], + elLine :: Maybe Line + } deriving (Show, Typeable, Data) + +-- | XML attributes +data Attr = Attr { + attrKey :: QName, + attrVal :: Text + } deriving (Eq, Ord, Show, Typeable, Data) + +-- | XML CData +data CData = CData { + cdVerbatim :: CDataKind, + cdData :: Text, + cdLine :: Maybe Line + } deriving (Show, Typeable, Data) + +data CDataKind + = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. + | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[.. + | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up. + deriving ( Eq, Show, Typeable, Data ) + +-- | XML qualified names +data QName = QName { + qName :: Text, + qURI :: Maybe Text, + qPrefix :: Maybe Text + } deriving (Show, Typeable, Data) + + +instance Eq QName where + q1 == q2 = compare q1 q2 == EQ + +instance Ord QName where + compare q1 q2 = + case compare (qName q1) (qName q2) of + EQ -> case (qURI q1, qURI q2) of + (Nothing,Nothing) -> compare (qPrefix q1) (qPrefix q2) + (u1,u2) -> compare u1 u2 + x -> x + +class Node t where + node :: QName -> t -> Element + +instance Node ([Attr],[Content]) where + node n (attrs,cont) = Element { elName = n + , elAttribs = attrs + , elContent = cont + , elLine = Nothing + } + +instance Node [Attr] where node n as = node n (as,[]::[Content]) +instance Node Attr where node n a = node n [a] +instance Node () where node n () = node n ([]::[Attr]) + +instance Node [Content] where node n cs = node n ([]::[Attr],cs) +instance Node Content where node n c = node n [c] +instance Node ([Attr],Content) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Content) where node n (a,c) = node n ([a],[c]) + +instance Node ([Attr],[Element]) where + node n (as,cs) = node n (as,map Elem cs) + +instance Node ([Attr],Element) where node n (as,c) = node n (as,[c]) +instance Node (Attr,Element) where node n (a,c) = node n ([a],c) +instance Node [Element] where node n es = node n ([]::[Attr],es) +instance Node Element where node n e = node n [e] + +instance Node ([Attr],[CData]) where + node n (as,cs) = node n (as,map Text cs) + +instance Node ([Attr],CData) where node n (as,c) = node n (as,[c]) +instance Node (Attr,CData) where node n (a,c) = node n ([a],c) +instance Node [CData] where node n es = node n ([]::[Attr],es) +instance Node CData where node n e = node n [e] + +instance Node ([Attr],Text) where + node n (as,t) = node n (as, CData { cdVerbatim = CDataText + , cdData = t + , cdLine = Nothing }) + +instance Node (Attr,Text ) where node n (a,t) = node n ([a],t) +instance Node Text where node n t = node n ([]::[Attr],t) + +-- | Create node with unqualified name +unode :: Node t => Text -> t -> Element +unode = node . unqual + +unqual :: Text -> QName +unqual x = QName x Nothing Nothing + +-- | Add an attribute to an element. +add_attr :: Attr -> Element -> Element +add_attr a e = add_attrs [a] e + +-- | Add some attributes to an element. +add_attrs :: [Attr] -> Element -> Element +add_attrs as e = e { elAttribs = as ++ elAttribs e } + +-- +-- conversion from xml-light +-- + +fromXLQName :: XL.QName -> QName +fromXLQName qn = QName { qName = T.pack $ XL.qName qn + , qURI = T.pack <$> XL.qURI qn + , qPrefix = T.pack <$> XL.qPrefix qn } + +fromXLCData :: XL.CData -> CData +fromXLCData cd = CData { cdVerbatim = case XL.cdVerbatim cd of + XL.CDataText -> CDataText + XL.CDataVerbatim -> CDataVerbatim + XL.CDataRaw -> CDataRaw + , cdData = T.pack $ XL.cdData cd + , cdLine = XL.cdLine cd } + +fromXLElement :: XL.Element -> Element +fromXLElement el = Element { elName = fromXLQName $ XL.elName el + , elAttribs = map fromXLAttr $ XL.elAttribs el + , elContent = map fromXLContent $ XL.elContent el + , elLine = XL.elLine el } + +fromXLAttr :: XL.Attr -> Attr +fromXLAttr (XL.Attr qn s) = Attr (fromXLQName qn) (T.pack s) + +fromXLContent :: XL.Content -> Content +fromXLContent (XL.Elem el) = Elem $ fromXLElement el +fromXLContent (XL.Text cd) = Text $ fromXLCData cd +fromXLContent (XL.CRef s) = CRef (T.pack s) + + -- cgit v1.2.3 From 4af378702ae31d4c8a11d0c827a5986f54b5e310 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 16 Feb 2021 18:44:38 -0800 Subject: Add orig copyright/license info for code derived from xml-light. --- src/Text/Pandoc/XML/Light/Output.hs | 6 +++++- src/Text/Pandoc/XML/Light/Proc.hs | 4 +++- src/Text/Pandoc/XML/Light/Types.hs | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/XML/Light/Output.hs b/src/Text/Pandoc/XML/Light/Output.hs index dc94ce914..8182ef2ec 100644 --- a/src/Text/Pandoc/XML/Light/Output.hs +++ b/src/Text/Pandoc/XML/Light/Output.hs @@ -2,12 +2,16 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.XML.Light.Output - Copyright : Copyright (C) 2021 John MacFarlane + Copyright : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane License : GNU GPL, version 2 or above + Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha Portability : portable + + This code is based on code from xml-light, released under the BSD3 license. + We use a text Builder instead of ShowS. -} module Text.Pandoc.XML.Light.Output ( -- * Replacement for xml-light's Text.XML.Output diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/src/Text/Pandoc/XML/Light/Proc.hs index 838d5af74..b53c4b545 100644 --- a/src/Text/Pandoc/XML/Light/Proc.hs +++ b/src/Text/Pandoc/XML/Light/Proc.hs @@ -2,12 +2,14 @@ {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.XML.Light.Proc - Copyright : Copyright (C) 2021 John MacFarlane + Copyright : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha Portability : portable + + This code is taken from xml-light, released under the BSD3 license. -} module Text.Pandoc.XML.Light.Proc ( diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/src/Text/Pandoc/XML/Light/Types.hs index f338da6ea..03fdc2e4d 100644 --- a/src/Text/Pandoc/XML/Light/Types.hs +++ b/src/Text/Pandoc/XML/Light/Types.hs @@ -2,13 +2,16 @@ {-# LANGUAGE DeriveDataTypeable #-} {- | Module : Text.Pandoc.XML.Light.Types - Copyright : Copyright (C) 2021 John MacFarlane + Copyright : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> Stability : alpha Portability : portable + This code is taken from xml-light, released under the BSD3 license. + It has been modified to use Text instead of String, and the fromXL* + functions have been added. -} module Text.Pandoc.XML.Light.Types ( -- * Basic types, duplicating those from xml-light but with Text -- cgit v1.2.3 From d8fc4971868104274881570ce9bc3d9edf0d2506 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 16 Feb 2021 18:51:41 -0800 Subject: Add T.P.XML.Light.Cursor. --- pandoc.cabal | 1 + src/Text/Pandoc/XML/Light/Cursor.hs | 346 ++++++++++++++++++++++++++++++++++++ 2 files changed, 347 insertions(+) create mode 100644 src/Text/Pandoc/XML/Light/Cursor.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index d27520ba0..61bf0b51d 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -689,6 +689,7 @@ library Text.Pandoc.Lua.Walk, Text.Pandoc.XML.Light, Text.Pandoc.XML.Light.Types, + Text.Pandoc.XML.Light.Cursor, Text.Pandoc.XML.Light.Proc, Text.Pandoc.XML.Light.Output, Text.Pandoc.CSS, diff --git a/src/Text/Pandoc/XML/Light/Cursor.hs b/src/Text/Pandoc/XML/Light/Cursor.hs new file mode 100644 index 000000000..2e6da5346 --- /dev/null +++ b/src/Text/Pandoc/XML/Light/Cursor.hs @@ -0,0 +1,346 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.XML.Light.Cursor + Copyright : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + + This code is taken from xml-light, released under the BSD3 license. +-} +module Text.Pandoc.XML.Light.Cursor + ( -- * Replacement for xml-light's Text.XML.Cursor + Tag(..) + , getTag + , setTag + , fromTag + , Cursor(..) + , Path + + -- * Conversions + , fromContent + , fromElement + , fromForest + , toForest + , toTree + + -- * Moving around + , parent + , root + , getChild + , firstChild + , lastChild + , left + , right + , nextDF + + -- ** Searching + , findChild + , findLeft + , findRight + , findRec + + -- * Node classification + , isRoot + , isFirst + , isLast + , isLeaf + , isChild + , hasChildren + , getNodeIndex + + -- * Updates + , setContent + , modifyContent + , modifyContentM + + -- ** Inserting content + , insertLeft + , insertRight + , insertGoLeft + , insertGoRight + + -- ** Removing content + , removeLeft + , removeRight + , removeGoLeft + , removeGoRight + , removeGoUp + + ) where + +import Text.Pandoc.XML.Light.Types +import Data.Maybe(isNothing) +import Control.Monad(mplus) + +data Tag = Tag { tagName :: QName + , tagAttribs :: [Attr] + , tagLine :: Maybe Line + } deriving (Show) + +getTag :: Element -> Tag +getTag e = Tag { tagName = elName e + , tagAttribs = elAttribs e + , tagLine = elLine e + } + +setTag :: Tag -> Element -> Element +setTag t e = fromTag t (elContent e) + +fromTag :: Tag -> [Content] -> Element +fromTag t cs = Element { elName = tagName t + , elAttribs = tagAttribs t + , elLine = tagLine t + , elContent = cs + } + +type Path = [([Content],Tag,[Content])] + +-- | The position of a piece of content in an XML document. +data Cursor = Cur + { current :: Content -- ^ The currently selected content. + , lefts :: [Content] -- ^ Siblings on the left, closest first. + , rights :: [Content] -- ^ Siblings on the right, closest first. + , parents :: Path -- ^ The contexts of the parent elements of this location. + } deriving (Show) + +-- Moving around --------------------------------------------------------------- + +-- | The parent of the given location. +parent :: Cursor -> Maybe Cursor +parent loc = + case parents loc of + (pls,v,prs) : ps -> Just + Cur { current = Elem + (fromTag v + (combChildren (lefts loc) (current loc) (rights loc))) + , lefts = pls, rights = prs, parents = ps + } + [] -> Nothing + + +-- | The top-most parent of the given location. +root :: Cursor -> Cursor +root loc = maybe loc root (parent loc) + +-- | The left sibling of the given location. +left :: Cursor -> Maybe Cursor +left loc = + case lefts loc of + t : ts -> Just loc { current = t, lefts = ts + , rights = current loc : rights loc } + [] -> Nothing + +-- | The right sibling of the given location. +right :: Cursor -> Maybe Cursor +right loc = + case rights loc of + t : ts -> Just loc { current = t, lefts = current loc : lefts loc + , rights = ts } + [] -> Nothing + +-- | The first child of the given location. +firstChild :: Cursor -> Maybe Cursor +firstChild loc = + do (t : ts, ps) <- downParents loc + return Cur { current = t, lefts = [], rights = ts , parents = ps } + +-- | The last child of the given location. +lastChild :: Cursor -> Maybe Cursor +lastChild loc = + do (ts, ps) <- downParents loc + case reverse ts of + l : ls -> return Cur { current = l, lefts = ls, rights = [] + , parents = ps } + [] -> Nothing + +-- | Find the next left sibling that satisfies a predicate. +findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor +findLeft p loc = do loc1 <- left loc + if p loc1 then return loc1 else findLeft p loc1 + +-- | Find the next right sibling that satisfies a predicate. +findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor +findRight p loc = do loc1 <- right loc + if p loc1 then return loc1 else findRight p loc1 + +-- | The first child that satisfies a predicate. +findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor +findChild p loc = + do loc1 <- firstChild loc + if p loc1 then return loc1 else findRight p loc1 + +-- | The next position in a left-to-right depth-first traversal of a document: +-- either the first child, right sibling, or the right sibling of a parent that +-- has one. +nextDF :: Cursor -> Maybe Cursor +nextDF c = firstChild c `mplus` up c + where up x = right x `mplus` (up =<< parent x) + +-- | Perform a depth first search for a descendant that satisfies the +-- given predicate. +findRec :: (Cursor -> Bool) -> Cursor -> Maybe Cursor +findRec p c = if p c then Just c else findRec p =<< nextDF c + +-- | The child with the given index (starting from 0). +getChild :: Int -> Cursor -> Maybe Cursor +getChild n loc = + do (ts,ps) <- downParents loc + (ls,t,rs) <- splitChildren ts n + return Cur { current = t, lefts = ls, rights = rs, parents = ps } + + +-- | private: computes the parent for "down" operations. +downParents :: Cursor -> Maybe ([Content], Path) +downParents loc = + case current loc of + Elem e -> Just ( elContent e + , (lefts loc, getTag e, rights loc) : parents loc + ) + _ -> Nothing + +-- Conversions ----------------------------------------------------------------- + +-- | A cursor for the given content. +fromContent :: Content -> Cursor +fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] } + +-- | A cursor for the given element. +fromElement :: Element -> Cursor +fromElement e = fromContent (Elem e) + +-- | The location of the first tree in a forest. +fromForest :: [Content] -> Maybe Cursor +fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts + , parents = [] } +fromForest [] = Nothing + +-- | Computes the tree containing this location. +toTree :: Cursor -> Content +toTree loc = current (root loc) + +-- | Computes the forest containing this location. +toForest :: Cursor -> [Content] +toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r) + + +-- Queries --------------------------------------------------------------------- + +-- | Are we at the top of the document? +isRoot :: Cursor -> Bool +isRoot loc = null (parents loc) + +-- | Are we at the left end of the the document? +isFirst :: Cursor -> Bool +isFirst loc = null (lefts loc) + +-- | Are we at the right end of the document? +isLast :: Cursor -> Bool +isLast loc = null (rights loc) + +-- | Are we at the bottom of the document? +isLeaf :: Cursor -> Bool +isLeaf loc = isNothing (downParents loc) + +-- | Do we have a parent? +isChild :: Cursor -> Bool +isChild loc = not (isRoot loc) + +-- | Get the node index inside the sequence of children +getNodeIndex :: Cursor -> Int +getNodeIndex loc = length (lefts loc) + +-- | Do we have children? +hasChildren :: Cursor -> Bool +hasChildren loc = not (isLeaf loc) + + + +-- Updates --------------------------------------------------------------------- + +-- | Change the current content. +setContent :: Content -> Cursor -> Cursor +setContent t loc = loc { current = t } + +-- | Modify the current content. +modifyContent :: (Content -> Content) -> Cursor -> Cursor +modifyContent f loc = setContent (f (current loc)) loc + +-- | Modify the current content, allowing for an effect. +modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor +modifyContentM f loc = do x <- f (current loc) + return (setContent x loc) + +-- | Insert content to the left of the current position. +insertLeft :: Content -> Cursor -> Cursor +insertLeft t loc = loc { lefts = t : lefts loc } + +-- | Insert content to the right of the current position. +insertRight :: Content -> Cursor -> Cursor +insertRight t loc = loc { rights = t : rights loc } + +-- | Remove the content on the left of the current position, if any. +removeLeft :: Cursor -> Maybe (Content,Cursor) +removeLeft loc = case lefts loc of + l : ls -> return (l,loc { lefts = ls }) + [] -> Nothing + +-- | Remove the content on the right of the current position, if any. +removeRight :: Cursor -> Maybe (Content,Cursor) +removeRight loc = case rights loc of + l : ls -> return (l,loc { rights = ls }) + [] -> Nothing + + +-- | Insert content to the left of the current position. +-- The new content becomes the current position. +insertGoLeft :: Content -> Cursor -> Cursor +insertGoLeft t loc = loc { current = t, rights = current loc : rights loc } + +-- | Insert content to the right of the current position. +-- The new content becomes the current position. +insertGoRight :: Content -> Cursor -> Cursor +insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc } + +-- | Remove the current element. +-- The new position is the one on the left. +removeGoLeft :: Cursor -> Maybe Cursor +removeGoLeft loc = case lefts loc of + l : ls -> Just loc { current = l, lefts = ls } + [] -> Nothing + +-- | Remove the current element. +-- The new position is the one on the right. +removeGoRight :: Cursor -> Maybe Cursor +removeGoRight loc = case rights loc of + l : ls -> Just loc { current = l, rights = ls } + [] -> Nothing + +-- | Remove the current element. +-- The new position is the parent of the old position. +removeGoUp :: Cursor -> Maybe Cursor +removeGoUp loc = + case parents loc of + (pls,v,prs) : ps -> Just + Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc)) + , lefts = pls, rights = prs, parents = ps + } + [] -> Nothing + + +-- | private: Gets the given element of a list. +-- Also returns the preceding elements (reversed) and the following elements. +splitChildren :: [a] -> Int -> Maybe ([a],a,[a]) +splitChildren _ n | n < 0 = Nothing +splitChildren cs pos = loop [] cs pos + where loop acc (x:xs) 0 = Just (acc,x,xs) + loop acc (x:xs) n = loop (x:acc) xs $! n-1 + loop _ _ _ = Nothing + +-- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys +combChildren :: [a] -> a -> [a] -> [a] +combChildren ls t rs = foldl (flip (:)) (t:rs) ls + -- cgit v1.2.3 From 80a1d5c9b60b676ba7b7e6ed0267197c8f0ec459 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 16 Feb 2021 19:18:01 -0800 Subject: Revert "Add T.P.XML.Light.Cursor." This reverts commit d8fc4971868104274881570ce9bc3d9edf0d2506. --- pandoc.cabal | 1 - src/Text/Pandoc/XML/Light/Cursor.hs | 346 ------------------------------------ 2 files changed, 347 deletions(-) delete mode 100644 src/Text/Pandoc/XML/Light/Cursor.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 61bf0b51d..d27520ba0 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -689,7 +689,6 @@ library Text.Pandoc.Lua.Walk, Text.Pandoc.XML.Light, Text.Pandoc.XML.Light.Types, - Text.Pandoc.XML.Light.Cursor, Text.Pandoc.XML.Light.Proc, Text.Pandoc.XML.Light.Output, Text.Pandoc.CSS, diff --git a/src/Text/Pandoc/XML/Light/Cursor.hs b/src/Text/Pandoc/XML/Light/Cursor.hs deleted file mode 100644 index 2e6da5346..000000000 --- a/src/Text/Pandoc/XML/Light/Cursor.hs +++ /dev/null @@ -1,346 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.XML.Light.Cursor - Copyright : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - - This code is taken from xml-light, released under the BSD3 license. --} -module Text.Pandoc.XML.Light.Cursor - ( -- * Replacement for xml-light's Text.XML.Cursor - Tag(..) - , getTag - , setTag - , fromTag - , Cursor(..) - , Path - - -- * Conversions - , fromContent - , fromElement - , fromForest - , toForest - , toTree - - -- * Moving around - , parent - , root - , getChild - , firstChild - , lastChild - , left - , right - , nextDF - - -- ** Searching - , findChild - , findLeft - , findRight - , findRec - - -- * Node classification - , isRoot - , isFirst - , isLast - , isLeaf - , isChild - , hasChildren - , getNodeIndex - - -- * Updates - , setContent - , modifyContent - , modifyContentM - - -- ** Inserting content - , insertLeft - , insertRight - , insertGoLeft - , insertGoRight - - -- ** Removing content - , removeLeft - , removeRight - , removeGoLeft - , removeGoRight - , removeGoUp - - ) where - -import Text.Pandoc.XML.Light.Types -import Data.Maybe(isNothing) -import Control.Monad(mplus) - -data Tag = Tag { tagName :: QName - , tagAttribs :: [Attr] - , tagLine :: Maybe Line - } deriving (Show) - -getTag :: Element -> Tag -getTag e = Tag { tagName = elName e - , tagAttribs = elAttribs e - , tagLine = elLine e - } - -setTag :: Tag -> Element -> Element -setTag t e = fromTag t (elContent e) - -fromTag :: Tag -> [Content] -> Element -fromTag t cs = Element { elName = tagName t - , elAttribs = tagAttribs t - , elLine = tagLine t - , elContent = cs - } - -type Path = [([Content],Tag,[Content])] - --- | The position of a piece of content in an XML document. -data Cursor = Cur - { current :: Content -- ^ The currently selected content. - , lefts :: [Content] -- ^ Siblings on the left, closest first. - , rights :: [Content] -- ^ Siblings on the right, closest first. - , parents :: Path -- ^ The contexts of the parent elements of this location. - } deriving (Show) - --- Moving around --------------------------------------------------------------- - --- | The parent of the given location. -parent :: Cursor -> Maybe Cursor -parent loc = - case parents loc of - (pls,v,prs) : ps -> Just - Cur { current = Elem - (fromTag v - (combChildren (lefts loc) (current loc) (rights loc))) - , lefts = pls, rights = prs, parents = ps - } - [] -> Nothing - - --- | The top-most parent of the given location. -root :: Cursor -> Cursor -root loc = maybe loc root (parent loc) - --- | The left sibling of the given location. -left :: Cursor -> Maybe Cursor -left loc = - case lefts loc of - t : ts -> Just loc { current = t, lefts = ts - , rights = current loc : rights loc } - [] -> Nothing - --- | The right sibling of the given location. -right :: Cursor -> Maybe Cursor -right loc = - case rights loc of - t : ts -> Just loc { current = t, lefts = current loc : lefts loc - , rights = ts } - [] -> Nothing - --- | The first child of the given location. -firstChild :: Cursor -> Maybe Cursor -firstChild loc = - do (t : ts, ps) <- downParents loc - return Cur { current = t, lefts = [], rights = ts , parents = ps } - --- | The last child of the given location. -lastChild :: Cursor -> Maybe Cursor -lastChild loc = - do (ts, ps) <- downParents loc - case reverse ts of - l : ls -> return Cur { current = l, lefts = ls, rights = [] - , parents = ps } - [] -> Nothing - --- | Find the next left sibling that satisfies a predicate. -findLeft :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findLeft p loc = do loc1 <- left loc - if p loc1 then return loc1 else findLeft p loc1 - --- | Find the next right sibling that satisfies a predicate. -findRight :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findRight p loc = do loc1 <- right loc - if p loc1 then return loc1 else findRight p loc1 - --- | The first child that satisfies a predicate. -findChild :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findChild p loc = - do loc1 <- firstChild loc - if p loc1 then return loc1 else findRight p loc1 - --- | The next position in a left-to-right depth-first traversal of a document: --- either the first child, right sibling, or the right sibling of a parent that --- has one. -nextDF :: Cursor -> Maybe Cursor -nextDF c = firstChild c `mplus` up c - where up x = right x `mplus` (up =<< parent x) - --- | Perform a depth first search for a descendant that satisfies the --- given predicate. -findRec :: (Cursor -> Bool) -> Cursor -> Maybe Cursor -findRec p c = if p c then Just c else findRec p =<< nextDF c - --- | The child with the given index (starting from 0). -getChild :: Int -> Cursor -> Maybe Cursor -getChild n loc = - do (ts,ps) <- downParents loc - (ls,t,rs) <- splitChildren ts n - return Cur { current = t, lefts = ls, rights = rs, parents = ps } - - --- | private: computes the parent for "down" operations. -downParents :: Cursor -> Maybe ([Content], Path) -downParents loc = - case current loc of - Elem e -> Just ( elContent e - , (lefts loc, getTag e, rights loc) : parents loc - ) - _ -> Nothing - --- Conversions ----------------------------------------------------------------- - --- | A cursor for the given content. -fromContent :: Content -> Cursor -fromContent t = Cur { current = t, lefts = [], rights = [], parents = [] } - --- | A cursor for the given element. -fromElement :: Element -> Cursor -fromElement e = fromContent (Elem e) - --- | The location of the first tree in a forest. -fromForest :: [Content] -> Maybe Cursor -fromForest (t:ts) = Just Cur { current = t, lefts = [], rights = ts - , parents = [] } -fromForest [] = Nothing - --- | Computes the tree containing this location. -toTree :: Cursor -> Content -toTree loc = current (root loc) - --- | Computes the forest containing this location. -toForest :: Cursor -> [Content] -toForest loc = let r = root loc in combChildren (lefts r) (current r) (rights r) - - --- Queries --------------------------------------------------------------------- - --- | Are we at the top of the document? -isRoot :: Cursor -> Bool -isRoot loc = null (parents loc) - --- | Are we at the left end of the the document? -isFirst :: Cursor -> Bool -isFirst loc = null (lefts loc) - --- | Are we at the right end of the document? -isLast :: Cursor -> Bool -isLast loc = null (rights loc) - --- | Are we at the bottom of the document? -isLeaf :: Cursor -> Bool -isLeaf loc = isNothing (downParents loc) - --- | Do we have a parent? -isChild :: Cursor -> Bool -isChild loc = not (isRoot loc) - --- | Get the node index inside the sequence of children -getNodeIndex :: Cursor -> Int -getNodeIndex loc = length (lefts loc) - --- | Do we have children? -hasChildren :: Cursor -> Bool -hasChildren loc = not (isLeaf loc) - - - --- Updates --------------------------------------------------------------------- - --- | Change the current content. -setContent :: Content -> Cursor -> Cursor -setContent t loc = loc { current = t } - --- | Modify the current content. -modifyContent :: (Content -> Content) -> Cursor -> Cursor -modifyContent f loc = setContent (f (current loc)) loc - --- | Modify the current content, allowing for an effect. -modifyContentM :: Monad m => (Content -> m Content) -> Cursor -> m Cursor -modifyContentM f loc = do x <- f (current loc) - return (setContent x loc) - --- | Insert content to the left of the current position. -insertLeft :: Content -> Cursor -> Cursor -insertLeft t loc = loc { lefts = t : lefts loc } - --- | Insert content to the right of the current position. -insertRight :: Content -> Cursor -> Cursor -insertRight t loc = loc { rights = t : rights loc } - --- | Remove the content on the left of the current position, if any. -removeLeft :: Cursor -> Maybe (Content,Cursor) -removeLeft loc = case lefts loc of - l : ls -> return (l,loc { lefts = ls }) - [] -> Nothing - --- | Remove the content on the right of the current position, if any. -removeRight :: Cursor -> Maybe (Content,Cursor) -removeRight loc = case rights loc of - l : ls -> return (l,loc { rights = ls }) - [] -> Nothing - - --- | Insert content to the left of the current position. --- The new content becomes the current position. -insertGoLeft :: Content -> Cursor -> Cursor -insertGoLeft t loc = loc { current = t, rights = current loc : rights loc } - --- | Insert content to the right of the current position. --- The new content becomes the current position. -insertGoRight :: Content -> Cursor -> Cursor -insertGoRight t loc = loc { current = t, lefts = current loc : lefts loc } - --- | Remove the current element. --- The new position is the one on the left. -removeGoLeft :: Cursor -> Maybe Cursor -removeGoLeft loc = case lefts loc of - l : ls -> Just loc { current = l, lefts = ls } - [] -> Nothing - --- | Remove the current element. --- The new position is the one on the right. -removeGoRight :: Cursor -> Maybe Cursor -removeGoRight loc = case rights loc of - l : ls -> Just loc { current = l, rights = ls } - [] -> Nothing - --- | Remove the current element. --- The new position is the parent of the old position. -removeGoUp :: Cursor -> Maybe Cursor -removeGoUp loc = - case parents loc of - (pls,v,prs) : ps -> Just - Cur { current = Elem (fromTag v (reverse (lefts loc) ++ rights loc)) - , lefts = pls, rights = prs, parents = ps - } - [] -> Nothing - - --- | private: Gets the given element of a list. --- Also returns the preceding elements (reversed) and the following elements. -splitChildren :: [a] -> Int -> Maybe ([a],a,[a]) -splitChildren _ n | n < 0 = Nothing -splitChildren cs pos = loop [] cs pos - where loop acc (x:xs) 0 = Just (acc,x,xs) - loop acc (x:xs) n = loop (x:acc) xs $! n-1 - loop _ _ _ = Nothing - --- | private: combChildren ls x ys = reverse ls ++ [x] ++ ys -combChildren :: [a] -> a -> [a] -> [a] -combChildren ls t rs = foldl (flip (:)) (t:rs) ls - -- cgit v1.2.3 From 73add0578989e1da6e9cd1de68e2e4142f789188 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 17 Feb 2021 09:54:39 -0800 Subject: Docx reader: use Map instead of list for Namespaces. This gives a speedup of about 5-10%. The reader is now approximately twice as fast as in the last release. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 14 +++++++------- src/Text/Pandoc/Readers/Docx/Util.hs | 26 +++++++++++++------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index c76f3c171..f8ed248d7 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -396,9 +396,9 @@ archiveToNotes zf = >>= parseXMLFromEntry enElem = findEntryByPath "word/endnotes.xml" zf >>= parseXMLFromEntry - fn_namespaces = maybe [] elemToNameSpaces fnElem - en_namespaces = maybe [] elemToNameSpaces enElem - ns = unionBy (\x y -> fst x == fst y) fn_namespaces en_namespaces + fn_namespaces = maybe mempty elemToNameSpaces fnElem + en_namespaces = maybe mempty elemToNameSpaces enElem + ns = M.union fn_namespaces en_namespaces fn = fnElem >>= elemToNotes ns "footnote" . walkDocument ns en = enElem >>= elemToNotes ns "endnote" . walkDocument ns in @@ -408,7 +408,7 @@ archiveToComments :: Archive -> Comments archiveToComments zf = let cmtsElem = findEntryByPath "word/comments.xml" zf >>= parseXMLFromEntry - cmts_namespaces = maybe [] elemToNameSpaces cmtsElem + cmts_namespaces = maybe mempty elemToNameSpaces cmtsElem cmts = elemToComments cmts_namespaces . walkDocument cmts_namespaces <$> cmtsElem in @@ -518,7 +518,7 @@ levelElemToLevel _ _ = Nothing archiveToNumbering' :: Archive -> Maybe Numbering archiveToNumbering' zf = case findEntryByPath "word/numbering.xml" zf of - Nothing -> Just $ Numbering [] [] [] + Nothing -> Just $ Numbering mempty [] [] Just entry -> do numberingElem <- parseXMLFromEntry entry let namespaces = elemToNameSpaces numberingElem @@ -530,7 +530,7 @@ archiveToNumbering' zf = archiveToNumbering :: Archive -> Numbering archiveToNumbering archive = - fromMaybe (Numbering [] [] []) (archiveToNumbering' archive) + fromMaybe (Numbering mempty [] []) (archiveToNumbering' archive) elemToNotes :: NameSpaces -> Text -> Element -> Maybe (M.Map T.Text Element) elemToNotes ns notetype element @@ -875,7 +875,7 @@ childElemToRun ns element = let (title, alt) = getTitleAndAlt ns element a_ns = "http://schemas.openxmlformats.org/drawingml/2006/main" drawing = findElement (QName "blip" (Just a_ns) (Just "a")) picElem - >>= findAttr (QName "embed" (lookup "r" ns) (Just "r")) + >>= findAttr (QName "embed" (M.lookup "r" ns) (Just "r")) in case drawing of Just s -> expandDrawingId s >>= diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index 21df03d9e..ac331cba6 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -22,42 +22,42 @@ module Text.Pandoc.Readers.Docx.Util ( , findAttrByName ) where -import Data.Maybe (mapMaybe) import qualified Data.Text as T import Data.Text (Text) import Text.Pandoc.XML.Light +import qualified Data.Map as M -type NameSpaces = [(Text, Text)] +type NameSpaces = M.Map Text Text elemToNameSpaces :: Element -> NameSpaces -elemToNameSpaces = mapMaybe attrToNSPair . elAttribs - -attrToNSPair :: Attr -> Maybe (Text, Text) -attrToNSPair (Attr (QName s _ (Just "xmlns")) val) = Just (s, val) -attrToNSPair _ = Nothing +elemToNameSpaces = foldr (\(Attr qn val) -> + case qn of + QName s _ (Just "xmlns") -> M.insert s val + _ -> id) mempty . elAttribs elemName :: NameSpaces -> Text -> Text -> QName elemName ns prefix name = - QName name (lookup prefix ns) (if T.null prefix then Nothing else Just prefix) + QName name (M.lookup prefix ns) + (if T.null prefix then Nothing else Just prefix) isElem :: NameSpaces -> Text -> Text -> Element -> Bool isElem ns prefix name element = - let ns' = ns ++ elemToNameSpaces element + let ns' = ns <> elemToNameSpaces element in qName (elName element) == name && - qURI (elName element) == lookup prefix ns' + qURI (elName element) == M.lookup prefix ns' findChildByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element findChildByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el + let ns' = ns <> elemToNameSpaces el in findChild (elemName ns' pref name) el findChildrenByName :: NameSpaces -> Text -> Text -> Element -> [Element] findChildrenByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el + let ns' = ns <> elemToNameSpaces el in findChildren (elemName ns' pref name) el findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text findAttrByName ns pref name el = - let ns' = ns ++ elemToNameSpaces el + let ns' = ns <> elemToNameSpaces el in findAttr (elemName ns' pref name) el -- cgit v1.2.3 From 743f7216de98556a316301ac72b8606bafc2deee Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Thu, 18 Feb 2021 21:53:19 +0100 Subject: Org reader: fix bug in org-ref citation parsing. The org-ref syntax allows to list multiple citations separated by comma. This fixes a bug that accepted commas as part of the citation id, so all citation lists were parsed as one single citation. Fixes: #7101 --- src/Text/Pandoc/Readers/Org/Inlines.hs | 2 +- test/Tests/Readers/Org/Inline/Citation.hs | 40 +++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 68c2ba5e0..519a6ce04 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -322,7 +322,7 @@ linkLikeOrgRefCite = try $ do -- from the `org-ref-cite-re` variable in `org-ref.el`. orgRefCiteKey :: PandocMonad m => OrgParser m Text orgRefCiteKey = - let citeKeySpecialChars = "-_:\\./," :: String + let citeKeySpecialChars = "-_:\\./" :: String isCiteKeySpecialChar c = c `elem` citeKeySpecialChars isCiteKeyChar c = isAlphaNum c || isCiteKeySpecialChar c endOfCitation = try $ do diff --git a/test/Tests/Readers/Org/Inline/Citation.hs b/test/Tests/Readers/Org/Inline/Citation.hs index 5538f1ec8..a11804983 100644 --- a/test/Tests/Readers/Org/Inline/Citation.hs +++ b/test/Tests/Readers/Org/Inline/Citation.hs @@ -116,6 +116,46 @@ tests = } in (para $ cite [citation] "citep:pandoc") + , "multiple simple citations" =: + "citep:picard,riker" =?> + let picard = Citation + { citationId = "picard" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + riker = Citation + { citationId = "riker" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + in (para $ cite [picard,riker] "citep:picard,riker") + + , "multiple simple citations succeeded by comma" =: + "citep:picard,riker," =?> + let picard = Citation + { citationId = "picard" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + riker = Citation + { citationId = "riker" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + in (para $ cite [picard,riker] "citep:picard,riker" <> str ",") + , "extended citation" =: "[[citep:Dominik201408][See page 20::, for example]]" =?> let citation = Citation -- cgit v1.2.3 From 9e728b40f36d48c687372ad447670186ed415337 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 17 Feb 2021 17:21:22 -0800 Subject: T.P.Shared: cleanup. Cleanup up some functions and added deprecation pragmas to funtions no longer used in the code base. --- src/Text/Pandoc/Shared.hs | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 6d5d4c97d..a579681b1 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -151,21 +151,22 @@ splitBy :: (a -> Bool) -> [a] -> [[a]] splitBy _ [] = [] splitBy isSep lst = let (first, rest) = break isSep lst - rest' = dropWhile isSep rest - in first:splitBy isSep rest' + in first:splitBy isSep (dropWhile isSep rest) +-- | Split text by groups of one or more separator. splitTextBy :: (Char -> Bool) -> T.Text -> [T.Text] splitTextBy isSep t | T.null t = [] | otherwise = let (first, rest) = T.break isSep t - rest' = T.dropWhile isSep rest - in first : splitTextBy isSep rest' + in first : splitTextBy isSep (T.dropWhile isSep rest) +{-# DEPRECATED splitByIndices "This function is slated for removal" #-} splitByIndices :: [Int] -> [a] -> [[a]] splitByIndices [] lst = [lst] splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest where (first, rest) = splitAt x lst +{-# DEPRECATED splitStringByIndices "This function is slated for removal" #-} -- | Split string into chunks divided at specified indices. splitStringByIndices :: [Int] -> [Char] -> [[Char]] splitStringByIndices [] lst = [lst] @@ -173,15 +174,22 @@ splitStringByIndices (x:xs) lst = let (first, rest) = splitAt' x lst in first : splitStringByIndices (map (\y -> y - x) xs) rest -splitTextByIndices :: [Int] -> T.Text -> [T.Text] -splitTextByIndices ns = fmap T.pack . splitStringByIndices ns . T.unpack - +-- DEPRECATED: can be removed when splitStringByIndices is splitAt' :: Int -> [Char] -> ([Char],[Char]) splitAt' _ [] = ([],[]) splitAt' n xs | n <= 0 = ([],xs) splitAt' n (x:xs) = (x:ys,zs) where (ys,zs) = splitAt' (n - charWidth x) xs +splitTextByIndices :: [Int] -> T.Text -> [T.Text] +splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) + where + splitTextByRelIndices [] t = [t] + splitTextByRelIndices (x:xs) t = + let (first, rest) = T.splitAt x t + in first : splitTextByRelIndices xs rest + +{-# DEPRECATED substitute "This function is slated for removal" #-} -- | Replace each occurrence of one sublist in a list with another. substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] substitute _ _ [] = [] @@ -254,17 +262,24 @@ notElemText c = T.all (/= c) stripTrailingNewlines :: T.Text -> T.Text stripTrailingNewlines = T.dropWhileEnd (== '\n') +isWS :: Char -> Bool +isWS ' ' = True +isWS '\r' = True +isWS '\n' = True +isWS '\t' = True +isWS _ = False + -- | Remove leading and trailing space (including newlines) from string. trim :: T.Text -> T.Text -trim = T.dropAround (`elemText` " \r\n\t") +trim = T.dropAround isWS -- | Remove leading space (including newlines) from string. triml :: T.Text -> T.Text -triml = T.dropWhile (`elemText` " \r\n\t") +triml = T.dropWhile isWS -- | Remove trailing space (including newlines) from string. trimr :: T.Text -> T.Text -trimr = T.dropWhileEnd (`elemText` " \r\n\t") +trimr = T.dropWhileEnd isWS -- | Trim leading space and trailing space unless after \. trimMath :: T.Text -> T.Text @@ -275,7 +290,7 @@ trimMath = triml . T.reverse . stripBeginSpace . T.reverse -- no Text.spanEnd | Just ('\\', _) <- T.uncons suff = T.cons (T.last pref) suff | otherwise = suff where - (pref, suff) = T.span (`elemText` " \t\n\r") t + (pref, suff) = T.span isWS t -- | Strip leading and trailing characters from string stripFirstAndLast :: T.Text -> T.Text -- cgit v1.2.3 From 53cf8295a4825297f78e20b6ce73d2da8fd7ef84 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Feb 2021 14:33:56 -0800 Subject: LaTeX writer: adjust hypertargets to beginnings of paragraphs. Use `\vadjust pre` so that the hypertarget takes you to the beginning of the paragraph rather than one line down. Closes #7078. This makes a particular difference for links to citations using `--citeproc` and `link-citations: true`. --- src/Text/Pandoc/Writers/LaTeX.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 93603a26e..e8a187599 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -500,12 +500,13 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do else blockListToLaTeX bs modify $ \st -> st{ stIncremental = oldIncremental } linkAnchor' <- hypertarget True identifier empty - -- see #2704 for the motivation for adding \leavevmode: + -- see #2704 for the motivation for adding \leavevmode + -- and #7078 for \vadjust pre let linkAnchor = case bs of Para _ : _ | not (isEmpty linkAnchor') - -> "\\leavevmode" <> linkAnchor' <> "%" + -> "\\leavevmode\\vadjust pre{" <> linkAnchor' <> "}%" _ -> linkAnchor' wrapNotes txt = if beamer && "notes" `elem` classes then "\\note" <> braces txt -- speaker notes -- cgit v1.2.3 From 0f5c56dfb171e27745f4fe8530325223ecefe52a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Feb 2021 17:11:53 -0800 Subject: T.P.PDF: disable `smart` when building PDF via LaTeX. This is to prevent accidental creation of ligatures like `` ?` `` and `` !` `` (especially in languages with quotations like German), and similar ligature issues. See jgm/citeproc#54. --- src/Text/Pandoc/PDF.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 7c0082c29..3f9dd8dad 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -43,6 +43,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocPDFProgramNotFoundError)) import Text.Pandoc.MIME (getMimeType) import Text.Pandoc.Options (HTMLMathMethod (..), WriterOptions (..)) +import Text.Pandoc.Extensions (disableExtension, Extension(Ext_smart)) import Text.Pandoc.Process (pipeProcess) import System.Process (readProcessWithExitCode) import Text.Pandoc.Shared (inDirectory, stringify, tshow) @@ -114,7 +115,10 @@ makePDF program pdfargs writer opts doc = runIOorExplode $ do putCommonState commonState doc' <- handleImages opts tmpdir doc - source <- writer opts doc' + source <- writer opts{ writerExtensions = -- disable use of quote + -- ligatures to avoid bad ligatures like ?` + disableExtension Ext_smart + (writerExtensions opts) } doc' res <- case baseProg of "context" -> context2pdf verbosity program pdfargs tmpdir source "tectonic" -> tectonic2pdf verbosity program pdfargs tmpdir source -- cgit v1.2.3 From ef642e2bbc1f46056fc27560ceba791f27f2daa6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Feb 2021 18:11:27 -0800 Subject: T.P.XML Improve fromEntities. --- src/Text/Pandoc/XML.hs | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index c4e3ed1e7..6dbbce1d2 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -1,6 +1,5 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.XML Copyright : Copyright (C) 2006-2021 John MacFarlane @@ -123,23 +122,20 @@ html5EntityMap = foldr go mempty htmlEntities -- Unescapes XML entities fromEntities :: Text -> Text -fromEntities = T.pack . fromEntities' +fromEntities t + = let (x, y) = T.break (== '&') t + in if T.null y + then t + else x <> + let (ent, rest) = T.break (\c -> isSpace c || c == ';') y + rest' = case T.uncons rest of + Just (';',ys) -> ys + _ -> rest + ent' = T.drop 1 ent <> ";" + in case T.pack <$> lookupEntity (T.unpack ent') of + Just c -> c <> fromEntities rest' + Nothing -> ent <> fromEntities rest -fromEntities' :: Text -> String -fromEntities' (T.uncons -> Just ('&', xs)) = - case lookupEntity $ T.unpack ent' of - Just c -> c <> fromEntities' rest - Nothing -> "&" <> fromEntities' xs - where (ent, rest) = case T.break (\c -> isSpace c || c == ';') xs of - (zs,T.uncons -> Just (';',ys)) -> (zs,ys) - (zs, ys) -> (zs,ys) - ent' - | Just ys <- T.stripPrefix "#X" ent = "#x" <> ys -- workaround tagsoup bug - | Just ('#', _) <- T.uncons ent = ent - | otherwise = ent <> ";" -fromEntities' t = case T.uncons t of - Just (x, xs) -> x : fromEntities' xs - Nothing -> "" html5Attributes :: Set.Set Text html5Attributes = Set.fromList -- cgit v1.2.3 From 98d26c234579a06446a5bef1992ed77bac48a4ac Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Feb 2021 21:24:31 -0800 Subject: DocBook, JATS, OPML readers: performance optimization. With the new XML parser, we can avoid the expensive tree normalization step we used to do. This gives a significant speed boost in docbook and JATS parsing (e.g. 9.7 to 6 ms). --- src/Text/Pandoc/Readers/DocBook.hs | 26 ++++---------------------- src/Text/Pandoc/Readers/JATS.hs | 24 +++--------------------- src/Text/Pandoc/Readers/OPML.hs | 22 +--------------------- 3 files changed, 8 insertions(+), 64 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index e201b54fe..d38b07864 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -12,7 +12,7 @@ Conversion of DocBook XML to 'Pandoc' document. -} module Text.Pandoc.Readers.DocBook ( readDocBook ) where import Control.Monad.State.Strict -import Data.Char (isSpace, toUpper, isLetter) +import Data.Char (isSpace, isLetter) import Data.Default import Data.Either (rights) import Data.Foldable (asum) @@ -540,8 +540,9 @@ instance Default DBState where readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readDocBook _ inp = do - tree <- either (throwError . PandocXMLError "") (return . normalizeTree) $ - parseXMLContents (TL.fromStrict . handleInstructions $ crFilter inp) + tree <- either (throwError . PandocXMLError "") return $ + parseXMLContents + (TL.fromStrict . handleInstructions $ crFilter inp) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) @@ -571,25 +572,6 @@ getFigure e = do modify $ \st -> st{ dbFigureTitle = mempty, dbFigureId = mempty } return res --- normalize input, consolidating adjacent Text and CRef elements -normalizeTree :: [Content] -> [Content] -normalizeTree = everywhere (mkT go) - where go :: [Content] -> [Content] - go (Text (CData CDataRaw _ _):xs) = xs - go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 <> s2) z):xs - go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 <> convertEntity r) z):xs - go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r <> s1) z):xs - go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 <> - convertEntity r2) Nothing):xs - go xs = xs - -convertEntity :: Text -> Text -convertEntity e = maybe (T.map toUpper e) T.pack (lookupEntity $ T.unpack e) - -- convenience function to get an attribute value, defaulting to "" attrValue :: Text -> Element -> Text attrValue attr elt = diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 5353f2001..602f3b4f2 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -54,30 +54,11 @@ instance Default JATSState where readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readJATS _ inp = do - tree <- either (throwError . PandocXMLError "") - (return . normalizeTree) $ + tree <- either (throwError . PandocXMLError "") return $ parseXMLContents (TL.fromStrict $ crFilter inp) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) --- normalize input, consolidating adjacent Text and CRef elements -normalizeTree :: [Content] -> [Content] -normalizeTree = everywhere (mkT go) - where go :: [Content] -> [Content] - go (Text (CData CDataRaw _ _):xs) = xs - go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 <> s2) z):xs - go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 <> convertEntity r) z):xs - go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r <> s1) z):xs - go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs - go xs = xs - -convertEntity :: Text -> Text -convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity $ T.unpack e) - -- convenience function to get an attribute value, defaulting to "" attrValue :: Text -> Element -> Text attrValue attr = @@ -454,7 +435,8 @@ elementToStr x = x parseInline :: PandocMonad m => Content -> JATS m Inlines parseInline (Text (CData _ s _)) = return $ text s -parseInline (CRef ref) = return . text . convertEntity $ ref +parseInline (CRef ref) = return $ maybe (text $ T.toUpper ref) (text . T.pack) + $ lookupEntity (T.unpack ref) parseInline (Elem e) = case qName (elName e) of "italic" -> innerInlines emph diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 184d5a63f..5f2ddb876 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -14,12 +14,10 @@ Conversion of OPML to 'Pandoc' document. module Text.Pandoc.Readers.OPML ( readOPML ) where import Control.Monad.State.Strict import Data.Default -import Data.Generics import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL -import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options @@ -53,7 +51,7 @@ readOPML opts inp = do (bs, st') <- runStateT (case parseXMLContents (TL.fromStrict (crFilter inp)) of Left msg -> throwError $ PandocXMLError "" msg - Right ns -> mapM parseBlock $ normalizeTree ns) + Right ns -> mapM parseBlock ns) def{ opmlOptions = opts } return $ setTitle (opmlDocTitle st') $ @@ -61,24 +59,6 @@ readOPML opts inp = do setDate (opmlDocDate st') $ doc $ mconcat bs --- normalize input, consolidating adjacent Text and CRef elements -normalizeTree :: [Content] -> [Content] -normalizeTree = everywhere (mkT go) - where go :: [Content] -> [Content] - go (Text (CData CDataRaw _ _):xs) = xs - go (Text (CData CDataText s1 z):Text (CData CDataText s2 _):xs) = - Text (CData CDataText (s1 <> s2) z):xs - go (Text (CData CDataText s1 z):CRef r:xs) = - Text (CData CDataText (s1 <> convertEntity r) z):xs - go (CRef r:Text (CData CDataText s1 z):xs) = - Text (CData CDataText (convertEntity r <> s1) z):xs - go (CRef r1:CRef r2:xs) = - Text (CData CDataText (convertEntity r1 <> convertEntity r2) Nothing):xs - go xs = xs - -convertEntity :: Text -> Text -convertEntity e = maybe (T.toUpper e) T.pack (lookupEntity (T.unpack e)) - -- convenience function to get an attribute value, defaulting to "" attrValue :: Text -> Element -> Text attrValue attr elt = -- cgit v1.2.3 From 13847267e969b634e9c16c15170e7f217d432e8a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Feb 2021 00:07:38 -0800 Subject: HTML reader: efficiency improvements. Do a lookahead to find the right parser to use. Benchmarks from 34ms to 23ms, with less allocation. Also speeds up the epub reader. --- src/Text/Pandoc/Readers/HTML.hs | 210 ++++++++++++++++++++++++---------------- 1 file changed, 129 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index cc60b5501..47856d2f7 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -159,29 +159,65 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny) block :: PandocMonad m => TagParser m Blocks block = do - res <- choice - [ eSection - , eSwitch B.para block - , mempty <$ eFootnote - , mempty <$ eTOC - , mempty <$ eTitlePage - , pPara - , pHeader - , pBlockQuote - , pCodeBlock - , pList - , pHrule - , pTable block - , pHtml - , pHead - , pBody - , pLineBlock - , pDiv - , pPlain - , pFigure - , pIframe - , pRawHtmlBlock - ] + exts <- getOption readerExtensions + tag <- lookAhead pAny + res <- + (case tag of + TagOpen name attr -> + let type' = fromMaybe "" $ + lookup "type" attr <|> lookup "epub:type" attr + epubExts = extensionEnabled Ext_epub_html_exts exts + in + case name of + _ | name `elem` sectioningContent + , epubExts + , "chapter" `T.isInfixOf` type' + -> eSection + _ | epubExts + , type' `elem` ["footnote", "rearnote"] + -> mempty <$ eFootnote + _ | epubExts + , type' == "toc" + -> mempty <$ eTOC + _ | "titlepage" `T.isInfixOf` type' + , name `elem` ("section" : groupingContent) + -> mempty <$ eTitlePage + "p" -> pPara + "h1" -> pHeader + "h2" -> pHeader + "h3" -> pHeader + "h4" -> pHeader + "h5" -> pHeader + "h6" -> pHeader + "blockquote" -> pBlockQuote + "pre" -> pCodeBlock + "ul" -> pBulletList + "ol" -> pOrderedList + "dl" -> pDefinitionList + "table" -> pTable block + "hr" -> pHrule + "html" -> pHtml + "head" -> pHead + "body" -> pBody + "div" + | extensionEnabled Ext_line_blocks exts + , Just "line-block" <- lookup "class" attr + -> pLineBlock + | otherwise + -> pDiv + "section" -> pDiv + "main" -> pDiv + "figure" -> pFigure + "iframe" -> pIframe + "style" -> pRawHtmlBlock + "textarea" -> pRawHtmlBlock + "switch" + | epubExts + -> eSwitch B.para block + _ -> mzero + _ -> mzero) + <|> pPlain + <|> pRawHtmlBlock trace (T.take 60 $ tshow $ B.toList res) return res @@ -256,9 +292,6 @@ eTOC = try $ do guard $ (lookup "type" attr <|> lookup "epub:type" attr) == Just "toc" void (pInTags tag block) -pList :: PandocMonad m => TagParser m Blocks -pList = pBulletList <|> pOrderedList <|> pDefinitionList - pBulletList :: PandocMonad m => TagParser m Blocks pBulletList = try $ do pSatisfy (matchTagOpen "ul" []) @@ -369,13 +402,15 @@ pLineBlock = try $ do B.toList ils return $ B.lineBlock lns +isDivLike :: Text -> Bool +isDivLike "div" = True +isDivLike "section" = True +isDivLike "main" = True +isDivLike _ = False + pDiv :: PandocMonad m => TagParser m Blocks pDiv = try $ do guardEnabled Ext_native_divs - let isDivLike "div" = True - isDivLike "section" = True - isDivLike "main" = True - isDivLike _ = False TagOpen tag attr' <- lookAhead $ pSatisfy $ tagOpen isDivLike (const True) let (ident, classes, kvs) = toAttr attr' contents <- pInTags tag block @@ -544,31 +579,47 @@ tagToText (TagOpen "br" _) = "\n" tagToText _ = "" inline :: PandocMonad m => TagParser m Inlines -inline = choice - [ eNoteref - , eSwitch id inline - , pTagText - , pQ - , pEmph - , pStrong - , pSuperscript - , pSubscript - , pSpanLike - , pSmall - , pStrikeout - , pUnderline - , pLineBreak - , pLink - , pImage - , pSvg - , pBdo - , pCode - , pCodeWithClass [("samp","sample"),("var","variable")] - , pSpan - , pMath False - , pScriptMath - , pRawHtmlInline - ] +inline = do + exts <- getOption readerExtensions + tag <- lookAhead pAny + case tag of + TagOpen name attr -> + case name of + "a" | extensionEnabled Ext_epub_html_exts exts + , Just "noteref" <- lookup "type" attr <|> lookup "epub:type" attr + , Just ('#',_) <- lookup "href" attr >>= T.uncons + -> eNoteref + | otherwise -> pLink + "switch" -> eSwitch id inline + "q" -> pQ + "em" -> pEmph + "i" -> pEmph + "strong" -> pStrong + "b" -> pStrong + "sup" -> pSuperscript + "sub" -> pSubscript + "small" -> pSmall + "s" -> pStrikeout + "strike" -> pStrikeout + "del" -> pStrikeout + "u" -> pUnderline + "ins" -> pUnderline + "br" -> pLineBreak + "img" -> pImage + "svg" -> pSvg + "bdo" -> pBdo + "code" -> pCode + "samp" -> pCodeWithClass "samp" "sample" + "var" -> pCodeWithClass "var" "variable" + "span" -> pSpan + "math" -> pMath False + "script" + | Just x <- lookup "type" attr + , "math/tex" `T.isPrefixOf` x -> pScriptMath + _ | name `elem` htmlSpanLikeElements -> pSpanLike + _ -> pRawHtmlInline + TagText _ -> pTagText + _ -> pRawHtmlInline pSelfClosing :: PandocMonad m => (Text -> Bool) -> ([Attribute Text] -> Bool) @@ -579,27 +630,25 @@ pSelfClosing f g = do return open pQ :: PandocMonad m => TagParser m Inlines -pQ = choice $ map try [citedQuote, normalQuote] - where citedQuote = do - tag <- pSatisfy $ tagOpenLit "q" (any ((=="cite") . fst)) - - url <- canonicalizeUrl $ fromAttrib "cite" tag - let uid = fromMaybe (fromAttrib "name" tag) $ - maybeFromAttrib "id" tag - let cls = T.words $ fromAttrib "class" tag - - makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url)]) - normalQuote = do - pSatisfy $ tagOpenLit "q" (const True) - makeQuote id - makeQuote wrapper = do - ctx <- asks quoteContext - let (constructor, innerContext) = case ctx of - InDoubleQuote -> (B.singleQuoted, InSingleQuote) - _ -> (B.doubleQuoted, InDoubleQuote) - - content <- withQuoteContext innerContext (mconcat <$> manyTill inline (pCloses "q")) - return $ extractSpaces (constructor . wrapper) content +pQ = do + TagOpen _ attrs <- pSatisfy $ tagOpenLit "q" (const True) + case lookup "cite" attrs of + Just url -> do + let uid = fromMaybe mempty $ + lookup "name" attrs <> lookup "id" attrs + let cls = maybe [] T.words $ lookup "class" attrs + url' <- canonicalizeUrl url + makeQuote $ B.spanWith (uid, cls, [("cite", escapeURI url')]) + Nothing -> makeQuote id + where + makeQuote wrapper = do + ctx <- asks quoteContext + let (constructor, innerContext) = case ctx of + InDoubleQuote -> (B.singleQuoted, InSingleQuote) + _ -> (B.doubleQuoted, InDoubleQuote) + content <- withQuoteContext innerContext + (mconcat <$> manyTill inline (pCloses "q")) + return $ extractSpaces (constructor . wrapper) content pEmph :: PandocMonad m => TagParser m Inlines pEmph = pInlinesInTags "em" B.emph <|> pInlinesInTags "i" B.emph @@ -690,13 +739,12 @@ pSvg = do UTF8.toText (encode $ UTF8.fromText rawText) return $ B.imageWith (ident,cls,[]) svgData mempty mempty -pCodeWithClass :: PandocMonad m => [(T.Text,Text)] -> TagParser m Inlines -pCodeWithClass elemToClass = try $ do - let tagTest = flip elem . fmap fst $ elemToClass - TagOpen open attr' <- pSatisfy $ tagOpen tagTest (const True) +pCodeWithClass :: PandocMonad m => Text -> Text -> TagParser m Inlines +pCodeWithClass name class' = try $ do + TagOpen open attr' <- pSatisfy $ tagOpen (== name) (const True) result <- manyTill pAny (pCloses open) let (ids,cs,kvs) = toAttr attr' - cs' = maybe cs (:cs) . lookup open $ elemToClass + cs' = class' : cs return . B.codeWith (ids,cs',kvs) . T.unwords . T.lines . innerText $ result -- cgit v1.2.3 From 0f955b10b455e9b3d326262d03261f17538a6943 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Feb 2021 18:57:21 -0800 Subject: T.P.Readers.LaTeX.Parsing: improve braced'. Remove the parameter, have it parse the opening brace, and make it more efficient. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 313aa6c51..dab4d334e 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -680,28 +680,25 @@ grouped parser = try $ do -- {{a,b}} should be parsed the same as {a,b} try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup) -braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok] -braced' getTok n = - handleEgroup <|> handleBgroup <|> handleOther - where handleEgroup = do - t <- symbol '}' - if n == 1 - then return [] - else (t:) <$> braced' getTok (n - 1) - handleBgroup = do - t <- symbol '{' - (t:) <$> braced' getTok (n + 1) - handleOther = do - t <- getTok - (t:) <$> braced' getTok n +braced' :: PandocMonad m => LP m Tok -> LP m [Tok] +braced' getTok = symbol '{' *> go (1 :: Int) + where + go n = do + t <- getTok + case t of + Tok _ Symbol "}" + | n > 1 -> (t:) <$> go (n - 1) + | otherwise -> return [] + Tok _ Symbol "{" -> (t:) <$> go (n + 1) + _ -> (t:) <$> go n braced :: PandocMonad m => LP m [Tok] -braced = symbol '{' *> braced' anyTok 1 +braced = braced' anyTok -- URLs require special handling, because they can contain % -- characters. So we retonenize comments as we go... bracedUrl :: PandocMonad m => LP m [Tok] -bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1 +bracedUrl = braced' (retokenizeComment >> anyTok) -- For handling URLs, which allow literal % characters... retokenizeComment :: PandocMonad m => LP m () -- cgit v1.2.3 From 31b8f60ea82d96b370cf4a765c46c18004ff6fa8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Feb 2021 22:03:29 -0800 Subject: LaTeX reader: avoid macro resolution code if no macros defined. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 35 +++++++++++++++++--------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index dab4d334e..fc8542894 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -453,20 +453,24 @@ doMacros = do updateState $ \st -> st{ sExpanded = True } doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok] -doMacros' n inp = - case inp of - Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros n spos name ts - Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros n spos ("end" <> name) ts - Tok _ (CtrlSeq "expandafter") _ : t : ts - -> combineTok t <$> doMacros' n ts - Tok spos (CtrlSeq name) _ : ts - -> handleMacros n spos name ts - _ -> return inp - <|> return inp +doMacros' n inp = do + macros <- sMacros <$> getState + if M.null macros + then return inp + else + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros macros n spos name ts + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros macros n spos ("end" <> name) ts + Tok _ (CtrlSeq "expandafter") _ : t : ts + -> combineTok t <$> doMacros' n ts + Tok spos (CtrlSeq name) _ : ts + -> handleMacros macros n spos name ts + _ -> return inp + <|> return inp where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) @@ -507,10 +511,9 @@ doMacros' n inp = Tok spos (CtrlSeq x) (txt <> " ") : acc addTok _ _ spos t acc = setpos spos t : acc - handleMacros n' spos name ts = do + handleMacros macros n' spos name ts = do when (n' > 20) -- detect macro expansion loops $ throwError $ PandocMacroLoop name - macros <- sMacros <$> getState case M.lookup name macros of Nothing -> mzero Just (Macro expansionPoint argspecs optarg newtoks) -> do -- cgit v1.2.3 From cec541e54cd947c8032f9148db18104cd1a48783 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Feb 2021 22:14:31 -0800 Subject: LaTeX reader: Another small improvement to macro handling. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index fc8542894..953747d2f 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -461,16 +461,15 @@ doMacros' n inp = do case inp of Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros macros n spos name ts + -> handleMacros macros n spos name ts <|> return inp Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros macros n spos ("end" <> name) ts + -> handleMacros macros n spos ("end" <> name) ts <|> return inp Tok _ (CtrlSeq "expandafter") _ : t : ts -> combineTok t <$> doMacros' n ts Tok spos (CtrlSeq name) _ : ts - -> handleMacros macros n spos name ts + -> handleMacros macros n spos name ts <|> return inp _ -> return inp - <|> return inp where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) -- cgit v1.2.3 From 321343b2cf8a4a75abe1b6713aa40e278ca57997 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Feb 2021 22:49:20 -0800 Subject: HTML reader: small efficiency improvements. Also, remove exported class NamedTag(..) [API change]. This was just intended to smooth over the transition from String to Text and is no longer needed. The functions isInlineTag and isBlockTag are no longer polymorphic. --- src/Text/Pandoc/Readers/HTML.hs | 43 +++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 47856d2f7..50201fe77 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -19,7 +19,6 @@ module Text.Pandoc.Readers.HTML ( readHtml , htmlInBalanced , isInlineTag , isBlockTag - , NamedTag(..) , isTextTag , isCommentTag ) where @@ -579,9 +578,9 @@ tagToText (TagOpen "br" _) = "\n" tagToText _ = "" inline :: PandocMonad m => TagParser m Inlines -inline = do +inline = pTagText <|> do + tag <- lookAhead (pSatisfy isInlineTag) exts <- getOption readerExtensions - tag <- lookAhead pAny case tag of TagOpen name attr -> case name of @@ -935,27 +934,21 @@ pSpace = many1 (satisfy isSpace) >>= \xs -> then return B.softbreak else return B.space -class NamedTag a where - getTagName :: a -> Maybe Text - -instance NamedTag (Tag Text) where - getTagName (TagOpen t _) = Just t - getTagName (TagClose t) = Just t - getTagName _ = Nothing +getTagName :: Tag Text -> Maybe Text +getTagName (TagOpen t _) = Just t +getTagName (TagClose t) = Just t +getTagName _ = Nothing -instance NamedTag (Tag String) where - getTagName (TagOpen t _) = Just (T.pack t) - getTagName (TagClose t) = Just (T.pack t) - getTagName _ = Nothing - -isInlineTag :: NamedTag (Tag a) => Tag a -> Bool +isInlineTag :: Tag Text -> Bool isInlineTag t = - isCommentTag t || case getTagName t of - Nothing -> False - Just x -> x `Set.notMember` blockTags || - T.take 1 x == "?" -- processing instr. - -isBlockTag :: NamedTag (Tag a) => Tag a -> Bool + isCommentTag t || + case getTagName t of + Nothing -> False + Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t + Just x -> x `Set.notMember` blockTags || + T.take 1 x == "?" -- processing instr. + +isBlockTag :: Tag Text -> Bool isBlockTag t = isBlockTagName || isTagComment t where isBlockTagName = case getTagName t of @@ -966,10 +959,10 @@ isBlockTag t = isBlockTagName || isTagComment t || x `Set.member` eitherBlockOrInline Nothing -> False -isTextTag :: Tag a -> Bool +isTextTag :: Tag Text -> Bool isTextTag = tagText (const True) -isCommentTag :: Tag a -> Bool +isCommentTag :: Tag Text -> Bool isCommentTag = tagComment (const True) --- parsers for use in markdown, textile readers @@ -1018,7 +1011,7 @@ htmlInBalanced' tagname ts = fromMaybe [] $ go 0 ts go n (t:ts') = (t :) <$> go n ts' go _ [] = mzero -hasTagWarning :: [Tag a] -> Bool +hasTagWarning :: [Tag Text] -> Bool hasTagWarning (TagWarning _:_) = True hasTagWarning _ = False -- cgit v1.2.3 From d8ef383692a167c97c67114107878a60d0aee6e1 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Feb 2021 23:01:07 -0800 Subject: T.P.Shared: remove some obsolete functions [API change]. Removed: - `splitByIndices` - `splitStringByIndicies` - `substitute` - `underlineSpan` None of these are used elsewhere in the code base. --- src/Text/Pandoc/Shared.hs | 44 +------------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index a579681b1..922df7922 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -21,10 +21,7 @@ module Text.Pandoc.Shared ( -- * List processing splitBy, splitTextBy, - splitByIndices, - splitStringByIndices, splitTextByIndices, - substitute, ordNub, findM, -- * Text processing @@ -74,7 +71,6 @@ module Text.Pandoc.Shared ( addMetaField, makeMeta, eastAsianLineBreakFilter, - underlineSpan, htmlSpanLikeElements, splitSentences, filterIpynbOutput, @@ -113,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum, generalCategory, GeneralCategory(NonSpacingMark, SpacingCombiningMark, EnclosingMark, ConnectorPunctuation)) -import Data.List (find, intercalate, intersperse, stripPrefix, sortOn) +import Data.List (find, intercalate, intersperse, sortOn) import qualified Data.Map as M import Data.Maybe (mapMaybe, fromMaybe) import Data.Monoid (Any (..)) @@ -160,27 +156,6 @@ splitTextBy isSep t | otherwise = let (first, rest) = T.break isSep t in first : splitTextBy isSep (T.dropWhile isSep rest) -{-# DEPRECATED splitByIndices "This function is slated for removal" #-} -splitByIndices :: [Int] -> [a] -> [[a]] -splitByIndices [] lst = [lst] -splitByIndices (x:xs) lst = first:splitByIndices (map (\y -> y - x) xs) rest - where (first, rest) = splitAt x lst - -{-# DEPRECATED splitStringByIndices "This function is slated for removal" #-} --- | Split string into chunks divided at specified indices. -splitStringByIndices :: [Int] -> [Char] -> [[Char]] -splitStringByIndices [] lst = [lst] -splitStringByIndices (x:xs) lst = - let (first, rest) = splitAt' x lst in - first : splitStringByIndices (map (\y -> y - x) xs) rest - --- DEPRECATED: can be removed when splitStringByIndices is -splitAt' :: Int -> [Char] -> ([Char],[Char]) -splitAt' _ [] = ([],[]) -splitAt' n xs | n <= 0 = ([],xs) -splitAt' n (x:xs) = (x:ys,zs) - where (ys,zs) = splitAt' (n - charWidth x) xs - splitTextByIndices :: [Int] -> T.Text -> [T.Text] splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) where @@ -189,16 +164,6 @@ splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) let (first, rest) = T.splitAt x t in first : splitTextByRelIndices xs rest -{-# DEPRECATED substitute "This function is slated for removal" #-} --- | Replace each occurrence of one sublist in a list with another. -substitute :: (Eq a) => [a] -> [a] -> [a] -> [a] -substitute _ _ [] = [] -substitute [] _ xs = xs -substitute target replacement lst@(x:xs) = - case stripPrefix target lst of - Just lst' -> replacement ++ substitute target replacement lst' - Nothing -> x : substitute target replacement xs - ordNub :: (Ord a) => [a] -> [a] ordNub l = go Set.empty l where @@ -765,13 +730,6 @@ eastAsianLineBreakFilter = bottomUp go go xs = xs -{-# DEPRECATED underlineSpan "Use Text.Pandoc.Builder.underline instead" #-} --- | Builder for underline (deprecated). --- This probably belongs in Builder.hs in pandoc-types. --- Will be replaced once Underline is an element. -underlineSpan :: Inlines -> Inlines -underlineSpan = B.underline - -- | Set of HTML elements that are represented as Span with a class equal as -- the element tag itself. htmlSpanLikeElements :: Set.Set T.Text -- cgit v1.2.3 From c0c8865eaac7089b0304c9b21d981f82ea4c2ebd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Feb 2021 23:40:02 -0800 Subject: HTML reader: small performance tweak. --- src/Text/Pandoc/Readers/HTML.hs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 50201fe77..b73c138ab 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -157,11 +157,10 @@ pHead = pInTags "head" $ pTitle <|> pMetaTag <|> pBaseTag <|> (mempty <$ pAny) return mempty block :: PandocMonad m => TagParser m Blocks -block = do +block = ((do + tag <- lookAhead (pSatisfy isBlockTag) exts <- getOption readerExtensions - tag <- lookAhead pAny - res <- - (case tag of + case tag of TagOpen name attr -> let type' = fromMaybe "" $ lookup "type" attr <|> lookup "epub:type" attr @@ -214,11 +213,8 @@ block = do | epubExts -> eSwitch B.para block _ -> mzero - _ -> mzero) - <|> pPlain - <|> pRawHtmlBlock - trace (T.take 60 $ tshow $ B.toList res) - return res + _ -> mzero) <|> pPlain <|> pRawHtmlBlock) >>= \res -> + res <$ trace (T.take 60 $ tshow $ B.toList res) namespaces :: PandocMonad m => [(Text, TagParser m Inlines)] namespaces = [(mathMLNamespace, pMath True)] -- cgit v1.2.3 From f43cb5ddcf56ab9387b24ad55c2c30eceb606fad Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 21 Feb 2021 10:26:48 -0800 Subject: LaTeX reader: further performance optimization. Avoid unnecessary 'doMacros'. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 42 +++++++++++++++----------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 953747d2f..20311651b 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -446,30 +446,25 @@ satisfyTok f = do doMacros :: PandocMonad m => LP m () doMacros = do - expanded <- sExpanded <$> getState - verbatimMode <- sVerbatimMode <$> getState - unless (expanded || verbatimMode) $ do - getInput >>= doMacros' 1 >>= setInput - updateState $ \st -> st{ sExpanded = True } + st <- getState + unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do + getInput >>= doMacros' 1 >>= setInput + updateState $ \s -> s{ sExpanded = True } doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok] -doMacros' n inp = do - macros <- sMacros <$> getState - if M.null macros - then return inp - else - case inp of - Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros macros n spos name ts <|> return inp - Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : - Tok _ Word name : Tok _ Symbol "}" : ts - -> handleMacros macros n spos ("end" <> name) ts <|> return inp - Tok _ (CtrlSeq "expandafter") _ : t : ts - -> combineTok t <$> doMacros' n ts - Tok spos (CtrlSeq name) _ : ts - -> handleMacros macros n spos name ts <|> return inp - _ -> return inp +doMacros' n inp = + case inp of + Tok spos (CtrlSeq "begin") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros n spos name ts <|> return inp + Tok spos (CtrlSeq "end") _ : Tok _ Symbol "{" : + Tok _ Word name : Tok _ Symbol "}" : ts + -> handleMacros n spos ("end" <> name) ts <|> return inp + Tok _ (CtrlSeq "expandafter") _ : t : ts + -> combineTok t <$> doMacros' n ts + Tok spos (CtrlSeq name) _ : ts + -> handleMacros n spos name ts <|> return inp + _ -> return inp where combineTok (Tok spos (CtrlSeq name) x) (Tok _ Word w : ts) @@ -510,9 +505,10 @@ doMacros' n inp = do Tok spos (CtrlSeq x) (txt <> " ") : acc addTok _ _ spos t acc = setpos spos t : acc - handleMacros macros n' spos name ts = do + handleMacros n' spos name ts = do when (n' > 20) -- detect macro expansion loops $ throwError $ PandocMacroLoop name + macros <- sMacros <$> getState case M.lookup name macros of Nothing -> mzero Just (Macro expansionPoint argspecs optarg newtoks) -> do -- cgit v1.2.3 From db4f8823152578d199691f8084fddcc0f04f679b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 21 Feb 2021 11:23:04 -0800 Subject: LaTeX reader: removed sExpanded in state. This isn't actually needed and checking it doesn't change anything. Also remove an unnecessary `doMacros` before `satisfyTok`, which does it anyway. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 20311651b..0e098d9d9 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -155,7 +155,6 @@ data LaTeXState = LaTeXState{ sOptions :: ReaderOptions , sLabels :: M.Map Text [Inline] , sHasChapters :: Bool , sToggles :: M.Map Text Bool - , sExpanded :: Bool , sFileContents :: M.Map Text Text , sEnableWithRaw :: Bool , sRawTokens :: IntMap.IntMap [Tok] @@ -183,7 +182,6 @@ defaultLaTeXState = LaTeXState{ sOptions = def , sLabels = M.empty , sHasChapters = False , sToggles = M.empty - , sExpanded = False , sFileContents = M.empty , sEnableWithRaw = True , sRawTokens = IntMap.empty @@ -256,7 +254,6 @@ rawLaTeXParser toks retokenize parser valParser = do Right toks' -> do res <- lift $ runParserT (do when retokenize $ do -- retokenize, applying macros - doMacros ts <- many (satisfyTok (const True)) setInput ts rawparser) @@ -432,8 +429,7 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok satisfyTok f = do doMacros -- apply macros on remaining input stream res <- tokenPrim (T.unpack . untoken) updatePos matcher - updateState $ \st -> st{ sExpanded = False - , sRawTokens = + updateState $ \st -> st{ sRawTokens = if sEnableWithRaw st then IntMap.map (res:) $ sRawTokens st else sRawTokens st } @@ -447,9 +443,8 @@ satisfyTok f = do doMacros :: PandocMonad m => LP m () doMacros = do st <- getState - unless (sExpanded st || sVerbatimMode st || M.null (sMacros st)) $ do + unless (sVerbatimMode st || M.null (sMacros st)) $ do getInput >>= doMacros' 1 >>= setInput - updateState $ \s -> s{ sExpanded = True } doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok] doMacros' n inp = -- cgit v1.2.3 From 2b37ed9f212b711a6381dc89ff7d3431d7c5d916 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 21 Feb 2021 11:29:38 -0800 Subject: LaTeX reader: further optimizations in satisfyTok. Benchmarks show 2/3 of the run time and 2/3 of the allocation of the Feb. 10 benchmarks. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 0e098d9d9..c2e10570d 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -429,11 +429,11 @@ satisfyTok :: PandocMonad m => (Tok -> Bool) -> LP m Tok satisfyTok f = do doMacros -- apply macros on remaining input stream res <- tokenPrim (T.unpack . untoken) updatePos matcher - updateState $ \st -> st{ sRawTokens = - if sEnableWithRaw st - then IntMap.map (res:) $ sRawTokens st - else sRawTokens st } - return res + updateState $ \st -> + if sEnableWithRaw st + then st{ sRawTokens = IntMap.map (res:) $ sRawTokens st } + else st + return $! res where matcher t | f t = Just t | otherwise = Nothing updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos -- cgit v1.2.3 From 80fde18fb1d983b938476ed5b3771ed5d6158d44 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 22 Feb 2021 11:30:07 -0800 Subject: Text.Pandoc.UTF8: change IO functions to return Text, not String. [API change] This affects `readFile`, `getContents`, `writeFileWith`, `writeFile`, `putStrWith`, `putStr`, `putStrLnWith`, `putStrLn`. `hPutStrWith`, `hPutStr`, `hPutStrLnWith`, `hPutStrLn`, `hGetContents`. This avoids the need to uselessly create a linked list of characters when emiting output. --- src/Text/Pandoc/App.hs | 10 ++++---- src/Text/Pandoc/App/CommandLineOptions.hs | 38 +++++++++++++++------------- src/Text/Pandoc/App/OutputSettings.hs | 4 +-- src/Text/Pandoc/Class/IO.hs | 6 ++--- src/Text/Pandoc/Error.hs | 2 +- src/Text/Pandoc/PDF.hs | 25 +++++++++--------- src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/UTF8.hs | 42 +++++++++++++++---------------- test/Tests/Command.hs | 2 +- test/Tests/Old.hs | 5 ++-- test/Tests/Readers/LaTeX.hs | 2 +- 11 files changed, 70 insertions(+), 68 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 6a071ad5a..63996828e 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -73,8 +73,9 @@ convertWithOpts opts = do let verbosity = optVerbosity opts when (optDumpArgs opts) $ - do UTF8.hPutStrLn stdout outputFile - mapM_ (UTF8.hPutStrLn stdout) (fromMaybe ["-"] $ optInputFiles opts) + do UTF8.hPutStrLn stdout (T.pack outputFile) + mapM_ (UTF8.hPutStrLn stdout . T.pack) + (fromMaybe ["-"] $ optInputFiles opts) exitSuccess let sources = case optInputFiles opts of @@ -354,6 +355,5 @@ writeFnBinary "-" = liftIO . BL.putStr writeFnBinary f = liftIO . BL.writeFile (UTF8.encodePath f) writerFn :: MonadIO m => IO.Newline -> FilePath -> Text -> m () --- TODO this implementation isn't maximally efficient: -writerFn eol "-" = liftIO . UTF8.putStrWith eol . T.unpack -writerFn eol f = liftIO . UTF8.writeFileWith eol f . T.unpack +writerFn eol "-" = liftIO . UTF8.putStrWith eol +writerFn eol f = liftIO . UTF8.writeFileWith eol f diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index a4c510d97..0a8193f6c 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -812,10 +812,10 @@ options = map (\c -> ['-',c]) shorts ++ map ("--" ++) longs let allopts = unwords (concatMap optnames options) - UTF8.hPutStrLn stdout $ printf tpl allopts - (unwords readersNames) - (unwords writersNames) - (unwords $ map (T.unpack . fst) highlightingStyles) + UTF8.hPutStrLn stdout $ T.pack $ printf tpl allopts + (T.unpack $ T.unwords readersNames) + (T.unpack $ T.unwords writersNames) + (T.unpack $ T.unwords $ map fst highlightingStyles) (unwords datafiles) exitSuccess )) "" -- "Print bash completion script" @@ -854,7 +854,7 @@ options = else if extensionEnabled x allExts then '-' else ' ') : drop 4 (show x) - mapM_ (UTF8.hPutStrLn stdout . showExt) + mapM_ (UTF8.hPutStrLn stdout . T.pack . showExt) [ex | ex <- extList, extensionEnabled ex allExts] exitSuccess ) "FORMAT") @@ -868,14 +868,14 @@ options = , sShortname s `notElem` [T.pack "Alert", T.pack "Alert_indent"] ] - mapM_ (UTF8.hPutStrLn stdout) (sort langs) + mapM_ (UTF8.hPutStrLn stdout . T.pack) (sort langs) exitSuccess )) "" , Option "" ["list-highlight-styles"] (NoArg (\_ -> do - mapM_ (UTF8.hPutStrLn stdout . T.unpack . fst) highlightingStyles + mapM_ (UTF8.hPutStrLn stdout . fst) highlightingStyles exitSuccess )) "" @@ -893,7 +893,7 @@ options = | T.null t -> -- e.g. for docx, odt, json: E.throwIO $ PandocCouldNotFindDataFileError $ T.pack ("templates/default." ++ arg) - | otherwise -> write . T.unpack $ t + | otherwise -> write t Left e -> E.throwIO e exitSuccess) "FORMAT") @@ -940,11 +940,13 @@ options = (\_ -> do prg <- getProgName defaultDatadirs <- defaultUserDataDirs - UTF8.hPutStrLn stdout (prg ++ " " ++ T.unpack pandocVersion ++ - compileInfo ++ - "\nUser data directory: " ++ - intercalate " or " defaultDatadirs ++ - ('\n':copyrightMessage)) + UTF8.hPutStrLn stdout + $ T.pack + $ prg ++ " " ++ T.unpack pandocVersion ++ + compileInfo ++ + "\nUser data directory: " ++ + intercalate " or " defaultDatadirs ++ + ('\n':copyrightMessage) exitSuccess )) "" -- "Print version" @@ -952,7 +954,7 @@ options = (NoArg (\_ -> do prg <- getProgName - UTF8.hPutStr stdout (usageMessage prg options) + UTF8.hPutStr stdout (T.pack $ usageMessage prg options) exitSuccess )) "" -- "Show help" ] @@ -1013,12 +1015,12 @@ handleUnrecognizedOption "-R" = handleUnrecognizedOption "--parse-raw" handleUnrecognizedOption x = (("Unknown option " ++ x ++ ".") :) -readersNames :: [String] -readersNames = sort (map (T.unpack . fst) (readers :: [(Text, Reader PandocIO)])) +readersNames :: [Text] +readersNames = sort (map fst (readers :: [(Text, Reader PandocIO)])) -writersNames :: [String] +writersNames :: [Text] writersNames = sort - ("pdf" : map (T.unpack . fst) (writers :: [(Text, Writer PandocIO)])) + ("pdf" : map fst (writers :: [(Text, Writer PandocIO)])) splitField :: String -> (String, String) splitField = second (tailDef "true") . break (`elemText` ":=") diff --git a/src/Text/Pandoc/App/OutputSettings.hs b/src/Text/Pandoc/App/OutputSettings.hs index 53c7d82ef..3864ab188 100644 --- a/src/Text/Pandoc/App/OutputSettings.hs +++ b/src/Text/Pandoc/App/OutputSettings.hs @@ -59,8 +59,8 @@ optToOutputSettings opts = do let outputFile = fromMaybe "-" (optOutputFile opts) when (optDumpArgs opts) . liftIO $ do - UTF8.hPutStrLn stdout outputFile - mapM_ (UTF8.hPutStrLn stdout) (fromMaybe [] $ optInputFiles opts) + UTF8.hPutStrLn stdout (T.pack outputFile) + mapM_ (UTF8.hPutStrLn stdout . T.pack) (fromMaybe [] $ optInputFiles opts) exitSuccess epubMetadata <- traverse readUtf8File $ optEpubMetadata opts diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index eecda5711..bb4e2b732 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -183,7 +183,7 @@ getModificationTime = liftIOError System.Directory.getModificationTime logOutput :: (PandocMonad m, MonadIO m) => LogMessage -> m () logOutput msg = liftIO $ do UTF8.hPutStr stderr $ - "[" ++ show (messageVerbosity msg) ++ "] " + "[" <> T.pack (show (messageVerbosity msg)) <> "] " alertIndent $ T.lines $ showLogMessage msg -- | Prints the list of lines to @stderr@, indenting every but the first @@ -191,10 +191,10 @@ logOutput msg = liftIO $ do alertIndent :: [Text] -> IO () alertIndent [] = return () alertIndent (l:ls) = do - UTF8.hPutStrLn stderr $ unpack l + UTF8.hPutStrLn stderr l mapM_ go ls where go l' = do UTF8.hPutStr stderr " " - UTF8.hPutStrLn stderr $ unpack l' + UTF8.hPutStrLn stderr l' -- | Extract media from the mediabag into a directory. extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 94c013cdb..0fdb7bfe5 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -191,6 +191,6 @@ handleError (Left e) = err :: Int -> Text -> IO a err exitCode msg = do - UTF8.hPutStrLn stderr (T.unpack msg) + UTF8.hPutStrLn stderr msg exitWith $ ExitFailure exitCode return undefined diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 3f9dd8dad..6f462aad5 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -270,7 +270,7 @@ missingCharacterWarnings verbosity log' = do | isAscii c = T.singleton c | otherwise = T.pack $ c : " (U+" ++ printf "%04X" (ord c) ++ ")" let addCodePoint = T.concatMap toCodePoint - let warnings = [ addCodePoint (T.pack $ utf8ToString (BC.drop 19 l)) + let warnings = [ addCodePoint (utf8ToText (BC.drop 19 l)) | l <- ls , isMissingCharacterWarning l ] @@ -314,7 +314,7 @@ runTectonic verbosity program args' tmpDir' source = do env <- liftIO getEnvironment when (verbosity >= INFO) $ liftIO $ showVerboseInfo (Just tmpDir) program programArgs env - (utf8ToString sourceBL) + (utf8ToText sourceBL) (exit, out) <- liftIO $ E.catch (pipeProcess (Just env) program programArgs sourceBL) (handlePDFProgramNotFound program) @@ -385,7 +385,7 @@ runTeXProgram verbosity program args numRuns tmpDir' source = do (pipeProcess (Just env'') program programArgs BL.empty) (handlePDFProgramNotFound program) when (verbosity >= INFO) $ liftIO $ do - UTF8.hPutStrLn stderr $ "[makePDF] Run #" ++ show runNumber + UTF8.hPutStrLn stderr $ "[makePDF] Run #" <> tshow runNumber BL.hPutStr stderr out UTF8.hPutStr stderr "\n" if runNumber < numRuns @@ -405,7 +405,7 @@ generic2pdf :: Verbosity generic2pdf verbosity program args source = do env' <- getEnvironment when (verbosity >= INFO) $ - showVerboseInfo Nothing program args env' (T.unpack source) + showVerboseInfo Nothing program args env' source (exit, out) <- E.catch (pipeProcess (Just env') program args (BL.fromStrict $ UTF8.fromText source)) @@ -494,19 +494,20 @@ showVerboseInfo :: Maybe FilePath -> String -> [String] -> [(String, String)] - -> String + -> Text -> IO () showVerboseInfo mbTmpDir program programArgs env source = do case mbTmpDir of Just tmpDir -> do UTF8.hPutStrLn stderr "[makePDF] temp dir:" - UTF8.hPutStrLn stderr tmpDir + UTF8.hPutStrLn stderr (T.pack tmpDir) Nothing -> return () UTF8.hPutStrLn stderr "[makePDF] Command line:" - UTF8.hPutStrLn stderr $ program ++ " " ++ unwords (map show programArgs) + UTF8.hPutStrLn stderr $ + T.pack program <> " " <> T.pack (unwords (map show programArgs)) UTF8.hPutStr stderr "\n" UTF8.hPutStrLn stderr "[makePDF] Environment:" - mapM_ (UTF8.hPutStrLn stderr . show) env + mapM_ (UTF8.hPutStrLn stderr . tshow) env UTF8.hPutStr stderr "\n" UTF8.hPutStrLn stderr "[makePDF] Source:" UTF8.hPutStrLn stderr source @@ -517,8 +518,8 @@ handlePDFProgramNotFound program e E.throwIO $ PandocPDFProgramNotFoundError $ T.pack program | otherwise = E.throwIO e -utf8ToString :: ByteString -> String -utf8ToString lbs = +utf8ToText :: ByteString -> Text +utf8ToText lbs = case decodeUtf8' lbs of - Left _ -> BC.unpack lbs -- if decoding fails, treat as latin1 - Right t -> TL.unpack t + Left _ -> T.pack $ BC.unpack lbs -- if decoding fails, treat as latin1 + Right t -> TL.toStrict t diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4c4dd531d..8d3799c3e 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1148,7 +1148,7 @@ testStringWith :: Show a => ParserT Text ParserState Identity a -> Text -> IO () -testStringWith parser str = UTF8.putStrLn $ show $ +testStringWith parser str = UTF8.putStrLn $ tshow $ readWith parser defaultParserState str -- | Parsing options. diff --git a/src/Text/Pandoc/UTF8.hs b/src/Text/Pandoc/UTF8.hs index 567f5abe5..4d5921faf 100644 --- a/src/Text/Pandoc/UTF8.hs +++ b/src/Text/Pandoc/UTF8.hs @@ -39,67 +39,65 @@ where import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as BL +import Data.Text (Text) import qualified Data.Text as T +import qualified Data.Text.IO as TIO import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL import Prelude hiding (getContents, putStr, putStrLn, readFile, writeFile) import System.IO hiding (getContents, hGetContents, hPutStr, hPutStrLn, putStr, putStrLn, readFile, writeFile) -import qualified System.IO as IO -readFile :: FilePath -> IO String +readFile :: FilePath -> IO Text readFile f = do h <- openFile (encodePath f) ReadMode hGetContents h -getContents :: IO String +getContents :: IO Text getContents = hGetContents stdin -writeFileWith :: Newline -> FilePath -> String -> IO () +writeFileWith :: Newline -> FilePath -> Text -> IO () writeFileWith eol f s = withFile (encodePath f) WriteMode $ \h -> hPutStrWith eol h s -writeFile :: FilePath -> String -> IO () +writeFile :: FilePath -> Text -> IO () writeFile = writeFileWith nativeNewline -putStrWith :: Newline -> String -> IO () +putStrWith :: Newline -> Text -> IO () putStrWith eol s = hPutStrWith eol stdout s -putStr :: String -> IO () +putStr :: Text -> IO () putStr = putStrWith nativeNewline -putStrLnWith :: Newline -> String -> IO () +putStrLnWith :: Newline -> Text -> IO () putStrLnWith eol s = hPutStrLnWith eol stdout s -putStrLn :: String -> IO () +putStrLn :: Text -> IO () putStrLn = putStrLnWith nativeNewline -hPutStrWith :: Newline -> Handle -> String -> IO () +hPutStrWith :: Newline -> Handle -> Text -> IO () hPutStrWith eol h s = hSetNewlineMode h (NewlineMode eol eol) >> - hSetEncoding h utf8 >> IO.hPutStr h s + hSetEncoding h utf8 >> TIO.hPutStr h s -hPutStr :: Handle -> String -> IO () +hPutStr :: Handle -> Text -> IO () hPutStr = hPutStrWith nativeNewline -hPutStrLnWith :: Newline -> Handle -> String -> IO () +hPutStrLnWith :: Newline -> Handle -> Text -> IO () hPutStrLnWith eol h s = hSetNewlineMode h (NewlineMode eol eol) >> - hSetEncoding h utf8 >> IO.hPutStrLn h s + hSetEncoding h utf8 >> TIO.hPutStrLn h s -hPutStrLn :: Handle -> String -> IO () +hPutStrLn :: Handle -> Text -> IO () hPutStrLn = hPutStrLnWith nativeNewline -hGetContents :: Handle -> IO String -hGetContents = fmap toString . B.hGetContents --- hGetContents h = hSetEncoding h utf8_bom --- >> hSetNewlineMode h universalNewlineMode --- >> IO.hGetContents h +hGetContents :: Handle -> IO Text +hGetContents = fmap toText . B.hGetContents -- | Convert UTF8-encoded ByteString to Text, also -- removing '\r' characters. -toText :: B.ByteString -> T.Text +toText :: B.ByteString -> Text toText = T.decodeUtf8 . filterCRs . dropBOM where dropBOM bs = if "\xEF\xBB\xBF" `B.isPrefixOf` bs @@ -127,7 +125,7 @@ toTextLazy = TL.decodeUtf8 . filterCRs . dropBOM toStringLazy :: BL.ByteString -> String toStringLazy = TL.unpack . toTextLazy -fromText :: T.Text -> B.ByteString +fromText :: Text -> B.ByteString fromText = T.encodeUtf8 fromTextLazy :: TL.Text -> BL.ByteString diff --git a/test/Tests/Command.hs b/test/Tests/Command.hs index 07d825f73..59b04eac1 100644 --- a/test/Tests/Command.hs +++ b/test/Tests/Command.hs @@ -130,7 +130,7 @@ runCommandTest testExePath fp num code = let cmdline = "% " <> cmd let x = cmdline <> "\n" <> input <> "^D\n" <> norm let y = cmdline <> "\n" <> input <> "^D\n" <> newnorm - let updated = T.unpack $ T.replace (T.pack x) (T.pack y) (T.pack raw) + let updated = T.replace (T.pack x) (T.pack y) raw UTF8.writeFile fp' updated extractCommandTest :: FilePath -> FilePath -> TestTree diff --git a/test/Tests/Old.hs b/test/Tests/Old.hs index 17ece49fd..160086be2 100644 --- a/test/Tests/Old.hs +++ b/test/Tests/Old.hs @@ -22,6 +22,7 @@ import Test.Tasty (TestTree, testGroup) import Test.Tasty.Golden.Advanced (goldenTest) import Tests.Helpers hiding (test) import qualified Text.Pandoc.UTF8 as UTF8 +import qualified Data.Text as T tests :: FilePath -> [TestTree] tests pandocPath = @@ -231,7 +232,7 @@ tests pandocPath = -- makes sure file is fully closed after reading readFile' :: FilePath -> IO String readFile' f = do s <- UTF8.readFile f - return $! (length s `seq` s) + return $! (T.length s `seq` T.unpack s) lhsWriterTests :: FilePath -> String -> [TestTree] lhsWriterTests pandocPath format @@ -333,7 +334,7 @@ testWithNormalize normalizer pandocPath testname opts inp norm = $ UTF8.toStringLazy out -- filter \r so the tests will work on Windows machines else fail $ "Pandoc failed with error code " ++ show ec - updateGolden = UTF8.writeFile norm + updateGolden = UTF8.writeFile norm . T.pack options = ["--data-dir=../data","--quiet"] ++ [inp] ++ opts compareValues :: FilePath -> [String] -> String -> String -> IO (Maybe String) diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index 77104c853..8385b751e 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -54,7 +54,7 @@ tokUntokRt s = untokenize (tokenize "random" t) == t tests :: [TestTree] tests = [ testGroup "tokenization" [ testCase "tokenizer round trip on test case" $ do - orig <- T.pack <$> UTF8.readFile "../test/latex-reader.latex" + orig <- UTF8.readFile "../test/latex-reader.latex" let new = untokenize $ tokenize "../test/latex-reader.latex" orig assertEqual "untokenize . tokenize is identity" orig new -- cgit v1.2.3 From 4617f229ea051fea50bce6307fe8221b246a23fe Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 22 Feb 2021 13:28:47 -0800 Subject: Text.Pandoc.MIME: add exported function getCharset. [API change] --- src/Text/Pandoc/MIME.hs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 53c5cd018..3d06e1579 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -10,8 +10,13 @@ Mime type lookup. -} -module Text.Pandoc.MIME ( MimeType, getMimeType, getMimeTypeDef, - extensionFromMimeType, mediaCategory ) where +module Text.Pandoc.MIME ( + MimeType, + getMimeType, + getMimeTypeDef, + getCharset, + extensionFromMimeType, + mediaCategory ) where import Data.List (isPrefixOf, isSuffixOf) import qualified Data.Map as M import qualified Data.Text as T @@ -54,6 +59,14 @@ reverseMimeTypes = M.fromList $ map swap mimeTypesList mimeTypes :: M.Map T.Text MimeType mimeTypes = M.fromList mimeTypesList +-- | Get the charset from a mime type, if one is present. +getCharset :: MimeType -> Maybe T.Text +getCharset mt = + let (_,y) = T.breakOn "charset=" mt + in if T.null y + then Nothing + else Just $ T.toUpper $ T.takeWhile (/= ';') $ T.drop 8 y + -- | Collection of common mime types. -- Except for first entry, list borrowed from -- <https://github.com/Happstack/happstack-server/blob/master/src/Happstack/Server/FileServe/BuildingBlocks.hs happstack-server> -- cgit v1.2.3 From bafccd5aa2dc977e5e49b67c587e1507dd73417c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 22 Feb 2021 13:59:38 -0800 Subject: T.P.Error: Add PandocUnsupportedCharsetError constructor... ...for PandocError. [API change] --- MANUAL.txt | 1 + src/Text/Pandoc/Error.hs | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 51898f224..ba031bcb2 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1477,6 +1477,7 @@ Nonzero exit codes have the following meanings: 91 PandocMacroLoop 92 PandocUTF8DecodingError 93 PandocIpynbDecodingError + 94 PandocUnsupportedCharsetError 97 PandocCouldNotFindDataFileError 99 PandocResourceNotFound ----- ------------------------------------ diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 0fdb7bfe5..8102f04cc 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -60,6 +60,7 @@ data PandocError = PandocIOError Text IOError | PandocMacroLoop Text | PandocUTF8DecodingError Text Int Word8 | PandocIpynbDecodingError Text + | PandocUnsupportedCharsetError Text | PandocUnknownReaderError Text | PandocUnknownWriterError Text | PandocUnsupportedExtensionError Text Text @@ -124,6 +125,8 @@ renderError e = "The input must be a UTF-8 encoded text." PandocIpynbDecodingError w -> "ipynb decoding error: " <> w + PandocUnsupportedCharsetError charset -> + "Unsupported charset " <> charset PandocUnknownReaderError r -> "Unknown input format " <> r <> case r of @@ -183,6 +186,7 @@ handleError (Left e) = PandocMacroLoop{} -> 91 PandocUTF8DecodingError{} -> 92 PandocIpynbDecodingError{} -> 93 + PandocUnsupportedCharsetError{} -> 94 PandocUnknownReaderError{} -> 21 PandocUnknownWriterError{} -> 22 PandocUnsupportedExtensionError{} -> 23 -- cgit v1.2.3 From 5a73c5d3f8136c7fba7429c3ae3a8ae31c58030b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 22 Feb 2021 14:01:10 -0800 Subject: When downloading content from URL arguments, be sensitive to... the character encoding. We can properly handle UTF-8 and latin1 (ISO-8859-1); for others we raise an error. See #5600. --- src/Text/Pandoc/App.hs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 63996828e..59af029b5 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -28,6 +28,7 @@ import Control.Monad ( (>=>), when ) import Control.Monad.Trans ( MonadIO(..) ) import Control.Monad.Except (throwError) import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as BL import Data.Char (toLower) import Data.Maybe (fromMaybe, isJust, isNothing) @@ -45,6 +46,7 @@ import System.FilePath ( takeBaseName, takeExtension ) import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) import Text.Pandoc +import Text.Pandoc.MIME (getCharset) import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, IpynbOutput (..) ) @@ -344,7 +346,13 @@ readSource src = case parseURI src of _ -> PandocAppError (tshow e)) readURI :: FilePath -> PandocIO Text -readURI src = UTF8.toText . fst <$> openURL (T.pack src) +readURI src = do + (bs, mt) <- openURL (T.pack src) + case mt >>= getCharset of + Just "UTF-8" -> return $ UTF8.toText bs + Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs + Just charset -> throwError $ PandocUnsupportedCharsetError charset + Nothing -> return $ UTF8.toText bs readFile' :: MonadIO m => FilePath -> m BL.ByteString readFile' "-" = liftIO BL.getContents -- cgit v1.2.3 From d30791a38166538be60a134196f1d2675275017d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 22 Feb 2021 14:17:22 -0800 Subject: Fall back to latin1 if UTF-8 decoding fails... ...when handling URL argument served with no charset in the mime type. The assumption is that most pages that don't specify a charset in the mime type are either UTF-8 or latin1. I think that's a good assumption, though I'm not sure. --- src/Text/Pandoc/App.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 59af029b5..40fb34834 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -352,7 +353,12 @@ readURI src = do Just "UTF-8" -> return $ UTF8.toText bs Just "ISO-8859-1" -> return $ T.pack $ B8.unpack bs Just charset -> throwError $ PandocUnsupportedCharsetError charset - Nothing -> return $ UTF8.toText bs + Nothing -> liftIO $ -- try first as UTF-8, then as latin1 + E.catch (return $! UTF8.toText bs) + (\case + TSE.DecodeError{} -> + return $ T.pack $ B8.unpack bs + e -> E.throwIO e) readFile' :: MonadIO m => FilePath -> m BL.ByteString readFile' "-" = liftIO BL.getContents -- cgit v1.2.3 From f0a991a22be9b82e192d63bf80bbe39679bb2e07 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 22 Feb 2021 21:17:22 -0800 Subject: T.P.CSV: fix parsing of unquoted values. Previously we didn't allow unescaped quotes in unquoted values, but they are allowed. Closes #7112. --- src/Text/Pandoc/CSV.hs | 3 +-- test/command/7112.md | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 test/command/7112.md (limited to 'src') diff --git a/src/Text/Pandoc/CSV.hs b/src/Text/Pandoc/CSV.hs index ec212fa9a..2bd21bcfb 100644 --- a/src/Text/Pandoc/CSV.hs +++ b/src/Text/Pandoc/CSV.hs @@ -68,8 +68,7 @@ escaped opts = try $ pCSVUnquotedCell :: CSVOptions -> Parser Text pCSVUnquotedCell opts = T.pack <$> - many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n' - && c /= csvQuote opts)) + many (satisfy (\c -> c /= csvDelim opts && c /= '\r' && c /= '\n')) pCSVDelim :: CSVOptions -> Parser () pCSVDelim opts = do diff --git a/test/command/7112.md b/test/command/7112.md new file mode 100644 index 000000000..a75b9250a --- /dev/null +++ b/test/command/7112.md @@ -0,0 +1,15 @@ +``` +% pandoc -f rst +.. csv-table:: + + setting, ``echo PATH="path"`` +^D +<table> +<tbody> +<tr class="odd"> +<td>setting</td> +<td><code>echo PATH="path"</code></td> +</tr> +</tbody> +</table> +``` -- cgit v1.2.3 From fae6a204f1a072d5efe4dff35fd08fbad2071198 Mon Sep 17 00:00:00 2001 From: Salim B <salim@posteo.de> Date: Sat, 27 Feb 2021 01:56:04 +0000 Subject: Fix/update URLs and use HTTP**S** where possible (#7122) --- CONTRIBUTING.md | 16 ++++++++-------- COPYING.md | 2 +- COPYRIGHT | 6 +++--- INSTALL.md | 29 ++++++++++++++--------------- MANUAL.txt | 2 +- README.template | 10 +++++----- doc/lua-filters.md | 2 +- man/pandoc.1 | 2 +- man/pandoc.1.after | 2 +- src/Text/Pandoc/Citeproc.hs | 2 +- src/Text/Pandoc/Citeproc/BibTeX.hs | 8 ++++---- src/Text/Pandoc/Readers/Textile.hs | 2 +- src/Text/Pandoc/Writers/Markdown.hs | 2 +- test/txt2tags.t2t | 2 +- trypandoc/index.html | 2 +- windows/Makefile | 2 +- windows/pandoc.wxs | 4 ++-- 17 files changed, 47 insertions(+), 48 deletions(-) (limited to 'src') diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e87c96694..595fb94ff 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -372,7 +372,7 @@ The code -------- Pandoc has a publicly accessible git repository on -GitHub: <http://github.com/jgm/pandoc>. To get a local copy of the source: +GitHub: <https://github.com/jgm/pandoc>. To get a local copy of the source: git clone https://github.com/jgm/pandoc.git @@ -382,7 +382,7 @@ the pandoc library is in `src/`, the source for the tests is in The modules `Text.Pandoc.Definition`, `Text.Pandoc.Builder`, and `Text.Pandoc.Generic` are in a separate library `pandoc-types`. The code can -be found in <http://github.com/jgm/pandoc-types>. +be found in <https://github.com/jgm/pandoc-types>. To build pandoc, you will need a working installation of the [Haskell platform]. @@ -445,14 +445,14 @@ you may want to consider submitting a pull request to the [closed issues]: https://github.com/jgm/pandoc/issues?q=is%3Aissue+is%3Aclosed [latest released version]: https://github.com/jgm/pandoc/releases/latest [Nightly builds]: https://github.com/jgm/pandoc/actions?query=workflow%3ANightly -[pandoc-discuss]: http://groups.google.com/group/pandoc-discuss +[pandoc-discuss]: https://groups.google.com/group/pandoc-discuss [issue tracker]: https://github.com/jgm/pandoc/issues -[User's Guide]: http://pandoc.org/MANUAL.html -[FAQs]: http://pandoc.org/faqs.html -[EditorConfig]: http://editorconfig.org/ -[Haskell platform]: http://www.haskell.org/platform/ +[User's Guide]: https://pandoc.org/MANUAL.html +[FAQs]: https://pandoc.org/faqs.html +[EditorConfig]: https://editorconfig.org/ +[Haskell platform]: https://www.haskell.org/platform/ [hlint]: https://hackage.haskell.org/package/hlint -[hsb2hs]: http://hackage.haskell.org/package/hsb2hs +[hsb2hs]: https://hackage.haskell.org/package/hsb2hs [pre-commit hook]: https://git-scm.com/book/en/v2/Customizing-Git-Git-Hooks [GitHub labels]: https://github.com/jgm/pandoc/labels [good first issue]:https://github.com/jgm/pandoc/labels/good%20first%20issue diff --git a/COPYING.md b/COPYING.md index af5153d8f..90ae12017 100644 --- a/COPYING.md +++ b/COPYING.md @@ -357,5 +357,5 @@ into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the [GNU Lesser General Public -License](http://www.gnu.org/licenses/lgpl.html) instead of this +License](https://www.gnu.org/licenses/lgpl.html) instead of this License. \ No newline at end of file diff --git a/COPYRIGHT b/COPYRIGHT index a6e3a897c..9752c555d 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -22,11 +22,11 @@ The GNU General Public License is available in the file COPYING.md in the source distribution. On Debian systems, the complete text of the GPL can be found in `/usr/share/common-licenses/GPL`. -[GPL]: http://www.gnu.org/copyleft/gpl.html +[GPL]: https://www.gnu.org/copyleft/gpl.html Pandoc's complete source code is available from the [Pandoc home page]. -[Pandoc home page]: http://pandoc.org +[Pandoc home page]: https://pandoc.org Pandoc includes some code with different copyrights, or subject to different licenses. The copyright and license statements for these sources are included @@ -176,7 +176,7 @@ Released under the GNU General Public License version 2 or later. ---------------------------------------------------------------------- The dzslides template contains JavaScript and CSS from Paul Rouget's dzslides template. -http://github.com/paulrouget/dzslides +https://github.com/paulrouget/dzslides Released under the Do What the Fuck You Want To Public License. diff --git a/INSTALL.md b/INSTALL.md index 721dc5d66..d23c349c1 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -45,7 +45,7 @@ by downloading [this script][uninstaller] and running it with `perl uninstall-pandoc.pl`. Alternatively, you can install pandoc using -[Homebrew](http://brew.sh): +[Homebrew](https://brew.sh): brew install pandoc @@ -116,7 +116,7 @@ package repositories. For example, on Debian/Ubuntu, you can install it with `apt-get install haskell-platform`. For PDF output, you'll need LaTeX. We recommend installing -[TeX Live](http://www.tug.org/texlive/) via your package +[TeX Live](https://www.tug.org/texlive/) via your package manager. (On Debian/Ubuntu, `apt-get install texlive`.) ## Chrome OS @@ -238,7 +238,7 @@ The easiest way to build pandoc from source is to use [stack][stack]: pandoc --help - [Not sure where `$CABALDIR` is?](http://www.haskell.org/haskellwiki/Cabal-Install#The_cabal-install_configuration_file) + [Not sure where `$CABALDIR` is?](https://wiki.haskell.org/Cabal-Install#The_cabal-install_configuration_file) 5. By default `pandoc` uses the "i;unicode-casemap" method to sort bibliography entries (RFC 5051). If you would like to @@ -383,30 +383,29 @@ To run just the markdown benchmarks: [Arch]: https://www.archlinux.org/packages/community/x86_64/pandoc/ -[Cabal User's Guide]: http://www.haskell.org/cabal/release/latest/doc/users-guide/builders.html#setup-configure-paths +[Cabal User's Guide]: https://cabal.readthedocs.io/ [Debian]: https://packages.debian.org/pandoc [Fedora]: https://apps.fedoraproject.org/packages/pandoc -[FreeBSD ports]: http://www.freshports.org/textproc/hs-pandoc/ -[GHC]: http://www.haskell.org/ghc/ -[GPL]: http://www.gnu.org/copyleft/gpl.html -[Haskell platform]: http://hackage.haskell.org/platform/ -[MacPorts]: http://trac.macports.org/browser/trunk/dports/textproc/pandoc/Portfile +[FreeBSD ports]: https://www.freshports.org/textproc/hs-pandoc/ +[GHC]: https://www.haskell.org/ghc/ +[Haskell platform]: https://hackage.haskell.org/platform/ +[MacPorts]: https://trac.macports.org/browser/trunk/dports/textproc/pandoc/Portfile [MacTeX]: https://tug.org/mactex/ -[BasicTeX]: http://www.tug.org/mactex/morepackages.html +[BasicTeX]: https://www.tug.org/mactex/morepackages.html [LaTeX]: https://www.latex-project.org -[MiKTeX]: http://miktex.org/ +[MiKTeX]: https://miktex.org/ [librsvg]: https://wiki.gnome.org/Projects/LibRsvg [Python]: https://www.python.org -[NetBSD]: http://pkgsrc.se/wip/pandoc +[NetBSD]: https://pkgsrc.se/wip/pandoc [NixOS]: https://nixos.org/nixos/packages.html [Slackware]: https://www.slackbuilds.org/result/?search=pandoc&sv= [Ubuntu]: https://packages.ubuntu.com/pandoc [download page]: https://github.com/jgm/pandoc/releases/latest -[gentoo]: http://packages.gentoo.org/package/app-text/pandoc +[gentoo]: https://packages.gentoo.org/package/app-text/pandoc [haskell repository]: https://wiki.archlinux.org/index.php/Haskell_Package_Guidelines#.5Bhaskell.5D [openSUSE]: https://software.opensuse.org/package/pandoc -[source tarball]: http://hackage.haskell.org/package/pandoc +[source tarball]: https://hackage.haskell.org/package/pandoc [stack]: https://docs.haskellstack.org/en/stable/install_and_upgrade.html -[cabal-install]: http://hackage.haskell.org/package/cabal-install +[cabal-install]: https://hackage.haskell.org/package/cabal-install [Void]: https://voidlinux.org/ [uninstaller]: https://raw.githubusercontent.com/jgm/pandoc/master/macos/uninstall-pandoc.pl diff --git a/MANUAL.txt b/MANUAL.txt index ba031bcb2..a9465b20c 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -486,7 +486,7 @@ header when requesting a document from a URL: [Emacs Org mode]: https://orgmode.org [AsciiDoc]: https://www.methods.co.nz/asciidoc/ [AsciiDoctor]: https://asciidoctor.org/ -[DZSlides]: http://paulrouget.com/dzslides/ +[DZSlides]: https://paulrouget.com/dzslides/ [Word docx]: https://en.wikipedia.org/wiki/Office_Open_XML [PDF]: https://www.adobe.com/pdf/ [reveal.js]: https://revealjs.com/ diff --git a/README.template b/README.template index 53d14b584..4b469f22b 100644 --- a/README.template +++ b/README.template @@ -7,9 +7,9 @@ Pandoc ====== [](https://github.com/jgm/pandoc/releases) -[](http://hackage.haskell.org/package/pandoc) -[](http://brewformulas.org/Pandoc) -[](http://stackage.org/lts/package/pandoc) +[](https://hackage.haskell.org/package/pandoc) +[](https://formulae.brew.sh/formula/pandoc) +[](https://www.stackage.org/lts/package/pandoc-types) [](https://github.com/jgm/pandoc/actions) [](https://www.gnu.org/licenses/gpl.html) [](https://groups.google.com/forum/#!forum/pandoc-discuss) @@ -85,5 +85,5 @@ License [GPL], version 2 or greater. This software carries no warranty of any kind. (See COPYRIGHT for full copyright and warranty notices.) -[GPL]: http://www.gnu.org/copyleft/gpl.html "GNU General Public License" -[Haskell]: http://haskell.org +[GPL]: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html "GNU General Public License" +[Haskell]: https://haskell.org diff --git a/doc/lua-filters.md b/doc/lua-filters.md index a99dc9008..df55dc44e 100644 --- a/doc/lua-filters.md +++ b/doc/lua-filters.md @@ -3478,7 +3478,7 @@ Contract a filename, based on a relative path. Note that the resulting path will usually not introduce `..` paths, as the presence of symlinks means `../b` may not reach `a/b` if it starts from `a/c`. For a worked example see [this blog -post](http://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html). +post](https://neilmitchell.blogspot.co.uk/2015/10/filepaths-are-subtle-symlinks-are-hard.html). Set `unsafe` to a truthy value to a allow `..` in paths. diff --git a/man/pandoc.1 b/man/pandoc.1 index 491e24278..bcda05489 100644 --- a/man/pandoc.1 +++ b/man/pandoc.1 @@ -7495,4 +7495,4 @@ This software carries no warranty of any kind. of contributors, see the file AUTHORS.md in the pandoc source code. .PP The Pandoc source code and all documentation may be downloaded -from <http://pandoc.org>. +from <https://pandoc.org>. diff --git a/man/pandoc.1.after b/man/pandoc.1.after index e5eabb670..7cd7a93f0 100644 --- a/man/pandoc.1.after +++ b/man/pandoc.1.after @@ -1,3 +1,3 @@ .PP The Pandoc source code and all documentation may be downloaded -from <http://pandoc.org>. +from <https://pandoc.org>. diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 6658c8c0c..38e992ba1 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -544,7 +544,7 @@ linkifyVariables ref = fixShortDOI x = let x' = extractText x in if "10/" `T.isPrefixOf` x' then TextVal $ T.drop 3 x' - -- see http://shortdoi.org + -- see https://shortdoi.org else TextVal x' tolink pref x = let x' = extractText x x'' = if "://" `T.isInfixOf` x' diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 416fe439e..c0752dadc 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -577,10 +577,10 @@ itemToReference locale variant item = do eprint <- getRawField "eprint" let baseUrl = case T.toLower etype of - "arxiv" -> "http://arxiv.org/abs/" - "jstor" -> "http://www.jstor.org/stable/" - "pubmed" -> "http://www.ncbi.nlm.nih.gov/pubmed/" - "googlebooks" -> "http://books.google.com?id=" + "arxiv" -> "https://arxiv.org/abs/" + "jstor" -> "https://www.jstor.org/stable/" + "pubmed" -> "https://www.ncbi.nlm.nih.gov/pubmed/" + "googlebooks" -> "https://books.google.com?id=" _ -> "" if T.null baseUrl then mzero diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 4991c6308..860da2dc3 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -11,7 +11,7 @@ Portability : portable Conversion from Textile to 'Pandoc' document, based on the spec -available at http://redcloth.org/textile. +available at https://www.promptworks.com/textile/. Implemented and parsed: - Paragraphs diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 898905603..d33246a63 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -14,7 +14,7 @@ Conversion of 'Pandoc' documents to markdown-formatted plain text. -Markdown: <http://daringfireball.net/projects/markdown/> +Markdown: <https://daringfireball.net/projects/markdown/> -} module Text.Pandoc.Writers.Markdown ( writeMarkdown, diff --git a/test/txt2tags.t2t b/test/txt2tags.t2t index f736cfa93..b636c1511 100644 --- a/test/txt2tags.t2t +++ b/test/txt2tags.t2t @@ -270,7 +270,7 @@ FTP.DOMAIN.COM [img.png] %%% Syntax: Image pointing to a link: [[img] link] -[[img.png] http://txt2tags.org] +[[img.png] https://txt2tags.org] %%% Align: Image position is preserved when inside paragraph [img.png] Image at the line beginning. diff --git a/trypandoc/index.html b/trypandoc/index.html index 792f522eb..5761153c9 100644 --- a/trypandoc/index.html +++ b/trypandoc/index.html @@ -14,7 +14,7 @@ <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script> <script> "use strict"; -(function($) { // http://stackoverflow.com/questions/901115/how-can-i-get-query-string-values +(function($) { // https://stackoverflow.com/questions/901115/how-can-i-get-query-string-values $.QueryString = (function(a) { if (a == "") return {}; var b = {}; diff --git a/windows/Makefile b/windows/Makefile index 354c1b37c..34dda34c2 100644 --- a/windows/Makefile +++ b/windows/Makefile @@ -6,5 +6,5 @@ all: $(SIGNED) .PHONY: all pandoc-%.msi: pandoc-%-UNSIGNED.msi - osslsigncode sign -pkcs12 $$HOME/Private/SectigoCodeSigning.exp2023.p12 -in $< -i http://johnmacfarlane.net/ -t http://timestamp.comodoca.com/ -out $@ -askpass && rm $< + osslsigncode sign -pkcs12 $$HOME/Private/SectigoCodeSigning.exp2023.p12 -in $< -i https://johnmacfarlane.net/ -t http://timestamp.comodoca.com/ -out $@ -askpass && rm $< diff --git a/windows/pandoc.wxs b/windows/pandoc.wxs index d9f6b836e..a582c4de9 100644 --- a/windows/pandoc.wxs +++ b/windows/pandoc.wxs @@ -123,8 +123,8 @@ <!-- Set properties for add/remove programs --> - <Property Id="ARPURLINFOABOUT" Value="http://pandoc.org" /> - <Property Id="ARPHELPLINK" Value="http://pandoc.org" /> + <Property Id="ARPURLINFOABOUT" Value="https://pandoc.org" /> + <Property Id="ARPHELPLINK" Value="https://pandoc.org" /> <Property Id="ARPNOREPAIR" Value="yes" Secure="yes" /> <!-- Remove repair --> <Property Id="ARPNOMODIFY" Value="yes" Secure="yes" /> <!-- Remove modify --> -- cgit v1.2.3 From 3327b225a1ef96543f912d200229d08940936528 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 27 Feb 2021 21:35:41 +0100 Subject: Lua: use strict evaluation when retrieving AST value from the stack Fixes: #6674 --- src/Text/Pandoc/Lua/Marshaling/AST.hs | 156 +++++++++++++++++----------------- 1 file changed, 77 insertions(+), 79 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Lua/Marshaling/AST.hs b/src/Text/Pandoc/Lua/Marshaling/AST.hs index 6485da661..8e12d232c 100644 --- a/src/Text/Pandoc/Lua/Marshaling/AST.hs +++ b/src/Text/Pandoc/Lua/Marshaling/AST.hs @@ -1,4 +1,5 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE LambdaCase #-} {- | Module : Text.Pandoc.Lua.Marshaling.AST @@ -17,6 +18,7 @@ module Text.Pandoc.Lua.Marshaling.AST ) where import Control.Applicative ((<|>)) +import Control.Monad ((<$!>)) import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError) @@ -32,17 +34,16 @@ instance Pushable Pandoc where pushViaConstructor "Pandoc" blocks meta instance Peekable Pandoc where - peek idx = defineHowTo "get Pandoc value" $ do - blocks <- LuaUtil.rawField idx "blocks" - meta <- LuaUtil.rawField idx "meta" - return $ Pandoc meta blocks + peek idx = defineHowTo "get Pandoc value" $! Pandoc + <$!> LuaUtil.rawField idx "meta" + <*> LuaUtil.rawField idx "blocks" instance Pushable Meta where push (Meta mmap) = pushViaConstructor "Meta" mmap instance Peekable Meta where - peek idx = defineHowTo "get Meta value" $ - Meta <$> Lua.peek idx + peek idx = defineHowTo "get Meta value" $! + Meta <$!> Lua.peek idx instance Pushable MetaValue where push = pushMetaValue @@ -68,14 +69,13 @@ instance Pushable Citation where pushViaConstructor "Citation" cid mode prefix suffix noteNum hash instance Peekable Citation where - peek idx = do - id' <- LuaUtil.rawField idx "id" - prefix <- LuaUtil.rawField idx "prefix" - suffix <- LuaUtil.rawField idx "suffix" - mode <- LuaUtil.rawField idx "mode" - num <- LuaUtil.rawField idx "note_num" - hash <- LuaUtil.rawField idx "hash" - return $ Citation id' prefix suffix mode num hash + peek idx = Citation + <$!> LuaUtil.rawField idx "id" + <*> LuaUtil.rawField idx "prefix" + <*> LuaUtil.rawField idx "suffix" + <*> LuaUtil.rawField idx "mode" + <*> LuaUtil.rawField idx "note_num" + <*> LuaUtil.rawField idx "hash" instance Pushable Alignment where push = Lua.push . show @@ -90,7 +90,7 @@ instance Peekable CitationMode where instance Pushable Format where push (Format f) = Lua.push f instance Peekable Format where - peek idx = Format <$> Lua.peek idx + peek idx = Format <$!> Lua.peek idx instance Pushable ListNumberDelim where push = Lua.push . show @@ -130,26 +130,26 @@ peekMetaValue idx = defineHowTo "get MetaValue" $ do elementContent = Lua.peek idx luatype <- Lua.ltype idx case luatype of - Lua.TypeBoolean -> MetaBool <$> Lua.peek idx - Lua.TypeString -> MetaString <$> Lua.peek idx + Lua.TypeBoolean -> MetaBool <$!> Lua.peek idx + Lua.TypeString -> MetaString <$!> Lua.peek idx Lua.TypeTable -> do tag <- try $ LuaUtil.getTag idx case tag of - Right "MetaBlocks" -> MetaBlocks <$> elementContent - Right "MetaBool" -> MetaBool <$> elementContent - Right "MetaMap" -> MetaMap <$> elementContent - Right "MetaInlines" -> MetaInlines <$> elementContent - Right "MetaList" -> MetaList <$> elementContent - Right "MetaString" -> MetaString <$> elementContent + Right "MetaBlocks" -> MetaBlocks <$!> elementContent + Right "MetaBool" -> MetaBool <$!> elementContent + Right "MetaMap" -> MetaMap <$!> elementContent + Right "MetaInlines" -> MetaInlines <$!> elementContent + Right "MetaList" -> MetaList <$!> elementContent + Right "MetaString" -> MetaString <$!> elementContent Right t -> Lua.throwMessage ("Unknown meta tag: " <> t) Left _ -> do -- no meta value tag given, try to guess. len <- Lua.rawlen idx if len <= 0 - then MetaMap <$> Lua.peek idx - else (MetaInlines <$> Lua.peek idx) - <|> (MetaBlocks <$> Lua.peek idx) - <|> (MetaList <$> Lua.peek idx) + then MetaMap <$!> Lua.peek idx + else (MetaInlines <$!> Lua.peek idx) + <|> (MetaBlocks <$!> Lua.peek idx) + <|> (MetaList <$!> Lua.peek idx) _ -> Lua.throwMessage "could not get meta value" -- | Push a block element to the top of the Lua stack. @@ -174,25 +174,25 @@ pushBlock = \case -- | Return the value at the given index as block if possible. peekBlock :: StackIndex -> Lua Block -peekBlock idx = defineHowTo "get Block value" $ do +peekBlock idx = defineHowTo "get Block value" $! do tag <- LuaUtil.getTag idx case tag of - "BlockQuote" -> BlockQuote <$> elementContent - "BulletList" -> BulletList <$> elementContent - "CodeBlock" -> withAttr CodeBlock <$> elementContent - "DefinitionList" -> DefinitionList <$> elementContent - "Div" -> withAttr Div <$> elementContent + "BlockQuote" -> BlockQuote <$!> elementContent + "BulletList" -> BulletList <$!> elementContent + "CodeBlock" -> withAttr CodeBlock <$!> elementContent + "DefinitionList" -> DefinitionList <$!> elementContent + "Div" -> withAttr Div <$!> elementContent "Header" -> (\(lvl, LuaAttr attr, lst) -> Header lvl attr lst) - <$> elementContent + <$!> elementContent "HorizontalRule" -> return HorizontalRule - "LineBlock" -> LineBlock <$> elementContent + "LineBlock" -> LineBlock <$!> elementContent "OrderedList" -> (\(LuaListAttributes lstAttr, lst) -> OrderedList lstAttr lst) - <$> elementContent + <$!> elementContent "Null" -> return Null - "Para" -> Para <$> elementContent - "Plain" -> Plain <$> elementContent - "RawBlock" -> uncurry RawBlock <$> elementContent + "Para" -> Para <$!> elementContent + "Plain" -> Plain <$!> elementContent + "RawBlock" -> uncurry RawBlock <$!> elementContent "Table" -> (\(attr, capt, colSpecs, thead, tbodies, tfoot) -> Table (fromLuaAttr attr) capt @@ -200,7 +200,7 @@ peekBlock idx = defineHowTo "get Block value" $ do thead tbodies tfoot) - <$> elementContent + <$!> elementContent _ -> Lua.throwMessage ("Unknown block type: " <> tag) where -- Get the contents of an AST element. @@ -222,15 +222,14 @@ pushCaption (Caption shortCaption longCaption) = do -- | Peek Caption element peekCaption :: StackIndex -> Lua Caption -peekCaption idx = do - short <- Lua.fromOptional <$> LuaUtil.rawField idx "short" - long <- LuaUtil.rawField idx "long" - return $ Caption short long +peekCaption idx = Caption + <$!> (Lua.fromOptional <$!> LuaUtil.rawField idx "short") + <*> LuaUtil.rawField idx "long" instance Peekable ColWidth where peek idx = do - width <- Lua.fromOptional <$> Lua.peek idx - return $ maybe ColWidthDefault ColWidth width + width <- Lua.fromOptional <$!> Lua.peek idx + return $! maybe ColWidthDefault ColWidth width instance Pushable ColWidth where push = \case @@ -252,12 +251,11 @@ instance Pushable TableBody where LuaUtil.addField "body" body instance Peekable TableBody where - peek idx = do - attr <- LuaUtil.rawField idx "attr" - rowHeadColumns <- LuaUtil.rawField idx "row_head_columns" - head' <- LuaUtil.rawField idx "head" - body <- LuaUtil.rawField idx "body" - return $ TableBody attr (RowHeadColumns rowHeadColumns) head' body + peek idx = TableBody + <$!> LuaUtil.rawField idx "attr" + <*> (RowHeadColumns <$!> LuaUtil.rawField idx "row_head_columns") + <*> LuaUtil.rawField idx "head" + <*> LuaUtil.rawField idx "body" instance Pushable TableHead where push (TableHead attr rows) = Lua.push (attr, rows) @@ -287,13 +285,12 @@ pushCell (Cell attr align (RowSpan rowSpan) (ColSpan colSpan) contents) = do LuaUtil.addField "contents" contents peekCell :: StackIndex -> Lua Cell -peekCell idx = do - attr <- fromLuaAttr <$> LuaUtil.rawField idx "attr" - align <- LuaUtil.rawField idx "alignment" - rowSpan <- LuaUtil.rawField idx "row_span" - colSpan <- LuaUtil.rawField idx "col_span" - contents <- LuaUtil.rawField idx "contents" - return $ Cell attr align (RowSpan rowSpan) (ColSpan colSpan) contents +peekCell idx = Cell + <$!> (fromLuaAttr <$!> LuaUtil.rawField idx "attr") + <*> LuaUtil.rawField idx "alignment" + <*> (RowSpan <$!> LuaUtil.rawField idx "row_span") + <*> (ColSpan <$!> LuaUtil.rawField idx "col_span") + <*> LuaUtil.rawField idx "contents" -- | Push an inline element to the top of the lua stack. pushInline :: Inline -> Lua () @@ -324,28 +321,29 @@ peekInline :: StackIndex -> Lua Inline peekInline idx = defineHowTo "get Inline value" $ do tag <- LuaUtil.getTag idx case tag of - "Cite" -> uncurry Cite <$> elementContent - "Code" -> withAttr Code <$> elementContent - "Emph" -> Emph <$> elementContent - "Underline" -> Underline <$> elementContent - "Image" -> (\(LuaAttr attr, lst, tgt) -> Image attr lst tgt) - <$> elementContent - "Link" -> (\(LuaAttr attr, lst, tgt) -> Link attr lst tgt) - <$> elementContent + "Cite" -> uncurry Cite <$!> elementContent + "Code" -> withAttr Code <$!> elementContent + "Emph" -> Emph <$!> elementContent + "Underline" -> Underline <$!> elementContent + "Image" -> (\(LuaAttr !attr, !lst, !tgt) -> Image attr lst tgt) + <$!> elementContent + "Link" -> (\(LuaAttr !attr, !lst, !tgt) -> Link attr lst tgt) + <$!> elementContent "LineBreak" -> return LineBreak - "Note" -> Note <$> elementContent - "Math" -> uncurry Math <$> elementContent - "Quoted" -> uncurry Quoted <$> elementContent - "RawInline" -> uncurry RawInline <$> elementContent - "SmallCaps" -> SmallCaps <$> elementContent + "Note" -> Note <$!> elementContent + "Math" -> uncurry Math <$!> elementContent + "Quoted" -> uncurry Quoted <$!> elementContent + "RawInline" -> uncurry RawInline <$!> elementContent + "SmallCaps" -> SmallCaps <$!> elementContent "SoftBreak" -> return SoftBreak "Space" -> return Space - "Span" -> withAttr Span <$> elementContent - "Str" -> Str <$> elementContent - "Strikeout" -> Strikeout <$> elementContent - "Strong" -> Strong <$> elementContent - "Subscript" -> Subscript <$> elementContent - "Superscript"-> Superscript <$> elementContent + "Span" -> withAttr Span <$!> elementContent + -- strict to Lua string is copied before gc + "Str" -> Str <$!> elementContent + "Strikeout" -> Strikeout <$!> elementContent + "Strong" -> Strong <$!> elementContent + "Subscript" -> Subscript <$!> elementContent + "Superscript"-> Superscript <$!> elementContent _ -> Lua.throwMessage ("Unknown inline type: " <> tag) where -- Get the contents of an AST element. @@ -366,7 +364,7 @@ instance Pushable LuaAttr where pushViaConstructor "Attr" id' classes kv instance Peekable LuaAttr where - peek idx = defineHowTo "get Attr value" (LuaAttr <$> Lua.peek idx) + peek idx = defineHowTo "get Attr value" $! (LuaAttr <$!> Lua.peek idx) -- | Wrapper for ListAttributes newtype LuaListAttributes = LuaListAttributes ListAttributes -- cgit v1.2.3 From 925815bb33b462e1a4c19a8e2c617d403dec0ce7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 27 Feb 2021 17:02:44 -0800 Subject: Split off T.P.Readers.LaTeX.Accent. To help reduce memory demands compiling the main LaTeX reader. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/LaTeX.hs | 68 ++++------------------------ src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 +++++++++++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 60 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 83c5c0120..68edb2b64 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -630,6 +630,7 @@ library Text.Pandoc.Readers.LaTeX.Parsing, Text.Pandoc.Readers.LaTeX.Lang, Text.Pandoc.Readers.LaTeX.SIunitx, + Text.Pandoc.Readers.LaTeX.Accent, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, Text.Pandoc.Readers.Odt.StyleReader, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7d8dfab0e..51c031f78 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -57,6 +57,7 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47) import Text.Pandoc.Readers.LaTeX.SIunitx @@ -64,7 +65,6 @@ import Text.Pandoc.Shared import qualified Text.Pandoc.Translations as Translations import Text.Pandoc.Walk import qualified Text.Pandoc.Builder as B -import qualified Data.Text.Normalize as Normalize import Safe -- for debugging: @@ -247,9 +247,6 @@ doxspace = startsWithLetter _ = False -lit :: Text -> LP m Inlines -lit = pure . str - removeDoubleQuotes :: Text -> Text removeDoubleQuotes t = Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" @@ -296,6 +293,9 @@ quoted' f starter ender = do cs -> cs) else lit startchs +lit :: Text -> LP m Inlines +lit = pure . str + enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines enquote starred mblang = do skipopts @@ -631,7 +631,10 @@ inlineEnvironments = M.fromList [ ] inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) -inlineCommands = M.union inlineLanguageCommands $ M.fromList +inlineCommands = + M.union inlineLanguageCommands $ + M.union (accentCommands tok) $ + M.fromList [ ("emph", extractSpaces emph <$> tok) , ("textit", extractSpaces emph <$> tok) , ("textsl", extractSpaces emph <$> tok) @@ -703,48 +706,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("MakeTextLowercase", makeLowercase <$> tok) , ("lowercase", makeLowercase <$> tok) , ("/", pure mempty) -- italic correction - , ("aa", lit "å") - , ("AA", lit "Å") - , ("ss", lit "ß") - , ("o", lit "ø") - , ("O", lit "Ø") - , ("L", lit "Ł") - , ("l", lit "ł") - , ("ae", lit "æ") - , ("AE", lit "Æ") - , ("oe", lit "œ") - , ("OE", lit "Œ") - , ("pounds", lit "£") - , ("euro", lit "€") - , ("copyright", lit "©") - , ("textasciicircum", lit "^") - , ("textasciitilde", lit "~") - , ("H", accent '\779' Nothing) -- hungarumlaut - , ("`", accent '\768' (Just '`')) -- grave - , ("'", accent '\769' (Just '\'')) -- acute - , ("^", accent '\770' (Just '^')) -- circ - , ("~", accent '\771' (Just '~')) -- tilde - , ("\"", accent '\776' Nothing) -- umlaut - , (".", accent '\775' Nothing) -- dot - , ("=", accent '\772' Nothing) -- macron - , ("|", accent '\781' Nothing) -- vertical line above - , ("b", accent '\817' Nothing) -- macron below - , ("c", accent '\807' Nothing) -- cedilla - , ("G", accent '\783' Nothing) -- doublegrave - , ("h", accent '\777' Nothing) -- hookabove - , ("d", accent '\803' Nothing) -- dotbelow - , ("f", accent '\785' Nothing) -- inverted breve - , ("r", accent '\778' Nothing) -- ringabove - , ("t", accent '\865' Nothing) -- double inverted breve - , ("U", accent '\782' Nothing) -- double vertical line above - , ("v", accent '\780' Nothing) -- hacek - , ("u", accent '\774' Nothing) -- breve - , ("k", accent '\808' Nothing) -- ogonek - , ("textogonekcentered", accent '\808' Nothing) -- ogonek - , ("i", lit "ı") -- dotless i - , ("j", lit "ȷ") -- dotless j - , ("newtie", accent '\785' Nothing) -- inverted breve - , ("textcircled", accent '\8413' Nothing) -- combining circle , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState guard $ not inTableCell optional opt @@ -960,19 +921,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("hyphen", pure (str "-")) ] -accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines -accent combiningAccent fallBack = try $ do - ils <- tok - case toList ils of - (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ - -- try to normalize to the combined character: - Str (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent]) <> xs) : ys - [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - _ -> return ils - - lettrine :: PandocMonad m => LP m Inlines lettrine = do optional opt diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs new file mode 100644 index 000000000..f8c53491c --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Accent.hs @@ -0,0 +1,78 @@ +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Accent + ( accentCommands ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Data.Maybe (fromMaybe) +import Text.Pandoc.Parsing +import qualified Data.Text as T +import qualified Data.Text.Normalize as Normalize + +accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +accentCommands tok = + let accent = accentWith tok + lit = pure . str + in M.fromList + [ ("aa", lit "å") + , ("AA", lit "Å") + , ("ss", lit "ß") + , ("o", lit "ø") + , ("O", lit "Ø") + , ("L", lit "Ł") + , ("l", lit "ł") + , ("ae", lit "æ") + , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") + , ("pounds", lit "£") + , ("euro", lit "€") + , ("copyright", lit "©") + , ("textasciicircum", lit "^") + , ("textasciitilde", lit "~") + , ("H", accent '\779' Nothing) -- hungarumlaut + , ("`", accent '\768' (Just '`')) -- grave + , ("'", accent '\769' (Just '\'')) -- acute + , ("^", accent '\770' (Just '^')) -- circ + , ("~", accent '\771' (Just '~')) -- tilde + , ("\"", accent '\776' Nothing) -- umlaut + , (".", accent '\775' Nothing) -- dot + , ("=", accent '\772' Nothing) -- macron + , ("|", accent '\781' Nothing) -- vertical line above + , ("b", accent '\817' Nothing) -- macron below + , ("c", accent '\807' Nothing) -- cedilla + , ("G", accent '\783' Nothing) -- doublegrave + , ("h", accent '\777' Nothing) -- hookabove + , ("d", accent '\803' Nothing) -- dotbelow + , ("f", accent '\785' Nothing) -- inverted breve + , ("r", accent '\778' Nothing) -- ringabove + , ("t", accent '\865' Nothing) -- double inverted breve + , ("U", accent '\782' Nothing) -- double vertical line above + , ("v", accent '\780' Nothing) -- hacek + , ("u", accent '\774' Nothing) -- breve + , ("k", accent '\808' Nothing) -- ogonek + , ("textogonekcentered", accent '\808' Nothing) -- ogonek + , ("i", lit "ı") -- dotless i + , ("j", lit "ȷ") -- dotless j + , ("newtie", accent '\785' Nothing) -- inverted breve + , ("textcircled", accent '\8413' Nothing) -- combining circle + ] + +accentWith :: PandocMonad m + => LP m Inlines -> Char -> Maybe Char -> LP m Inlines +accentWith tok combiningAccent fallBack = try $ do + ils <- tok + case toList ils of + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ + -- try to normalize to the combined character: + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + _ -> return ils + -- cgit v1.2.3 From 08231f5cdd16e31d38d9d6bf59bc5ca12638b438 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 27 Feb 2021 21:40:56 -0800 Subject: Factor out T.P.Readers.LaTeX.Table. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/LaTeX.hs | 368 +----------------------------- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 33 +++ src/Text/Pandoc/Readers/LaTeX/Table.hs | 373 +++++++++++++++++++++++++++++++ 4 files changed, 412 insertions(+), 363 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Table.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 68edb2b64..9149c4f8f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -631,6 +631,7 @@ library Text.Pandoc.Readers.LaTeX.Lang, Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Accent, + Text.Pandoc.Readers.LaTeX.Table, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, Text.Pandoc.Readers.Odt.StyleReader, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 51c031f78..831c5df05 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -32,7 +31,6 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isDigit, isLetter, toUpper, chr) import Data.Default -import Data.Functor (($>)) import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) @@ -58,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) +import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47) import Text.Pandoc.Readers.LaTeX.SIunitx @@ -551,12 +550,8 @@ inlineCommand' = try $ do <|> ignore rawcommand lookupListDefault raw names inlineCommands - tok :: PandocMonad m => LP m Inlines -tok = try $ spaces >> grouped inline <|> inlineCommand' <|> singleChar' - where singleChar' = do - Tok _ _ t <- singleChar - return $ str t +tok = tokWith inline opt :: PandocMonad m => LP m Inlines opt = do @@ -1118,12 +1113,6 @@ treatAsInline = Set.fromList , "pagebreak" ] -label :: PandocMonad m => LP m () -label = do - controlSeq "label" - t <- braced - updateState $ \st -> st{ sLastLabel = Just $ untokenize t } - dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced @@ -1421,13 +1410,6 @@ bracketedNum = do Just i -> return i _ -> return 0 -setCaption :: PandocMonad m => LP m () -setCaption = try $ do - skipopts - ils <- tok - optional $ try $ spaces *> label - updateState $ \st -> st{ sCaption = Just ils } - looseItem :: PandocMonad m => LP m Blocks looseItem = do inListItem <- sInListItem <$> getState @@ -1441,10 +1423,6 @@ epigraph = do p2 <- grouped block return $ divWith ("", ["epigraph"], []) (p1 <> p2) -resetCaption :: PandocMonad m => LP m () -resetCaption = updateState $ \st -> st{ sCaption = Nothing - , sLastLabel = Nothing } - section :: PandocMonad m => Attr -> Int -> LP m Blocks section (ident, classes, kvs) lvl = do skipopts @@ -1585,7 +1563,7 @@ blockCommands = M.fromList , ("item", looseItem) , ("documentclass", skipopts *> braced *> preamble) , ("centerline", para . trimInlines <$> (skipopts *> tok)) - , ("caption", mempty <$ setCaption) + , ("caption", mempty <$ setCaption inline) , ("bibliography", mempty <$ (skipopts *> braced >>= addMeta "bibliography" . splitBibs . untokenize)) , ("addbibresource", mempty <$ (skipopts *> braced >>= @@ -1623,7 +1601,8 @@ blockCommands = M.fromList environments :: PandocMonad m => M.Map Text (LP m Blocks) -environments = M.fromList +environments = M.union (tableEnvironments blocks inline) $ + M.fromList [ ("document", env "document" blocks <* skipMany anyTok) , ("abstract", mempty <$ (env "abstract" blocks >>= addMeta "abstract")) , ("sloppypar", env "sloppypar" blocks) @@ -1633,13 +1612,6 @@ environments = M.fromList , ("figure", env "figure" $ skipopts *> figure) , ("subfigure", env "subfigure" $ skipopts *> tok *> figure) , ("center", divWith ("", ["center"], []) <$> env "center" blocks) - , ("longtable", env "longtable" $ - resetCaption *> simpTable "longtable" False >>= addTableCaption) - , ("table", env "table" $ - skipopts *> resetCaption *> blocks >>= addTableCaption) - , ("tabular*", env "tabular*" $ simpTable "tabular*" True) - , ("tabularx", env "tabularx" $ simpTable "tabularx" True) - , ("tabular", env "tabular" $ simpTable "tabular" False) , ("quote", blockQuote <$> env "quote" blocks) , ("quotation", blockQuote <$> env "quotation" blocks) , ("verse", blockQuote <$> env "verse" blocks) @@ -1805,9 +1777,6 @@ italicize (Para ils) = Para [Emph ils] italicize (Plain ils) = Plain [Emph ils] italicize x = x -env :: PandocMonad m => Text -> LP m a -> LP m a -env name p = p <* end_ name - rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do exts <- getOption readerExtensions @@ -2045,333 +2014,6 @@ orderedList' = try $ do bs <- listenv "enumerate" (many item) return $ orderedListWith (start, style, delim) bs --- tables - -hline :: PandocMonad m => LP m () -hline = try $ do - spaces - controlSeq "hline" <|> - -- booktabs rules: - controlSeq "toprule" <|> - controlSeq "bottomrule" <|> - controlSeq "midrule" <|> - controlSeq "endhead" <|> - controlSeq "endfirsthead" - spaces - optional opt - return () - -lbreak :: PandocMonad m => LP m Tok -lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") - <* skipopts <* spaces - -amp :: PandocMonad m => LP m Tok -amp = symbol '&' - --- Split a Word into individual Symbols (for parseAligns) -splitWordTok :: PandocMonad m => LP m () -splitWordTok = do - inp <- getInput - case inp of - (Tok spos Word t : rest) -> - setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest - _ -> return () - -parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))] -parseAligns = try $ do - let maybeBar = skipMany - (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced))) - let cAlign = AlignCenter <$ symbol 'c' - let lAlign = AlignLeft <$ symbol 'l' - let rAlign = AlignRight <$ symbol 'r' - let parAlign = AlignLeft <$ symbol 'p' - -- aligns from tabularx - let xAlign = AlignLeft <$ symbol 'X' - let mAlign = AlignLeft <$ symbol 'm' - let bAlign = AlignLeft <$ symbol 'b' - let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign - <|> xAlign <|> mAlign <|> bAlign ) - let alignPrefix = symbol '>' >> braced - let alignSuffix = symbol '<' >> braced - let colWidth = try $ do - symbol '{' - ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") - spaces - symbol '}' - return $ safeRead ds - let alignSpec = do - pref <- option [] alignPrefix - spaces - al <- alignChar - width <- colWidth <|> option Nothing (do s <- untokenize <$> braced - pos <- getPosition - report $ SkippedContent s pos - return Nothing) - spaces - suff <- option [] alignSuffix - return (al, width, (pref, suff)) - let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro - symbol '*' - spaces - ds <- trim . untokenize <$> braced - spaces - spec <- braced - case safeRead ds of - Just n -> - getInput >>= setInput . (mconcat (replicate n spec) ++) - Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" - bgroup - spaces - maybeBar - aligns' <- many $ try $ spaces >> optional starAlign >> - (alignSpec <* maybeBar) - spaces - egroup - spaces - return $ map toSpec aligns' - where - toColWidth (Just w) | w > 0 = ColWidth w - toColWidth _ = ColWidthDefault - toSpec (x, y, z) = (x, toColWidth y, z) - --- N.B. this parser returns a Row that may have erroneous empty cells --- in it. See the note above fixTableHead for details. -parseTableRow :: PandocMonad m - => Text -- ^ table environment name - -> [([Tok], [Tok])] -- ^ pref/suffixes - -> LP m Row -parseTableRow envname prefsufs = do - notFollowedBy (spaces *> end_ envname) - -- add prefixes and suffixes in token stream: - let celltoks (pref, suff) = do - prefpos <- getPosition - contents <- mconcat <$> - many ( snd <$> withRaw (controlSeq "parbox" >> parbox) -- #5711 - <|> - snd <$> withRaw (inlineEnvironment <|> dollarsMath) - <|> - (do notFollowedBy - (() <$ amp <|> () <$ lbreak <|> end_ envname) - count 1 anyTok) ) - - suffpos <- getPosition - option [] (count 1 amp) - return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff - rawcells <- mapM celltoks prefsufs - cells <- mapM (parseFromToks parseTableCell) rawcells - spaces - return $ Row nullAttr cells - -parseTableCell :: PandocMonad m => LP m Cell -parseTableCell = do - spaces - updateState $ \st -> st{ sInTableCell = True } - cell' <- multicolumnCell - <|> multirowCell - <|> parseSimpleCell - <|> parseEmptyCell - updateState $ \st -> st{ sInTableCell = False } - spaces - return cell' - where - -- The parsing of empty cells is important in LaTeX, especially when dealing - -- with multirow/multicolumn. See #6603. - parseEmptyCell = spaces $> emptyCell - -cellAlignment :: PandocMonad m => LP m Alignment -cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') - where - alignment = do - c <- untoken <$> singleChar - return $ case c of - "l" -> AlignLeft - "r" -> AlignRight - "c" -> AlignCenter - "*" -> AlignDefault - _ -> AlignDefault - -plainify :: Blocks -> Blocks -plainify bs = case toList bs of - [Para ils] -> plain (fromList ils) - _ -> bs - -multirowCell :: PandocMonad m => LP m Cell -multirowCell = controlSeq "multirow" >> do - -- Full prototype for \multirow macro is: - -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} - -- However, everything except `nrows` and `text` make - -- sense in the context of the Pandoc AST - _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position - nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced - _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related - _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width - _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning - content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' - return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content - -multicolumnCell :: PandocMonad m => LP m Cell -multicolumnCell = controlSeq "multicolumn" >> do - span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced - alignment <- symbol '{' *> cellAlignment <* symbol '}' - - let singleCell = do - content <- plainify <$> blocks - return $ cell alignment (RowSpan 1) (ColSpan span') content - - -- Two possible contents: either a \multirow cell, or content. - -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}} - -- Note that a \multirow cell can be nested in a \multicolumn, - -- but not the other way around. See #6603 - let nestedCell = do - (Cell _ _ (RowSpan rs) _ bs) <- multirowCell - return $ cell - alignment - (RowSpan rs) - (ColSpan span') - (fromList bs) - - symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' - --- Parse a simple cell, i.e. not multirow/multicol -parseSimpleCell :: PandocMonad m => LP m Cell -parseSimpleCell = simpleCell <$> (plainify <$> blocks) - --- LaTeX tables are stored with empty cells underneath multirow cells --- denoting the grid spaces taken up by them. More specifically, if a --- cell spans m rows, then it will overwrite all the cells in the --- columns it spans for (m-1) rows underneath it, requiring padding --- cells in these places. These padding cells need to be removed for --- proper table reading. See #6603. --- --- These fixTable functions do not otherwise fix up malformed --- input tables: that is left to the table builder. -fixTableHead :: TableHead -> TableHead -fixTableHead (TableHead attr rows) = TableHead attr rows' - where - rows' = fixTableRows rows - -fixTableBody :: TableBody -> TableBody -fixTableBody (TableBody attr rhc th tb) - = TableBody attr rhc th' tb' - where - th' = fixTableRows th - tb' = fixTableRows tb - -fixTableRows :: [Row] -> [Row] -fixTableRows = fixTableRows' $ repeat Nothing - where - fixTableRows' oldHang (Row attr cells : rs) - = let (newHang, cells') = fixTableRow oldHang cells - rs' = fixTableRows' newHang rs - in Row attr cells' : rs' - fixTableRows' _ [] = [] - --- The overhang is represented as Just (relative cell dimensions) or --- Nothing for an empty grid space. -fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) -fixTableRow oldHang cells - -- If there's overhang, drop cells until their total width meets the - -- width of the occupied grid spaces (or we run out) - | (n, prefHang, restHang) <- splitHang oldHang - , n > 0 - = let cells' = dropToWidth getCellW n cells - (restHang', cells'') = fixTableRow restHang cells' - in (prefHang restHang', cells'') - -- Otherwise record the overhang of a pending cell and fix the rest - -- of the row - | c@(Cell _ _ h w _):cells' <- cells - = let h' = max 1 h - w' = max 1 w - oldHang' = dropToWidth getHangW w' oldHang - (newHang, cells'') = fixTableRow oldHang' cells' - in (toHang w' h' <> newHang, c : cells'') - | otherwise - = (oldHang, []) - where - getCellW (Cell _ _ _ w _) = w - getHangW = maybe 1 fst - getCS (ColSpan n) = n - - toHang c r - | r > 1 = [Just (c, r)] - | otherwise = replicate (getCS c) Nothing - - -- Take the prefix of the overhang list representing filled grid - -- spaces. Also return the remainder and the length of this prefix. - splitHang = splitHang' 0 id - - splitHang' !n l (Just (c, r):xs) - = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs - splitHang' n l xs = (n, l, xs) - - -- Drop list items until the total width of the dropped items - -- exceeds the passed width. - dropToWidth _ n l | n < 1 = l - dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs - dropToWidth _ _ [] = [] - -simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks -simpTable envname hasWidthParameter = try $ do - when hasWidthParameter $ () <$ (spaces >> tok) - skipopts - colspecs <- parseAligns - let (aligns, widths, prefsufs) = unzip3 colspecs - optional $ controlSeq "caption" *> setCaption - spaces - optional label - spaces - optional lbreak - spaces - skipMany hline - spaces - header' <- option [] . try . fmap (:[]) $ - parseTableRow envname prefsufs <* lbreak <* many1 hline - spaces - rows <- sepEndBy (parseTableRow envname prefsufs) - (lbreak <* optional (skipMany hline)) - spaces - optional $ controlSeq "caption" *> setCaption - spaces - optional label - spaces - optional lbreak - spaces - lookAhead $ controlSeq "end" -- make sure we're at end - let th = fixTableHead $ TableHead nullAttr header' - let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] - let tf = TableFoot nullAttr [] - return $ table emptyCaption (zip aligns widths) th tbs tf - -addTableCaption :: PandocMonad m => Blocks -> LP m Blocks -addTableCaption = walkM go - where go (Table attr c spec th tb tf) = do - st <- getState - let mblabel = sLastLabel st - capt <- case (sCaption st, mblabel) of - (Just ils, Nothing) -> return $ caption Nothing (plain ils) - (Just ils, Just lab) -> do - num <- getNextNumber sLastTableNum - setState - st{ sLastTableNum = num - , sLabels = M.insert lab - [Str (renderDottedNum num)] - (sLabels st) } - return $ caption Nothing (plain ils) -- add number?? - (Nothing, _) -> return c - let attr' = case (attr, mblabel) of - ((_,classes,kvs), Just ident) -> - (ident,classes,kvs) - _ -> attr - return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf - go x = return x - --- TODO: For now we add a Div to contain table attributes, since --- most writers don't do anything yet with attributes on Table. --- This can be removed when that changes. -addAttrDiv :: Attr -> Block -> Block -addAttrDiv ("",[],[]) b = b -addAttrDiv attr b = Div attr [b] - block :: PandocMonad m => LP m Blocks block = do res <- (mempty <$ spaces1) diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index c2e10570d..4a9fa03ad 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -54,6 +54,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , comment , anyTok , singleChar + , tokWith , specialChars , endline , blankline @@ -80,6 +81,10 @@ module Text.Pandoc.Readers.LaTeX.Parsing , rawopt , overlaySpecification , getNextNumber + , label + , setCaption + , resetCaption + , env ) where import Control.Applicative (many, (<|>)) @@ -914,3 +919,31 @@ getNextNumber getCurrentNum = do Just n -> [n, 1] Nothing -> [1] +label :: PandocMonad m => LP m () +label = do + controlSeq "label" + t <- braced + updateState $ \st -> st{ sLastLabel = Just $ untokenize t } + +setCaption :: PandocMonad m => LP m Inlines -> LP m () +setCaption inline = try $ do + skipopts + ils <- tokWith inline + optional $ try $ spaces *> label + updateState $ \st -> st{ sCaption = Just ils } + +resetCaption :: PandocMonad m => LP m () +resetCaption = updateState $ \st -> st{ sCaption = Nothing + , sLastLabel = Nothing } + +env :: PandocMonad m => Text -> LP m a -> LP m a +env name p = p <* end_ name + +tokWith :: PandocMonad m => LP m Inlines -> LP m Inlines +tokWith inlineParser = try $ spaces >> + grouped inlineParser + <|> (lookAhead anyControlSeq >> inlineParser) + <|> singleChar' + where singleChar' = do + Tok _ _ t <- singleChar + return $ str t diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs new file mode 100644 index 000000000..2ea9caf58 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -0,0 +1,373 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Table + ( tableEnvironments ) +where + +import Data.Functor (($>)) +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Data.Maybe (fromMaybe) +import qualified Data.Text as T +import Control.Applicative ((<|>), optional, many) +import Control.Monad (when, void) +import Text.Pandoc.Shared (safeRead, trim) +import Text.Pandoc.Logging (LogMessage(SkippedContent)) +import Text.Pandoc.Walk (walkM) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) + +tableEnvironments :: PandocMonad m + => LP m Blocks + -> LP m Inlines + -> M.Map Text (LP m Blocks) +tableEnvironments blocks inline = + M.fromList + [ ("longtable", env "longtable" $ + resetCaption *> + simpTable blocks inline "longtable" False >>= addTableCaption) + , ("table", env "table" $ + skipopts *> resetCaption *> blocks >>= addTableCaption) + , ("tabular*", env "tabular*" $ simpTable blocks inline "tabular*" True) + , ("tabularx", env "tabularx" $ simpTable blocks inline "tabularx" True) + , ("tabular", env "tabular" $ simpTable blocks inline "tabular" False) + ] + +hline :: PandocMonad m => LP m () +hline = try $ do + spaces + controlSeq "hline" <|> + -- booktabs rules: + controlSeq "toprule" <|> + controlSeq "bottomrule" <|> + controlSeq "midrule" <|> + controlSeq "endhead" <|> + controlSeq "endfirsthead" + spaces + optional rawopt + return () + +lbreak :: PandocMonad m => LP m Tok +lbreak = (controlSeq "\\" <|> controlSeq "tabularnewline") + <* skipopts <* spaces + +amp :: PandocMonad m => LP m Tok +amp = symbol '&' + +-- Split a Word into individual Symbols (for parseAligns) +splitWordTok :: PandocMonad m => LP m () +splitWordTok = do + inp <- getInput + case inp of + (Tok spos Word t : rest) -> + setInput $ map (Tok spos Symbol . T.singleton) (T.unpack t) <> rest + _ -> return () + +parseAligns :: PandocMonad m => LP m [(Alignment, ColWidth, ([Tok], [Tok]))] +parseAligns = try $ do + let maybeBar = skipMany + (try $ sp *> (() <$ symbol '|' <|> () <$ (symbol '@' >> braced))) + let cAlign = AlignCenter <$ symbol 'c' + let lAlign = AlignLeft <$ symbol 'l' + let rAlign = AlignRight <$ symbol 'r' + let parAlign = AlignLeft <$ symbol 'p' + -- aligns from tabularx + let xAlign = AlignLeft <$ symbol 'X' + let mAlign = AlignLeft <$ symbol 'm' + let bAlign = AlignLeft <$ symbol 'b' + let alignChar = splitWordTok *> ( cAlign <|> lAlign <|> rAlign <|> parAlign + <|> xAlign <|> mAlign <|> bAlign ) + let alignPrefix = symbol '>' >> braced + let alignSuffix = symbol '<' >> braced + let colWidth = try $ do + symbol '{' + ds <- trim . untokenize <$> manyTill anyTok (controlSeq "linewidth") + spaces + symbol '}' + return $ safeRead ds + let alignSpec = do + pref <- option [] alignPrefix + spaces + al <- alignChar + width <- colWidth <|> option Nothing (do s <- untokenize <$> braced + pos <- getPosition + report $ SkippedContent s pos + return Nothing) + spaces + suff <- option [] alignSuffix + return (al, width, (pref, suff)) + let starAlign = do -- '*{2}{r}' == 'rr', we just expand like a macro + symbol '*' + spaces + ds <- trim . untokenize <$> braced + spaces + spec <- braced + case safeRead ds of + Just n -> + getInput >>= setInput . (mconcat (replicate n spec) ++) + Nothing -> Prelude.fail $ "Could not parse " <> T.unpack ds <> " as number" + bgroup + spaces + maybeBar + aligns' <- many $ try $ spaces >> optional starAlign >> + (alignSpec <* maybeBar) + spaces + egroup + spaces + return $ map toSpec aligns' + where + toColWidth (Just w) | w > 0 = ColWidth w + toColWidth _ = ColWidthDefault + toSpec (x, y, z) = (x, toColWidth y, z) + +-- N.B. this parser returns a Row that may have erroneous empty cells +-- in it. See the note above fixTableHead for details. +parseTableRow :: PandocMonad m + => LP m Blocks -- ^ block parser + -> LP m Inlines -- ^ inline parser + -> Text -- ^ table environment name + -> [([Tok], [Tok])] -- ^ pref/suffixes + -> LP m Row +parseTableRow blocks inline envname prefsufs = do + notFollowedBy (spaces *> end_ envname) + -- add prefixes and suffixes in token stream: + let celltoks (pref, suff) = do + prefpos <- getPosition + contents <- mconcat <$> + many ( snd <$> withRaw + ((lookAhead (controlSeq "parbox") >> + void blocks) -- #5711 + <|> + (lookAhead (controlSeq "begin") >> void inline) + <|> + (lookAhead (symbol '$') >> void inline)) + <|> + (do notFollowedBy + (() <$ amp <|> () <$ lbreak <|> end_ envname) + count 1 anyTok) ) + + suffpos <- getPosition + option [] (count 1 amp) + return $ map (setpos prefpos) pref ++ contents ++ map (setpos suffpos) suff + rawcells <- mapM celltoks prefsufs + cells <- mapM (parseFromToks (parseTableCell blocks)) rawcells + spaces + return $ Row nullAttr cells + +parseTableCell :: PandocMonad m => LP m Blocks -> LP m Cell +parseTableCell blocks = do + spaces + updateState $ \st -> st{ sInTableCell = True } + cell' <- multicolumnCell blocks + <|> multirowCell blocks + <|> parseSimpleCell + <|> parseEmptyCell + updateState $ \st -> st{ sInTableCell = False } + spaces + return cell' + where + -- The parsing of empty cells is important in LaTeX, especially when dealing + -- with multirow/multicolumn. See #6603. + parseEmptyCell = spaces $> emptyCell + parseSimpleCell = simpleCell <$> (plainify <$> blocks) + + +cellAlignment :: PandocMonad m => LP m Alignment +cellAlignment = skipMany (symbol '|') *> alignment <* skipMany (symbol '|') + where + alignment = do + c <- untoken <$> singleChar + return $ case c of + "l" -> AlignLeft + "r" -> AlignRight + "c" -> AlignCenter + "*" -> AlignDefault + _ -> AlignDefault + +plainify :: Blocks -> Blocks +plainify bs = case toList bs of + [Para ils] -> plain (fromList ils) + _ -> bs + +multirowCell :: PandocMonad m => LP m Blocks -> LP m Cell +multirowCell blocks = controlSeq "multirow" >> do + -- Full prototype for \multirow macro is: + -- \multirow[vpos]{nrows}[bigstruts]{width}[vmove]{text} + -- However, everything except `nrows` and `text` make + -- sense in the context of the Pandoc AST + _ <- optional $ symbol '[' *> cellAlignment <* symbol ']' -- vertical position + nrows <- fmap (fromMaybe 1 . safeRead . untokenize) braced + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- bigstrut-related + _ <- symbol '{' *> manyTill anyTok (symbol '}') -- Cell width + _ <- optional $ symbol '[' *> manyTill anyTok (symbol ']') -- Length used for fine-tuning + content <- symbol '{' *> (plainify <$> blocks) <* symbol '}' + return $ cell AlignDefault (RowSpan nrows) (ColSpan 1) content + +multicolumnCell :: PandocMonad m => LP m Blocks -> LP m Cell +multicolumnCell blocks = controlSeq "multicolumn" >> do + span' <- fmap (fromMaybe 1 . safeRead . untokenize) braced + alignment <- symbol '{' *> cellAlignment <* symbol '}' + + let singleCell = do + content <- plainify <$> blocks + return $ cell alignment (RowSpan 1) (ColSpan span') content + + -- Two possible contents: either a \multirow cell, or content. + -- E.g. \multicol{1}{c}{\multirow{2}{1em}{content}} + -- Note that a \multirow cell can be nested in a \multicolumn, + -- but not the other way around. See #6603 + let nestedCell = do + (Cell _ _ (RowSpan rs) _ bs) <- multirowCell blocks + return $ cell + alignment + (RowSpan rs) + (ColSpan span') + (fromList bs) + + symbol '{' *> (nestedCell <|> singleCell) <* symbol '}' + +-- LaTeX tables are stored with empty cells underneath multirow cells +-- denoting the grid spaces taken up by them. More specifically, if a +-- cell spans m rows, then it will overwrite all the cells in the +-- columns it spans for (m-1) rows underneath it, requiring padding +-- cells in these places. These padding cells need to be removed for +-- proper table reading. See #6603. +-- +-- These fixTable functions do not otherwise fix up malformed +-- input tables: that is left to the table builder. +fixTableHead :: TableHead -> TableHead +fixTableHead (TableHead attr rows) = TableHead attr rows' + where + rows' = fixTableRows rows + +fixTableBody :: TableBody -> TableBody +fixTableBody (TableBody attr rhc th tb) + = TableBody attr rhc th' tb' + where + th' = fixTableRows th + tb' = fixTableRows tb + +fixTableRows :: [Row] -> [Row] +fixTableRows = fixTableRows' $ repeat Nothing + where + fixTableRows' oldHang (Row attr cells : rs) + = let (newHang, cells') = fixTableRow oldHang cells + rs' = fixTableRows' newHang rs + in Row attr cells' : rs' + fixTableRows' _ [] = [] + +-- The overhang is represented as Just (relative cell dimensions) or +-- Nothing for an empty grid space. +fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell]) +fixTableRow oldHang cells + -- If there's overhang, drop cells until their total width meets the + -- width of the occupied grid spaces (or we run out) + | (n, prefHang, restHang) <- splitHang oldHang + , n > 0 + = let cells' = dropToWidth getCellW n cells + (restHang', cells'') = fixTableRow restHang cells' + in (prefHang restHang', cells'') + -- Otherwise record the overhang of a pending cell and fix the rest + -- of the row + | c@(Cell _ _ h w _):cells' <- cells + = let h' = max 1 h + w' = max 1 w + oldHang' = dropToWidth getHangW w' oldHang + (newHang, cells'') = fixTableRow oldHang' cells' + in (toHang w' h' <> newHang, c : cells'') + | otherwise + = (oldHang, []) + where + getCellW (Cell _ _ _ w _) = w + getHangW = maybe 1 fst + getCS (ColSpan n) = n + + toHang c r + | r > 1 = [Just (c, r)] + | otherwise = replicate (getCS c) Nothing + + -- Take the prefix of the overhang list representing filled grid + -- spaces. Also return the remainder and the length of this prefix. + splitHang = splitHang' 0 id + + splitHang' !n l (Just (c, r):xs) + = splitHang' (n + c) (l . (toHang c (r-1) ++)) xs + splitHang' n l xs = (n, l, xs) + + -- Drop list items until the total width of the dropped items + -- exceeds the passed width. + dropToWidth _ n l | n < 1 = l + dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs + dropToWidth _ _ [] = [] + +simpTable :: PandocMonad m + => LP m Blocks + -> LP m Inlines + -> Text + -> Bool + -> LP m Blocks +simpTable blocks inline envname hasWidthParameter = try $ do + when hasWidthParameter $ () <$ tokWith inline + skipopts + colspecs <- parseAligns + let (aligns, widths, prefsufs) = unzip3 colspecs + optional $ controlSeq "caption" *> setCaption inline + spaces + optional label + spaces + optional lbreak + spaces + skipMany hline + spaces + header' <- option [] . try . fmap (:[]) $ + parseTableRow blocks inline envname prefsufs <* + lbreak <* many1 hline + spaces + rows <- sepEndBy (parseTableRow blocks inline envname prefsufs) + (lbreak <* optional (skipMany hline)) + spaces + optional $ controlSeq "caption" *> setCaption inline + spaces + optional label + spaces + optional lbreak + spaces + lookAhead $ controlSeq "end" -- make sure we're at end + let th = fixTableHead $ TableHead nullAttr header' + let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows] + let tf = TableFoot nullAttr [] + return $ table emptyCaption (zip aligns widths) th tbs tf + +addTableCaption :: PandocMonad m => Blocks -> LP m Blocks +addTableCaption = walkM go + where go (Table attr c spec th tb tf) = do + st <- getState + let mblabel = sLastLabel st + capt <- case (sCaption st, mblabel) of + (Just ils, Nothing) -> return $ caption Nothing (plain ils) + (Just ils, Just lab) -> do + num <- getNextNumber sLastTableNum + setState + st{ sLastTableNum = num + , sLabels = M.insert lab + [Str (renderDottedNum num)] + (sLabels st) } + return $ caption Nothing (plain ils) -- add number?? + (Nothing, _) -> return c + let attr' = case (attr, mblabel) of + ((_,classes,kvs), Just ident) -> + (ident,classes,kvs) + _ -> attr + return $ addAttrDiv attr' $ Table nullAttr capt spec th tb tf + go x = return x + +-- TODO: For now we add a Div to contain table attributes, since +-- most writers don't do anything yet with attributes on Table. +-- This can be removed when that changes. +addAttrDiv :: Attr -> Block -> Block +addAttrDiv ("",[],[]) b = b +addAttrDiv attr b = Div attr [b] -- cgit v1.2.3 From 2faa57e8e96d9905676e30f62d34c06b074acf76 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 09:12:09 -0800 Subject: Factor out T.P.Readers.LaTeX.Citation. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/LaTeX.hs | 202 +++------------------------- src/Text/Pandoc/Readers/LaTeX/Citation.hs | 210 ++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 5 + 4 files changed, 232 insertions(+), 186 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Citation.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 9149c4f8f..567b650a1 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -631,6 +631,7 @@ library Text.Pandoc.Readers.LaTeX.Lang, Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Accent, + Text.Pandoc.Readers.LaTeX.Citation, Text.Pandoc.Readers.LaTeX.Table, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 831c5df05..2d1b83486 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -56,6 +56,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) +import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites) import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47) @@ -169,7 +170,7 @@ rawLaTeXInline = do let toks = tokenize "source" inp raw <- snd <$> ( rawLaTeXParser toks True - (mempty <$ (controlSeq "input" >> skipMany opt >> braced)) + (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced)) inlines <|> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines @@ -311,7 +312,7 @@ blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks blockquote cvariant mblang = do citepar <- if cvariant then (\xs -> para (cite xs mempty)) - <$> cites NormalCitation False + <$> cites inline NormalCitation False else option mempty $ para <$> bracketed inline let lang = mblang >>= babelLangToBCP47 let langdiv = case lang of @@ -425,116 +426,6 @@ pDollarsMath n = do else mzero _ -> (tk :) <$> pDollarsMath n --- citations - -addPrefix :: [Inline] -> [Citation] -> [Citation] -addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks -addPrefix _ _ = [] - -addSuffix :: [Inline] -> [Citation] -> [Citation] -addSuffix s ks@(_:_) = - let k = last ks - in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] -addSuffix _ _ = [] - -simpleCiteArgs :: PandocMonad m => LP m [Citation] -simpleCiteArgs = try $ do - first <- optionMaybe $ toList <$> opt - second <- optionMaybe $ toList <$> opt - keys <- try $ bgroup *> manyTill citationLabel egroup - let (pre, suf) = case (first , second ) of - (Just s , Nothing) -> (mempty, s ) - (Just s , Just t ) -> (s , t ) - _ -> (mempty, mempty) - conv k = Citation { citationId = k - , citationPrefix = [] - , citationSuffix = [] - , citationMode = NormalCitation - , citationHash = 0 - , citationNoteNum = 0 - } - return $ addPrefix pre $ addSuffix suf $ map conv keys - -citationLabel :: PandocMonad m => LP m Text -citationLabel = do - sp - untokenize <$> - (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) - <* sp - <* optional (symbol ',') - <* sp) - where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char] - -cites :: PandocMonad m => CitationMode -> Bool -> LP m [Citation] -cites mode multi = try $ do - cits <- if multi - then do - multiprenote <- optionMaybe $ toList <$> paropt - multipostnote <- optionMaybe $ toList <$> paropt - let (pre, suf) = case (multiprenote, multipostnote) of - (Just s , Nothing) -> (mempty, s) - (Nothing , Just t) -> (mempty, t) - (Just s , Just t ) -> (s, t) - _ -> (mempty, mempty) - tempCits <- many1 simpleCiteArgs - case tempCits of - (k:ks) -> case ks of - (_:_) -> return $ (addMprenote pre k : init ks) ++ - [addMpostnote suf (last ks)] - _ -> return [addMprenote pre (addMpostnote suf k)] - _ -> return [[]] - else count 1 simpleCiteArgs - let cs = concat cits - return $ case mode of - AuthorInText -> case cs of - (c:rest) -> c {citationMode = mode} : rest - [] -> [] - _ -> map (\a -> a {citationMode = mode}) cs - where mprenote (k:ks) = (k:ks) ++ [Space] - mprenote _ = mempty - mpostnote (k:ks) = [Str ",", Space] ++ (k:ks) - mpostnote _ = mempty - addMprenote mpn (k:ks) = - let mpnfinal = case citationPrefix k of - (_:_) -> mprenote mpn - _ -> mpn - in addPrefix mpnfinal (k:ks) - addMprenote _ _ = [] - addMpostnote = addSuffix . mpostnote - -citation :: PandocMonad m => Text -> CitationMode -> Bool -> LP m Inlines -citation name mode multi = do - (c,raw) <- withRaw $ cites mode multi - return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) - -handleCitationPart :: Inlines -> [Citation] -handleCitationPart ils = - let isCite Cite{} = True - isCite _ = False - (pref, rest) = break isCite (toList ils) - in case rest of - (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs - _ -> [] - -complexNatbibCitation :: PandocMonad m => CitationMode -> LP m Inlines -complexNatbibCitation mode = try $ do - (cs, raw) <- - withRaw $ concat <$> do - bgroup - items <- mconcat <$> - many1 (notFollowedBy (symbol ';') >> inline) - `sepBy1` symbol ';' - egroup - return $ map handleCitationPart items - case cs of - [] -> mzero - (c:cits) -> return $ cite (c{ citationMode = mode }:cits) - (rawInline "latex" $ "\\citetext" <> untokenize raw) - -inNote :: Inlines -> Inlines -inNote ils = - note $ para $ ils <> str "." - inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do Tok _ (CtrlSeq name) cmd <- anyControlSeq @@ -553,19 +444,6 @@ inlineCommand' = try $ do tok :: PandocMonad m => LP m Inlines tok = tokWith inline -opt :: PandocMonad m => LP m Inlines -opt = do - toks <- try (sp *> bracketedToks <* sp) - -- now parse the toks as inlines - st <- getState - parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks - case parsed of - Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e - -paropt :: PandocMonad m => LP m Inlines -paropt = parenWrapped inline - inBrackets :: Inlines -> Inlines inBrackets x = str "[" <> x <> str "]" @@ -629,6 +507,7 @@ inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineCommands = M.union inlineLanguageCommands $ M.union (accentCommands tok) $ + M.union (citationCommands inline) $ M.fromList [ ("emph", extractSpaces emph <$> tok) , ("textit", extractSpaces emph <$> tok) @@ -703,7 +582,7 @@ inlineCommands = , ("/", pure mempty) -- italic correction , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState guard $ not inTableCell - optional opt + optional rawopt spaces)) , (",", lit "\8198") , ("@", pure mempty) @@ -761,61 +640,6 @@ inlineCommands = , ("proofname", doTerm Translations.Proof) , ("glossaryname", doTerm Translations.Glossary) , ("lstlistingname", doTerm Translations.Listing) - , ("cite", citation "cite" NormalCitation False) - , ("Cite", citation "Cite" NormalCitation False) - , ("citep", citation "citep" NormalCitation False) - , ("citep*", citation "citep*" NormalCitation False) - , ("citeal", citation "citeal" NormalCitation False) - , ("citealp", citation "citealp" NormalCitation False) - , ("citealp*", citation "citealp*" NormalCitation False) - , ("autocite", citation "autocite" NormalCitation False) - , ("smartcite", citation "smartcite" NormalCitation False) - , ("footcite", inNote <$> citation "footcite" NormalCitation False) - , ("parencite", citation "parencite" NormalCitation False) - , ("supercite", citation "supercite" NormalCitation False) - , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) - , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) - , ("citeyear", citation "citeyear" SuppressAuthor False) - , ("autocite*", citation "autocite*" SuppressAuthor False) - , ("cite*", citation "cite*" SuppressAuthor False) - , ("parencite*", citation "parencite*" SuppressAuthor False) - , ("textcite", citation "textcite" AuthorInText False) - , ("citet", citation "citet" AuthorInText False) - , ("citet*", citation "citet*" AuthorInText False) - , ("citealt", citation "citealt" AuthorInText False) - , ("citealt*", citation "citealt*" AuthorInText False) - , ("textcites", citation "textcites" AuthorInText True) - , ("cites", citation "cites" NormalCitation True) - , ("autocites", citation "autocites" NormalCitation True) - , ("footcites", inNote <$> citation "footcites" NormalCitation True) - , ("parencites", citation "parencites" NormalCitation True) - , ("supercites", citation "supercites" NormalCitation True) - , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) - , ("Autocite", citation "Autocite" NormalCitation False) - , ("Smartcite", citation "Smartcite" NormalCitation False) - , ("Footcite", inNote <$> citation "Footcite" NormalCitation False) - , ("Parencite", citation "Parencite" NormalCitation False) - , ("Supercite", citation "Supercite" NormalCitation False) - , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) - , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) - , ("Citeyear", citation "Citeyear" SuppressAuthor False) - , ("Autocite*", citation "Autocite*" SuppressAuthor False) - , ("Cite*", citation "Cite*" SuppressAuthor False) - , ("Parencite*", citation "Parencite*" SuppressAuthor False) - , ("Textcite", citation "Textcite" AuthorInText False) - , ("Textcites", citation "Textcites" AuthorInText True) - , ("Cites", citation "Cites" NormalCitation True) - , ("Autocites", citation "Autocites" NormalCitation True) - , ("Footcites", inNote <$> citation "Footcites" NormalCitation True) - , ("Parencites", citation "Parencites" NormalCitation True) - , ("Supercites", citation "Supercites" NormalCitation True) - , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) - , ("citetext", complexNatbibCitation NormalCitation) - , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *> - complexNatbibCitation AuthorInText) - <|> citation "citeauthor" AuthorInText False) - , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= - addMeta "nocite")) , ("hyperlink", hyperlink) , ("hypertarget", hypertargetInline) -- glossaries package @@ -918,7 +742,7 @@ inlineCommands = lettrine :: PandocMonad m => LP m Inlines lettrine = do - optional opt + optional rawopt x <- tok y <- tok return $ extractSpaces (spanWith ("",["lettrine"],[])) x <> smallcaps y @@ -1168,6 +992,16 @@ inline = (mempty <$ comment) inlines :: PandocMonad m => LP m Inlines inlines = mconcat <$> many inline +opt :: PandocMonad m => LP m Inlines +opt = do + toks <- try (sp *> bracketedToks <* sp) + -- now parse the toks as inlines + st <- getState + parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks + case parsed of + Right result -> return result + Left e -> throwError $ PandocParsecError (untokenize toks) e + -- block elements: preamble :: PandocMonad m => LP m Blocks @@ -1261,10 +1095,6 @@ insertIncluded defaultExtension f' = do getInput >>= setInput . (tokenize f contents ++) updateState dropLatestIncludeFile -addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () -addMeta field val = updateState $ \st -> - st{ sMeta = addMetaField field val $ sMeta st } - authors :: PandocMonad m => LP m () authors = try $ do bgroup diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs new file mode 100644 index 000000000..655823dab --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs @@ -0,0 +1,210 @@ +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Citation + ( citationCommands + , cites + ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Control.Applicative ((<|>), optional, many) +import Control.Monad (mzero) +import Control.Monad.Trans (lift) +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(PandocParsecError)) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) + +citationCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +citationCommands inline = + let citation = citationWith inline + tok = spaces *> grouped inline + in M.fromList + [ ("cite", citation "cite" NormalCitation False) + , ("Cite", citation "Cite" NormalCitation False) + , ("citep", citation "citep" NormalCitation False) + , ("citep*", citation "citep*" NormalCitation False) + , ("citeal", citation "citeal" NormalCitation False) + , ("citealp", citation "citealp" NormalCitation False) + , ("citealp*", citation "citealp*" NormalCitation False) + , ("autocite", citation "autocite" NormalCitation False) + , ("smartcite", citation "smartcite" NormalCitation False) + , ("footcite", inNote <$> citation "footcite" NormalCitation False) + , ("parencite", citation "parencite" NormalCitation False) + , ("supercite", citation "supercite" NormalCitation False) + , ("footcitetext", inNote <$> citation "footcitetext" NormalCitation False) + , ("citeyearpar", citation "citeyearpar" SuppressAuthor False) + , ("citeyear", citation "citeyear" SuppressAuthor False) + , ("autocite*", citation "autocite*" SuppressAuthor False) + , ("cite*", citation "cite*" SuppressAuthor False) + , ("parencite*", citation "parencite*" SuppressAuthor False) + , ("textcite", citation "textcite" AuthorInText False) + , ("citet", citation "citet" AuthorInText False) + , ("citet*", citation "citet*" AuthorInText False) + , ("citealt", citation "citealt" AuthorInText False) + , ("citealt*", citation "citealt*" AuthorInText False) + , ("textcites", citation "textcites" AuthorInText True) + , ("cites", citation "cites" NormalCitation True) + , ("autocites", citation "autocites" NormalCitation True) + , ("footcites", inNote <$> citation "footcites" NormalCitation True) + , ("parencites", citation "parencites" NormalCitation True) + , ("supercites", citation "supercites" NormalCitation True) + , ("footcitetexts", inNote <$> citation "footcitetexts" NormalCitation True) + , ("Autocite", citation "Autocite" NormalCitation False) + , ("Smartcite", citation "Smartcite" NormalCitation False) + , ("Footcite", inNote <$> citation "Footcite" NormalCitation False) + , ("Parencite", citation "Parencite" NormalCitation False) + , ("Supercite", citation "Supercite" NormalCitation False) + , ("Footcitetext", inNote <$> citation "Footcitetext" NormalCitation False) + , ("Citeyearpar", citation "Citeyearpar" SuppressAuthor False) + , ("Citeyear", citation "Citeyear" SuppressAuthor False) + , ("Autocite*", citation "Autocite*" SuppressAuthor False) + , ("Cite*", citation "Cite*" SuppressAuthor False) + , ("Parencite*", citation "Parencite*" SuppressAuthor False) + , ("Textcite", citation "Textcite" AuthorInText False) + , ("Textcites", citation "Textcites" AuthorInText True) + , ("Cites", citation "Cites" NormalCitation True) + , ("Autocites", citation "Autocites" NormalCitation True) + , ("Footcites", inNote <$> citation "Footcites" NormalCitation True) + , ("Parencites", citation "Parencites" NormalCitation True) + , ("Supercites", citation "Supercites" NormalCitation True) + , ("Footcitetexts", inNote <$> citation "Footcitetexts" NormalCitation True) + , ("citetext", complexNatbibCitation inline NormalCitation) + , ("citeauthor", (try (tok *> sp *> controlSeq "citetext") *> + complexNatbibCitation inline AuthorInText) + <|> citation "citeauthor" AuthorInText False) + , ("nocite", mempty <$ (citation "nocite" NormalCitation False >>= + addMeta "nocite")) + ] + +-- citations + +addPrefix :: [Inline] -> [Citation] -> [Citation] +addPrefix p (k:ks) = k {citationPrefix = p ++ citationPrefix k} : ks +addPrefix _ _ = [] + +addSuffix :: [Inline] -> [Citation] -> [Citation] +addSuffix s ks@(_:_) = + let k = last ks + in init ks ++ [k {citationSuffix = citationSuffix k ++ s}] +addSuffix _ _ = [] + +simpleCiteArgs :: forall m . PandocMonad m => LP m Inlines -> LP m [Citation] +simpleCiteArgs inline = try $ do + first <- optionMaybe $ toList <$> opt + second <- optionMaybe $ toList <$> opt + keys <- try $ bgroup *> manyTill citationLabel egroup + let (pre, suf) = case (first , second ) of + (Just s , Nothing) -> (mempty, s ) + (Just s , Just t ) -> (s , t ) + _ -> (mempty, mempty) + conv k = Citation { citationId = k + , citationPrefix = [] + , citationSuffix = [] + , citationMode = NormalCitation + , citationHash = 0 + , citationNoteNum = 0 + } + return $ addPrefix pre $ addSuffix suf $ map conv keys + where + opt :: PandocMonad m => LP m Inlines + opt = do + toks <- try (sp *> bracketedToks <* sp) + -- now parse the toks as inlines + st <- getState + parsed <- lift $ + runParserT (mconcat <$> many inline) st "bracketed option" toks + case parsed of + Right result -> return result + Left e -> throwError $ PandocParsecError (untokenize toks) e + + + +citationLabel :: PandocMonad m => LP m Text +citationLabel = do + sp + untokenize <$> + (many1 (satisfyTok isWordTok <|> symbolIn bibtexKeyChar) + <* sp + <* optional (symbol ',') + <* sp) + where bibtexKeyChar = ".:;?!`'()/*@_+=-&[]" :: [Char] + +cites :: PandocMonad m + => LP m Inlines -> CitationMode -> Bool -> LP m [Citation] +cites inline mode multi = try $ do + let paropt = parenWrapped inline + cits <- if multi + then do + multiprenote <- optionMaybe $ toList <$> paropt + multipostnote <- optionMaybe $ toList <$> paropt + let (pre, suf) = case (multiprenote, multipostnote) of + (Just s , Nothing) -> (mempty, s) + (Nothing , Just t) -> (mempty, t) + (Just s , Just t ) -> (s, t) + _ -> (mempty, mempty) + tempCits <- many1 $ simpleCiteArgs inline + case tempCits of + (k:ks) -> case ks of + (_:_) -> return $ (addMprenote pre k : init ks) ++ + [addMpostnote suf (last ks)] + _ -> return [addMprenote pre (addMpostnote suf k)] + _ -> return [[]] + else count 1 $ simpleCiteArgs inline + let cs = concat cits + return $ case mode of + AuthorInText -> case cs of + (c:rest) -> c {citationMode = mode} : rest + [] -> [] + _ -> map (\a -> a {citationMode = mode}) cs + where mprenote (k:ks) = (k:ks) ++ [Space] + mprenote _ = mempty + mpostnote (k:ks) = [Str ",", Space] ++ (k:ks) + mpostnote _ = mempty + addMprenote mpn (k:ks) = + let mpnfinal = case citationPrefix k of + (_:_) -> mprenote mpn + _ -> mpn + in addPrefix mpnfinal (k:ks) + addMprenote _ _ = [] + addMpostnote = addSuffix . mpostnote + +citationWith :: PandocMonad m + => LP m Inlines -> Text -> CitationMode -> Bool -> LP m Inlines +citationWith inline name mode multi = do + (c,raw) <- withRaw $ cites inline mode multi + return $ cite c (rawInline "latex" $ "\\" <> name <> untokenize raw) + +handleCitationPart :: Inlines -> [Citation] +handleCitationPart ils = + let isCite Cite{} = True + isCite _ = False + (pref, rest) = break isCite (toList ils) + in case rest of + (Cite cs _:suff) -> addPrefix pref $ addSuffix suff cs + _ -> [] + +complexNatbibCitation :: PandocMonad m + => LP m Inlines -> CitationMode -> LP m Inlines +complexNatbibCitation inline mode = try $ do + (cs, raw) <- + withRaw $ concat <$> do + bgroup + items <- mconcat <$> + many1 (notFollowedBy (symbol ';') >> inline) + `sepBy1` symbol ';' + egroup + return $ map handleCitationPart items + case cs of + [] -> mzero + (c:cits) -> return $ cite (c{ citationMode = mode }:cits) + (rawInline "latex" $ "\\citetext" <> untokenize raw) + +inNote :: Inlines -> Inlines +inNote ils = + note $ para $ ils <> str "." + diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 4a9fa03ad..a5a39d3c9 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -85,6 +85,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , setCaption , resetCaption , env + , addMeta ) where import Control.Applicative (many, (<|>)) @@ -947,3 +948,7 @@ tokWith inlineParser = try $ spaces >> where singleChar' = do Tok _ _ t <- singleChar return $ str t + +addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> LP m () +addMeta field val = updateState $ \st -> + st{ sMeta = addMetaField field val $ sMeta st } -- cgit v1.2.3 From 5e571d963587866957a26d382aeab9935311fb9d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 09:39:01 -0800 Subject: LaTeX reader: remove two unnecessary parsers in inline. These are handled anyway by regularSymbol. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 2d1b83486..3935c92ef 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -975,9 +975,7 @@ inline = (mempty <$ comment) <|> doubleQuote <|> singleQuote <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) - <|> (str "”" <$ symbol '”') <|> (str "’" <$ symbol '\'') - <|> (str "’" <$ symbol '’') <|> (str "\160" <$ symbol '~') <|> dollarsMath <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) -- cgit v1.2.3 From 564c39beef36bf008fa5d2c840560ef064152e7d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 09:49:34 -0800 Subject: Move setDefaultLanguage to T.P.Readers.LaTeX.Lang. --- src/Text/Pandoc/Readers/LaTeX.hs | 16 ++-------------- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 22 ++++++++++++++++++++-- 2 files changed, 22 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 3935c92ef..2155379db 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -44,7 +44,7 @@ import Text.Pandoc.Builder import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, readFileFromDirs, report, setResourcePath, - setTranslations, translateTerm) + translateTerm) import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) @@ -59,7 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites) import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, - babelLangToBCP47) + babelLangToBCP47, setDefaultLanguage) import Text.Pandoc.Readers.LaTeX.SIunitx import Text.Pandoc.Shared import qualified Text.Pandoc.Translations as Translations @@ -1856,15 +1856,3 @@ block = do blocks :: PandocMonad m => LP m Blocks blocks = mconcat <$> many block -setDefaultLanguage :: PandocMonad m => LP m Blocks -setDefaultLanguage = do - o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') - <$> rawopt - polylang <- untokenize <$> braced - case M.lookup polylang polyglossiaLangToBCP47 of - Nothing -> return mempty -- TODO mzero? warning? - Just langFunc -> do - let l = langFunc o - setTranslations l - updateState $ setMeta "lang" $ str (renderLang l) - return mempty diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 5f634818e..adbeaa6d4 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -12,13 +12,31 @@ Functions for parsing polyglossia and babel language specifiers to BCP47 'Lang'. -} module Text.Pandoc.Readers.LaTeX.Lang - ( polyglossiaLangToBCP47 + ( setDefaultLanguage + , polyglossiaLangToBCP47 , babelLangToBCP47 ) where import qualified Data.Map as M import qualified Data.Text as T -import Text.Pandoc.BCP47 (Lang(..)) +import Text.Pandoc.BCP47 (Lang(..), renderLang) +import Text.Pandoc.Class (PandocMonad(..), setTranslations) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Parsing (updateState, option) +import Text.Pandoc.Builder (Blocks, setMeta, str) + +setDefaultLanguage :: PandocMonad m => LP m Blocks +setDefaultLanguage = do + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') + <$> rawopt + polylang <- untokenize <$> braced + case M.lookup polylang polyglossiaLangToBCP47 of + Nothing -> return mempty -- TODO mzero? warning? + Just langFunc -> do + let l = langFunc o + setTranslations l + updateState $ setMeta "lang" $ str (renderLang l) + return mempty polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList -- cgit v1.2.3 From f6cf03857b59776f4f44ea831787231f7f93da96 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 12:52:41 -0800 Subject: LaTeX reader efficiency improvements. In conjunction with other changes this makes the reader almost twice as fast on our benchmark as it was on Feb. 10. --- src/Text/Pandoc/Readers/LaTeX.hs | 73 +++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 2155379db..4956b90cb 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -191,12 +191,6 @@ inlineCommand = do word :: PandocMonad m => LP m Inlines word = str . untoken <$> satisfyTok isWordTok -regularSymbol :: PandocMonad m => LP m Inlines -regularSymbol = str . untoken <$> satisfyTok isRegularSymbol - where isRegularSymbol (Tok _ Symbol t) = not $ T.any isSpecial t - isRegularSymbol _ = False - isSpecial c = c `Set.member` specialChars - inlineGroup :: PandocMonad m => LP m Inlines inlineGroup = do ils <- grouped inline @@ -961,31 +955,48 @@ lookupListDefault d = (fromMaybe d .) . lookupList where lookupList l m = msum $ map (`M.lookup` m) l inline :: PandocMonad m => LP m Inlines -inline = (mempty <$ comment) - <|> (space <$ whitespace) - <|> (softbreak <$ endline) - <|> word - <|> macroDef (rawInline "latex") - <|> inlineCommand' - <|> inlineEnvironment - <|> inlineGroup - <|> (symbol '-' *> - option (str "-") (symbol '-' *> - option (str "–") (str "—" <$ symbol '-'))) - <|> doubleQuote - <|> singleQuote - <|> (str "”" <$ try (symbol '\'' >> symbol '\'')) - <|> (str "’" <$ symbol '\'') - <|> (str "\160" <$ symbol '~') - <|> dollarsMath - <|> (guardEnabled Ext_literate_haskell *> symbol '|' *> doLHSverb) - <|> (str . T.singleton <$> primEscape) - <|> regularSymbol - <|> (do res <- symbolIn "#^'`\"[]&" - pos <- getPosition - let s = untoken res - report $ ParsingUnescaped s pos - return $ str s) +inline = do + Tok pos toktype t <- lookAhead anyTok + let symbolAsString = str . untoken <$> anySymbol + let unescapedSymbolAsString = + do s <- untoken <$> anySymbol + report $ ParsingUnescaped s pos + return $ str s + case toktype of + Comment -> mempty <$ comment + Spaces -> space <$ whitespace + Newline -> softbreak <$ endline + Word -> word + Esc1 -> str . T.singleton <$> primEscape + Esc2 -> str . T.singleton <$> primEscape + Symbol -> + case t of + "-" -> symbol '-' *> + option (str "-") (symbol '-' *> + option (str "–") (str "—" <$ symbol '-')) + "'" -> symbol '\'' *> + option (str "’") (str "”" <$ symbol '\'') + "~" -> str "\160" <$ symbol '~' + "`" -> doubleQuote <|> singleQuote <|> symbolAsString + "\"" -> doubleQuote <|> singleQuote <|> symbolAsString + "“" -> doubleQuote <|> symbolAsString + "‘" -> singleQuote <|> symbolAsString + "$" -> dollarsMath <|> unescapedSymbolAsString + "|" -> (guardEnabled Ext_literate_haskell *> + symbol '|' *> doLHSverb) <|> symbolAsString + "{" -> inlineGroup + "#" -> unescapedSymbolAsString + "&" -> unescapedSymbolAsString + "_" -> unescapedSymbolAsString + "^" -> unescapedSymbolAsString + "\\" -> mzero + "}" -> mzero + _ -> symbolAsString + CtrlSeq _ -> macroDef (rawInline "latex") + <|> inlineCommand' + <|> inlineEnvironment + <|> inlineGroup + _ -> mzero inlines :: PandocMonad m => LP m Inlines inlines = mconcat <$> many inline -- cgit v1.2.3 From cc543cf5b64cccc91ad6a1a455831b8c3f6d80b2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 14:34:04 -0800 Subject: LaTeX reader: another small efficiency improvement. --- src/Text/Pandoc/Readers/LaTeX.hs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 4956b90cb..4062e8a53 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1855,12 +1855,18 @@ orderedList' = try $ do block :: PandocMonad m => LP m Blocks block = do - res <- (mempty <$ spaces1) - <|> environment - <|> macroDef (rawBlock "latex") - <|> blockCommand - <|> paragraph - <|> grouped block + Tok _ toktype _ <- lookAhead anyTok + res <- (case toktype of + Newline -> mempty <$ spaces1 + Spaces -> mempty <$ spaces1 + Comment -> mempty <$ spaces1 + Word -> paragraph + CtrlSeq "begin" -> environment + CtrlSeq _ -> macroDef (rawBlock "latex") + <|> blockCommand + _ -> mzero) + <|> paragraph + <|> grouped block trace (T.take 60 $ tshow $ B.toList res) return res -- cgit v1.2.3 From 7229d068c9f63b6f5cda198815a1bd03473cfdcf Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 15:18:31 -0800 Subject: Markdown reader efficiency improvements. Benchmarks show that these make the reader 13-17% faster, depending on extensions. --- src/Text/Pandoc/Readers/Markdown.hs | 390 +++++++++++++++++++----------------- 1 file changed, 208 insertions(+), 182 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index e46553dd8..91691c675 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -247,51 +247,60 @@ titleBlock :: PandocMonad m => MarkdownParser m () titleBlock = pandocTitleBlock <|> mmdTitleBlock pandocTitleBlock :: PandocMonad m => MarkdownParser m () -pandocTitleBlock = try $ do +pandocTitleBlock = do guardEnabled Ext_pandoc_title_block lookAhead (char '%') - title <- option mempty titleLine - author <- option (return []) authorsLine - date <- option mempty dateLine - optional blanklines - let meta' = do title' <- title - author' <- author - date' <- date - return $ - (if null title' then id else B.setMeta "title" title') - . (if null author' then id else B.setMeta "author" author') - . (if null date' then id else B.setMeta "date" date') - $ nullMeta - updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } + try $ do + title <- option mempty titleLine + author <- option (return []) authorsLine + date <- option mempty dateLine + optional blanklines + let meta' = do title' <- title + author' <- author + date' <- date + return $ + (if null title' + then id + else B.setMeta "title" title') + . (if null author' + then id + else B.setMeta "author" author') + . (if null date' + then id + else B.setMeta "date" date') + $ nullMeta + updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) -yamlMetaBlock = try $ do +yamlMetaBlock = do guardEnabled Ext_yaml_metadata_block - string "---" - blankline - notFollowedBy blankline -- if --- is followed by a blank it's an HRULE - rawYamlLines <- manyTill anyLine stopLine - -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) - optional blanklines - newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) - $ UTF8.fromTextLazy $ TL.fromStrict rawYaml - -- Since `<>` is left-biased, existing values are not touched: - updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF } - return mempty + try $ do + string "---" + blankline + notFollowedBy blankline -- if --- is followed by a blank it's an HRULE + rawYamlLines <- manyTill anyLine stopLine + -- by including --- and ..., we allow yaml blocks with just comments: + let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) + optional blanklines + newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) + $ UTF8.fromTextLazy $ TL.fromStrict rawYaml + -- Since `<>` is left-biased, existing values are not touched: + updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF } + return mempty stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: PandocMonad m => MarkdownParser m () -mmdTitleBlock = try $ do +mmdTitleBlock = do guardEnabled Ext_mmd_title_block - firstPair <- kvPair False - restPairs <- many (kvPair True) - let kvPairs = firstPair : restPairs - blanklines - updateState $ \st -> st{ stateMeta' = stateMeta' st <> - return (Meta $ M.fromList kvPairs) } + try $ do + firstPair <- kvPair False + restPairs <- many (kvPair True) + let kvPairs = firstPair : restPairs + blanklines + updateState $ \st -> st{ stateMeta' = stateMeta' st <> + return (Meta $ M.fromList kvPairs) } kvPair :: PandocMonad m => Bool -> MarkdownParser m (Text, MetaValue) kvPair allowEmpty = try $ do @@ -661,15 +670,15 @@ codeBlockFenced :: PandocMonad m => MarkdownParser m (F Blocks) codeBlockFenced = try $ do indentchars <- nonindentSpaces let indentLevel = T.length indentchars - c <- try (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) + c <- (guardEnabled Ext_fenced_code_blocks >> lookAhead (char '~')) <|> (guardEnabled Ext_backtick_code_blocks >> lookAhead (char '`')) size <- blockDelimiter (== c) Nothing skipMany spaceChar rawattr <- - (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute)) <|> (Right <$> option ("",[],[]) - (try (guardEnabled Ext_fenced_code_attributes >> attributes) + ((guardEnabled Ext_fenced_code_attributes >> try attributes) <|> ((\x -> ("",[toLanguageId x],[])) <$> many1Char nonspaceChar))) blankline contents <- T.intercalate "\n" <$> @@ -1157,11 +1166,12 @@ stripMarkdownAttribute s = renderTags' $ map filterAttrib $ parseTags s -- lineBlock :: PandocMonad m => MarkdownParser m (F Blocks) -lineBlock = try $ do +lineBlock = do guardEnabled Ext_line_blocks - lines' <- lineBlockLines >>= - mapM (parseFromString' (trimInlinesF <$> inlines)) - return $ B.lineBlock <$> sequence lines' + try $ do + lines' <- lineBlockLines >>= + mapM (parseFromString' (trimInlinesF <$> inlines)) + return $ B.lineBlock <$> sequence lines' -- -- Tables @@ -1263,11 +1273,12 @@ multilineRow indices = do -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. tableCaption :: PandocMonad m => MarkdownParser m (F Inlines) -tableCaption = try $ do +tableCaption = do guardEnabled Ext_table_captions - skipNonindentSpaces - (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:" - trimInlinesF <$> inlines1 <* blanklines + try $ do + skipNonindentSpaces + (string ":" <* notFollowedBy (satisfy isPunctuation)) <|> string "Table:" + trimInlinesF <$> inlines1 <* blanklines -- Parse a simple table with '---' header and one line per row. simpleTable :: PandocMonad m @@ -1436,15 +1447,14 @@ table :: PandocMonad m => MarkdownParser m (F Blocks) table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- - try (guardEnabled Ext_pipe_tables >> scanForPipe >> pipeTable) <|> - try (guardEnabled Ext_multiline_tables >> - multilineTable False) <|> - try (guardEnabled Ext_simple_tables >> - (simpleTable True <|> simpleTable False)) <|> - try (guardEnabled Ext_multiline_tables >> - multilineTable True) <|> - try (guardEnabled Ext_grid_tables >> - (gridTable False <|> gridTable True)) <?> "table" + (guardEnabled Ext_pipe_tables >> try (scanForPipe >> pipeTable)) <|> + (guardEnabled Ext_multiline_tables >> try (multilineTable False)) <|> + (guardEnabled Ext_simple_tables >> + try (simpleTable True <|> simpleTable False)) <|> + (guardEnabled Ext_multiline_tables >> + try (multilineTable True)) <|> + (guardEnabled Ext_grid_tables >> + try (gridTable False <|> gridTable True)) <?> "table" optional blanklines caption <- case frontCaption of Nothing -> option (return mempty) tableCaption @@ -1478,35 +1488,37 @@ inlines1 :: PandocMonad m => MarkdownParser m (F Inlines) inlines1 = mconcat <$> many1 inline inline :: PandocMonad m => MarkdownParser m (F Inlines) -inline = choice [ whitespace - , bareURL - , str - , endline - , code - , strongOrEmph - , note - , cite - , bracketedSpan - , link - , image - , math - , strikeout - , subscript - , superscript - , inlineNote -- after superscript because of ^[link](/foo)^ - , autoLink - , spanHtml - , rawHtmlInline - , escapedNewline - , escapedChar - , rawLaTeXInline' - , exampleRef - , smart - , return . B.singleton <$> charRef - , emoji - , symbol - , ltSign - ] <?> "inline" +inline = do + c <- lookAhead anyChar + ((case c of + ' ' -> whitespace + '\t' -> whitespace + '\n' -> endline + '`' -> code + '_' -> strongOrEmph + '*' -> strongOrEmph + '^' -> superscript <|> inlineNote -- in this order bc ^[link](/foo)^ + '[' -> note <|> cite <|> bracketedSpan <|> link + '!' -> image + '$' -> math + '~' -> strikeout <|> subscript + '<' -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign + '\\' -> escapedNewline <|> escapedChar <|> rawLaTeXInline' + '@' -> exampleRef + '"' -> smart + '\'' -> smart + '\8216' -> smart + '\145' -> smart + '\8220' -> smart + '\147' -> smart + '-' -> smart + '.' -> smart + '&' -> return . B.singleton <$> charRef + ':' -> emoji + _ -> mzero) + <|> bareURL + <|> str + <|> symbol) <?> "inline" escapedChar' :: PandocMonad m => MarkdownParser m Char escapedChar' = try $ do @@ -1517,11 +1529,12 @@ escapedChar' = try $ do <|> oneOf "\\`*_{}[]()>#+-.!~\"" escapedNewline :: PandocMonad m => MarkdownParser m (F Inlines) -escapedNewline = try $ do +escapedNewline = do guardEnabled Ext_escaped_line_breaks - char '\\' - lookAhead (char '\n') -- don't consume the newline (see #3730) - return $ return B.linebreak + try $ do + char '\\' + lookAhead (char '\n') -- don't consume the newline (see #3730) + return $ return B.linebreak escapedChar :: PandocMonad m => MarkdownParser m (F Inlines) escapedChar = do @@ -1543,19 +1556,20 @@ ltSign = do -- whole document has been parsed. But we need this parser -- here in case citations is disabled. exampleRef :: PandocMonad m => MarkdownParser m (F Inlines) -exampleRef = try $ do +exampleRef = do guardEnabled Ext_example_lists - char '@' - lab <- mconcat . map T.pack <$> - many (many1 alphaNum <|> - try (do c <- char '_' <|> char '-' - cs <- many1 alphaNum - return (c:cs))) - return $ do - st <- askF - return $ case M.lookup lab (stateExamples st) of - Just n -> B.str $ tshow n - Nothing -> B.str $ "@" <> lab + try $ do + char '@' + lab <- mconcat . map T.pack <$> + many (many1 alphaNum <|> + try (do c <- char '_' <|> char '-' + cs <- many1 alphaNum + return (c:cs))) + return $ do + st <- askF + return $ case M.lookup lab (stateExamples st) of + Just n -> B.str $ tshow n + Nothing -> B.str $ "@" <> lab symbol :: PandocMonad m => MarkdownParser m (F Inlines) symbol = do @@ -1582,10 +1596,10 @@ code = try $ do >> count (length starts) (char '`') >> notFollowedBy (char '`')) rawattr <- - (Left <$> try (guardEnabled Ext_raw_attribute >> rawAttribute)) + (Left <$> (guardEnabled Ext_raw_attribute >> try rawAttribute)) <|> (Right <$> option ("",[],[]) - (try (guardEnabled Ext_inline_code_attributes >> attributes))) + (guardEnabled Ext_inline_code_attributes >> try attributes)) return $ return $ case rawattr of Left syn -> B.rawInline syn result @@ -1678,20 +1692,22 @@ strikeout = fmap B.strikeout <$> strikeEnd = try $ string "~~" superscript :: PandocMonad m => MarkdownParser m (F Inlines) -superscript = fmap B.superscript <$> try (do +superscript = do guardEnabled Ext_superscript - char '^' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '^')) + fmap B.superscript <$> try (do + char '^' + mconcat <$> many1Till (do notFollowedBy spaceChar + notFollowedBy newline + inline) (char '^')) subscript :: PandocMonad m => MarkdownParser m (F Inlines) -subscript = fmap B.subscript <$> try (do +subscript = do guardEnabled Ext_subscript - char '~' - mconcat <$> many1Till (do notFollowedBy spaceChar - notFollowedBy newline - inline) (char '~')) + fmap B.subscript <$> try (do + char '~' + mconcat <$> many1Till (do notFollowedBy spaceChar + notFollowedBy newline + inline) (char '~')) whitespace :: PandocMonad m => MarkdownParser m (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" @@ -1792,15 +1808,16 @@ link = try $ do regLink B.linkWith lab <|> referenceLink B.linkWith (lab,raw) bracketedSpan :: PandocMonad m => MarkdownParser m (F Inlines) -bracketedSpan = try $ do +bracketedSpan = do guardEnabled Ext_bracketed_spans - (lab,_) <- reference - attr <- attributes - return $ if isSmallCaps attr - then B.smallcaps <$> lab - else if isUnderline attr - then B.underline <$> lab - else B.spanWith attr <$> lab + try $ do + (lab,_) <- reference + attr <- attributes + return $ if isSmallCaps attr + then B.smallcaps <$> lab + else if isUnderline attr + then B.underline <$> lab + else B.spanWith attr <$> lab -- | We treat a span as SmallCaps if class is "smallcaps" (and -- no other attributes are set or if style is "font-variant:small-caps" @@ -1879,12 +1896,13 @@ dropBrackets = dropRB . dropLB dropLB xs = xs bareURL :: PandocMonad m => MarkdownParser m (F Inlines) -bareURL = try $ do +bareURL = do guardEnabled Ext_autolink_bare_uris getState >>= guard . stateAllowLinks - (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) - notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text)) - return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig) + try $ do + (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress) + notFollowedBy $ try $ spaces >> htmlTag (~== TagClose ("a" :: Text)) + return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig) autoLink :: PandocMonad m => MarkdownParser m (F Inlines) autoLink = try $ do @@ -1937,21 +1955,23 @@ note = try $ do return $ B.note $ walk adjustCite contents' inlineNote :: PandocMonad m => MarkdownParser m (F Inlines) -inlineNote = try $ do +inlineNote = do guardEnabled Ext_inline_notes - char '^' - updateState $ \st -> st{ stateInNote = True - , stateNoteNumber = stateNoteNumber st + 1 } - contents <- inlinesInBalancedBrackets - updateState $ \st -> st{ stateInNote = False } - return $ B.note . B.para <$> contents + try $ do + char '^' + updateState $ \st -> st{ stateInNote = True + , stateNoteNumber = stateNoteNumber st + 1 } + contents <- inlinesInBalancedBrackets + updateState $ \st -> st{ stateInNote = False } + return $ B.note . B.para <$> contents rawLaTeXInline' :: PandocMonad m => MarkdownParser m (F Inlines) -rawLaTeXInline' = try $ do +rawLaTeXInline' = do guardEnabled Ext_raw_tex notFollowedBy' rawConTeXtEnvironment - s <- rawLaTeXInline - return $ return $ B.rawInline "tex" s -- "tex" because it might be context + try $ do + s <- rawLaTeXInline + return $ return $ B.rawInline "tex" s -- "tex" because it might be context rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text rawConTeXtEnvironment = try $ do @@ -1970,55 +1990,60 @@ inBrackets parser = do return $ "[" <> contents <> "]" spanHtml :: PandocMonad m => MarkdownParser m (F Inlines) -spanHtml = try $ do +spanHtml = do guardEnabled Ext_native_spans - (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) - contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) - let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] T.words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ if isSmallCaps (ident, classes, keyvals) - then B.smallcaps <$> contents - else if isUnderline (ident, classes, keyvals) - then B.underline <$> contents - else B.spanWith (ident, classes, keyvals) <$> contents + try $ do + (TagOpen _ attrs, _) <- htmlTag (~== TagOpen ("span" :: Text) []) + contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose ("span" :: Text))) + let ident = fromMaybe "" $ lookup "id" attrs + let classes = maybe [] T.words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ if isSmallCaps (ident, classes, keyvals) + then B.smallcaps <$> contents + else if isUnderline (ident, classes, keyvals) + then B.underline <$> contents + else B.spanWith (ident, classes, keyvals) <$> contents divHtml :: PandocMonad m => MarkdownParser m (F Blocks) -divHtml = try $ do +divHtml = do guardEnabled Ext_native_divs - (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) - -- we set stateInHtmlBlock so that closing tags that can be either block or - -- inline will not be parsed as inline tags - oldInHtmlBlock <- stateInHtmlBlock <$> getState - updateState $ \st -> st{ stateInHtmlBlock = Just "div" } - bls <- option "" (blankline >> option "" blanklines) - contents <- mconcat <$> - many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) - closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) - if closed - then do - updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } - let ident = fromMaybe "" $ lookup "id" attrs - let classes = maybe [] T.words $ lookup "class" attrs - let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] - return $ B.divWith (ident, classes, keyvals) <$> contents - else -- avoid backtracing - return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents + try $ do + (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen ("div" :: Text) []) + -- we set stateInHtmlBlock so that closing tags that can be either block + -- or inline will not be parsed as inline tags + oldInHtmlBlock <- stateInHtmlBlock <$> getState + updateState $ \st -> st{ stateInHtmlBlock = Just "div" } + bls <- option "" (blankline >> option "" blanklines) + contents <- mconcat <$> + many (notFollowedBy' (htmlTag (~== TagClose ("div" :: Text))) >> block) + closed <- option False (True <$ htmlTag (~== TagClose ("div" :: Text))) + if closed + then do + updateState $ \st -> st{ stateInHtmlBlock = oldInHtmlBlock } + let ident = fromMaybe "" $ lookup "id" attrs + let classes = maybe [] T.words $ lookup "class" attrs + let keyvals = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"] + return $ B.divWith (ident, classes, keyvals) <$> contents + else -- avoid backtracing + return $ return (B.rawBlock "html" (rawtag <> bls)) <> contents divFenced :: PandocMonad m => MarkdownParser m (F Blocks) -divFenced = try $ do +divFenced = do guardEnabled Ext_fenced_divs - string ":::" - skipMany (char ':') - skipMany spaceChar - attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) - skipMany spaceChar - skipMany (char ':') - blankline - updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st + 1 } - bs <- mconcat <$> manyTill block divFenceEnd - updateState $ \st -> st{ stateFencedDivLevel = stateFencedDivLevel st - 1 } - return $ B.divWith attribs <$> bs + try $ do + string ":::" + skipMany (char ':') + skipMany spaceChar + attribs <- attributes <|> ((\x -> ("",[x],[])) <$> many1Char nonspaceChar) + skipMany spaceChar + skipMany (char ':') + blankline + updateState $ \st -> + st{ stateFencedDivLevel = stateFencedDivLevel st + 1 } + bs <- mconcat <$> manyTill block divFenceEnd + updateState $ \st -> + st{ stateFencedDivLevel = stateFencedDivLevel st - 1 } + return $ B.divWith attribs <$> bs divFenceEnd :: PandocMonad m => MarkdownParser m () divFenceEnd = try $ do @@ -2050,14 +2075,15 @@ emojiChars :: [Char] emojiChars = ['a'..'z'] ++ ['0'..'9'] ++ ['_','+','-'] emoji :: PandocMonad m => MarkdownParser m (F Inlines) -emoji = try $ do +emoji = do guardEnabled Ext_emoji - char ':' - emojikey <- many1Char (oneOf emojiChars) - char ':' - case emojiToInline emojikey of - Just i -> return (return $ B.singleton i) - Nothing -> mzero + try $ do + char ':' + emojikey <- many1Char (oneOf emojiChars) + char ':' + case emojiToInline emojikey of + Just i -> return (return $ B.singleton i) + Nothing -> mzero -- Citations -- cgit v1.2.3 From 36456070c4cf8a6a122bc2ec05e86dc75fe49551 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 15:36:46 -0800 Subject: Fix bug in last commit. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 91691c675..b1b99dfe5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1504,7 +1504,7 @@ inline = do '~' -> strikeout <|> subscript '<' -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign '\\' -> escapedNewline <|> escapedChar <|> rawLaTeXInline' - '@' -> exampleRef + '@' -> cite <|> exampleRef '"' -> smart '\'' -> smart '\8216' -> smart -- cgit v1.2.3 From d2bb0c7c8d599e6cd2aaef787b207bbfa66d4b9e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 21:05:25 -0800 Subject: Factor out T.P.Readers.LaTeX.Math. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/LaTeX.hs | 201 ++----------------------------- src/Text/Pandoc/Readers/LaTeX/Math.hs | 221 ++++++++++++++++++++++++++++++++++ 3 files changed, 230 insertions(+), 193 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Math.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 61b1de0dd..56a9491f2 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -632,6 +632,7 @@ library Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Accent, Text.Pandoc.Readers.LaTeX.Citation, + Text.Pandoc.Readers.LaTeX.Math, Text.Pandoc.Readers.LaTeX.Table, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 4062e8a53..772263578 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -35,7 +35,6 @@ import Data.List (intercalate) import qualified Data.Map as M import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set -import qualified Data.Sequence as Seq import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) @@ -57,6 +56,11 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites) +import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, + inlineEnvironment, + mathDisplay, mathInline, + newtheorem, theoremstyle, proof, + theoremEnvironment) import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47, setDefaultLanguage) @@ -387,39 +391,6 @@ nlToSpace :: Char -> Char nlToSpace '\n' = ' ' nlToSpace x = x -mathDisplay :: Text -> Inlines -mathDisplay = displayMath . trimMath - -mathInline :: Text -> Inlines -mathInline = math . trimMath - -dollarsMath :: PandocMonad m => LP m Inlines -dollarsMath = do - symbol '$' - display <- option False (True <$ symbol '$') - (do contents <- try $ untokenize <$> pDollarsMath 0 - if display - then mathDisplay contents <$ symbol '$' - else return $ mathInline contents) - <|> (guard display >> return (mathInline "")) - --- Int is number of embedded groupings -pDollarsMath :: PandocMonad m => Int -> LP m [Tok] -pDollarsMath n = do - tk@(Tok _ toktype t) <- anyTok - case toktype of - Symbol | t == "$" - , n == 0 -> return [] - | t == "\\" -> do - tk' <- anyTok - (tk :) . (tk' :) <$> pDollarsMath n - | t == "{" -> (tk :) <$> pDollarsMath (n+1) - | t == "}" -> - if n > 0 - then (tk :) <$> pDollarsMath (n-1) - else mzero - _ -> (tk :) <$> pDollarsMath n - inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do Tok _ (CtrlSeq name) cmd <- anyControlSeq @@ -452,51 +423,6 @@ unescapeURL = T.concat . go . T.splitOn "\\" , isEscapable c = t | otherwise = "\\" <> t -mathEnvWith :: PandocMonad m - => (Inlines -> a) -> Maybe Text -> Text -> LP m a -mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name - where inner x = case innerEnv of - Nothing -> x - Just y -> "\\begin{" <> y <> "}\n" <> x <> - "\\end{" <> y <> "}" - -mathEnv :: PandocMonad m => Text -> LP m Text -mathEnv name = do - skipopts - optional blankline - res <- manyTill anyTok (end_ name) - return $ stripTrailingNewlines $ untokenize res - -inlineEnvironment :: PandocMonad m => LP m Inlines -inlineEnvironment = try $ do - controlSeq "begin" - name <- untokenize <$> braced - M.findWithDefault mzero name inlineEnvironments - -inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) -inlineEnvironments = M.fromList [ - ("displaymath", mathEnvWith id Nothing "displaymath") - , ("math", math <$> mathEnv "math") - , ("equation", mathEnvWith id Nothing "equation") - , ("equation*", mathEnvWith id Nothing "equation*") - , ("gather", mathEnvWith id (Just "gathered") "gather") - , ("gather*", mathEnvWith id (Just "gathered") "gather*") - , ("multline", mathEnvWith id (Just "gathered") "multline") - , ("multline*", mathEnvWith id (Just "gathered") "multline*") - , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") - , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") - , ("align", mathEnvWith id (Just "aligned") "align") - , ("align*", mathEnvWith id (Just "aligned") "align*") - , ("alignat", mathEnvWith id (Just "aligned") "alignat") - , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") - , ("dmath", mathEnvWith id Nothing "dmath") - , ("dmath*", mathEnvWith id Nothing "dmath*") - , ("dgroup", mathEnvWith id (Just "aligned") "dgroup") - , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*") - , ("darray", mathEnvWith id (Just "aligned") "darray") - , ("darray*", mathEnvWith id (Just "aligned") "darray*") - ] - inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineCommands = M.union inlineLanguageCommands $ @@ -1354,7 +1280,7 @@ blockCommands = M.fromList , ("address", mempty <$ (skipopts *> tok >>= addMeta "address")) , ("signature", mempty <$ (skipopts *> authors)) , ("date", mempty <$ (skipopts *> tok >>= addMeta "date")) - , ("newtheorem", newtheorem) + , ("newtheorem", newtheorem inline) , ("theoremstyle", theoremstyle) -- KOMA-Script metadata commands , ("extratitle", mempty <$ (skipopts *> tok >>= addMeta "extratitle")) @@ -1473,7 +1399,7 @@ environments = M.union (tableEnvironments blocks inline) $ , ("lilypond", rawVerbEnv "lilypond") , ("ly", rawVerbEnv "ly") -- amsthm - , ("proof", proof) + , ("proof", proof blocks opt) -- etoolbox , ("ifstrequal", ifstrequal) , ("newtoggle", braced >>= newToggle) @@ -1494,128 +1420,17 @@ filecontents = try $ do st{ sFileContents = M.insert fp txt (sFileContents st) } return mempty -theoremstyle :: PandocMonad m => LP m Blocks -theoremstyle = do - stylename <- untokenize <$> braced - let mbstyle = case stylename of - "plain" -> Just PlainStyle - "definition" -> Just DefinitionStyle - "remark" -> Just RemarkStyle - _ -> Nothing - case mbstyle of - Nothing -> return () - Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty } - return mempty - -newtheorem :: PandocMonad m => LP m Blocks -newtheorem = do - number <- option True (False <$ symbol '*' <* sp) - name <- untokenize <$> braced - sp - series <- option Nothing $ Just . untokenize <$> bracketedToks - sp - showName <- tok - sp - syncTo <- option Nothing $ Just . untokenize <$> bracketedToks - sty <- sLastTheoremStyle <$> getState - let spec = TheoremSpec { theoremName = showName - , theoremStyle = sty - , theoremSeries = series - , theoremSyncTo = syncTo - , theoremNumber = number - , theoremLastNum = DottedNum [0] } - tmap <- sTheoremMap <$> getState - updateState $ \s -> s{ sTheoremMap = - M.insert name spec tmap } - return mempty - -proof :: PandocMonad m => LP m Blocks -proof = do - title <- option (B.text "Proof") opt - bs <- env "proof" blocks - return $ - B.divWith ("", ["proof"], []) $ - addQed $ addTitle (B.emph (title <> ".")) bs - -addTitle :: Inlines -> Blocks -> Blocks -addTitle ils bs = - case B.toList bs of - (Para xs : rest) - -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest) - _ -> B.para ils <> bs - -addQed :: Blocks -> Blocks -addQed bs = - case Seq.viewr (B.unMany bs) of - s Seq.:> Para ils - -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign)) - _ -> bs <> B.para qedSign - where - qedSign = B.str "\xa0\x25FB" - environment :: PandocMonad m => LP m Blocks environment = try $ do controlSeq "begin" name <- untokenize <$> braced M.findWithDefault mzero name environments <|> - theoremEnvironment name <|> + theoremEnvironment blocks opt name <|> if M.member name (inlineEnvironments :: M.Map Text (LP PandocPure Inlines)) then mzero else try (rawEnv name) <|> rawVerbEnv name -theoremEnvironment :: PandocMonad m => Text -> LP m Blocks -theoremEnvironment name = do - tmap <- sTheoremMap <$> getState - case M.lookup name tmap of - Nothing -> mzero - Just tspec -> do - optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt - mblabel <- option Nothing $ Just . untokenize <$> - try (spaces >> controlSeq "label" >> spaces >> braced) - bs <- env name blocks - number <- - if theoremNumber tspec - then do - let name' = fromMaybe name $ theoremSeries tspec - num <- getNextNumber - (maybe (DottedNum [0]) theoremLastNum . - M.lookup name' . sTheoremMap) - updateState $ \s -> - s{ sTheoremMap = - M.adjust - (\spec -> spec{ theoremLastNum = num }) - name' - (sTheoremMap s) - } - - case mblabel of - Just ident -> - updateState $ \s -> - s{ sLabels = M.insert ident - (B.toList $ - theoremName tspec <> "\160" <> - str (renderDottedNum num)) (sLabels s) } - Nothing -> return () - return $ space <> B.text (renderDottedNum num) - else return mempty - let titleEmph = case theoremStyle tspec of - PlainStyle -> B.strong - DefinitionStyle -> B.strong - RemarkStyle -> B.emph - let title = titleEmph (theoremName tspec <> number) - <> optTitle <> "." <> space - return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title - $ case theoremStyle tspec of - PlainStyle -> walk italicize bs - _ -> bs - -italicize :: Block -> Block -italicize x@(Para [Image{}]) = x -- see #6925 -italicize (Para ils) = Para [Emph ils] -italicize (Plain ils) = Plain [Emph ils] -italicize x = x - rawEnv :: PandocMonad m => Text -> LP m Blocks rawEnv name = do exts <- getOption readerExtensions diff --git a/src/Text/Pandoc/Readers/LaTeX/Math.hs b/src/Text/Pandoc/Readers/LaTeX/Math.hs new file mode 100644 index 000000000..5b49a0376 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Math.hs @@ -0,0 +1,221 @@ +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Math + ( dollarsMath + , inlineEnvironments + , inlineEnvironment + , mathInline + , mathDisplay + , theoremstyle + , theoremEnvironment + , newtheorem + , proof + ) +where +import Data.Maybe (fromMaybe) +import Text.Pandoc.Walk (walk) +import Text.Pandoc.Builder as B +import qualified Data.Sequence as Seq +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Class +import Text.Pandoc.Shared (trimMath, stripTrailingNewlines) +import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) +import Control.Applicative ((<|>), optional) +import Control.Monad (guard, mzero) +import qualified Data.Map as M +import Data.Text (Text) + +dollarsMath :: PandocMonad m => LP m Inlines +dollarsMath = do + symbol '$' + display <- option False (True <$ symbol '$') + (do contents <- try $ untokenize <$> pDollarsMath 0 + if display + then mathDisplay contents <$ symbol '$' + else return $ mathInline contents) + <|> (guard display >> return (mathInline "")) + +-- Int is number of embedded groupings +pDollarsMath :: PandocMonad m => Int -> LP m [Tok] +pDollarsMath n = do + tk@(Tok _ toktype t) <- anyTok + case toktype of + Symbol | t == "$" + , n == 0 -> return [] + | t == "\\" -> do + tk' <- anyTok + (tk :) . (tk' :) <$> pDollarsMath n + | t == "{" -> (tk :) <$> pDollarsMath (n+1) + | t == "}" -> + if n > 0 + then (tk :) <$> pDollarsMath (n-1) + else mzero + _ -> (tk :) <$> pDollarsMath n + +mathDisplay :: Text -> Inlines +mathDisplay = displayMath . trimMath + +mathInline :: Text -> Inlines +mathInline = math . trimMath + +mathEnvWith :: PandocMonad m + => (Inlines -> a) -> Maybe Text -> Text -> LP m a +mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name + where inner x = case innerEnv of + Nothing -> x + Just y -> "\\begin{" <> y <> "}\n" <> x <> + "\\end{" <> y <> "}" + +mathEnv :: PandocMonad m => Text -> LP m Text +mathEnv name = do + skipopts + optional blankline + res <- manyTill anyTok (end_ name) + return $ stripTrailingNewlines $ untokenize res + +inlineEnvironment :: PandocMonad m => LP m Inlines +inlineEnvironment = try $ do + controlSeq "begin" + name <- untokenize <$> braced + M.findWithDefault mzero name inlineEnvironments + +inlineEnvironments :: PandocMonad m => M.Map Text (LP m Inlines) +inlineEnvironments = M.fromList [ + ("displaymath", mathEnvWith id Nothing "displaymath") + , ("math", math <$> mathEnv "math") + , ("equation", mathEnvWith id Nothing "equation") + , ("equation*", mathEnvWith id Nothing "equation*") + , ("gather", mathEnvWith id (Just "gathered") "gather") + , ("gather*", mathEnvWith id (Just "gathered") "gather*") + , ("multline", mathEnvWith id (Just "gathered") "multline") + , ("multline*", mathEnvWith id (Just "gathered") "multline*") + , ("eqnarray", mathEnvWith id (Just "aligned") "eqnarray") + , ("eqnarray*", mathEnvWith id (Just "aligned") "eqnarray*") + , ("align", mathEnvWith id (Just "aligned") "align") + , ("align*", mathEnvWith id (Just "aligned") "align*") + , ("alignat", mathEnvWith id (Just "aligned") "alignat") + , ("alignat*", mathEnvWith id (Just "aligned") "alignat*") + , ("dmath", mathEnvWith id Nothing "dmath") + , ("dmath*", mathEnvWith id Nothing "dmath*") + , ("dgroup", mathEnvWith id (Just "aligned") "dgroup") + , ("dgroup*", mathEnvWith id (Just "aligned") "dgroup*") + , ("darray", mathEnvWith id (Just "aligned") "darray") + , ("darray*", mathEnvWith id (Just "aligned") "darray*") + ] + +theoremstyle :: PandocMonad m => LP m Blocks +theoremstyle = do + stylename <- untokenize <$> braced + let mbstyle = case stylename of + "plain" -> Just PlainStyle + "definition" -> Just DefinitionStyle + "remark" -> Just RemarkStyle + _ -> Nothing + case mbstyle of + Nothing -> return () + Just sty -> updateState $ \s -> s{ sLastTheoremStyle = sty } + return mempty + +newtheorem :: PandocMonad m => LP m Inlines -> LP m Blocks +newtheorem inline = do + number <- option True (False <$ symbol '*' <* sp) + name <- untokenize <$> braced + sp + series <- option Nothing $ Just . untokenize <$> bracketedToks + sp + showName <- tokWith inline + sp + syncTo <- option Nothing $ Just . untokenize <$> bracketedToks + sty <- sLastTheoremStyle <$> getState + let spec = TheoremSpec { theoremName = showName + , theoremStyle = sty + , theoremSeries = series + , theoremSyncTo = syncTo + , theoremNumber = number + , theoremLastNum = DottedNum [0] } + tmap <- sTheoremMap <$> getState + updateState $ \s -> s{ sTheoremMap = + M.insert name spec tmap } + return mempty + +theoremEnvironment :: PandocMonad m + => LP m Blocks -> LP m Inlines -> Text -> LP m Blocks +theoremEnvironment blocks opt name = do + tmap <- sTheoremMap <$> getState + case M.lookup name tmap of + Nothing -> mzero + Just tspec -> do + optTitle <- option mempty $ (\x -> space <> "(" <> x <> ")") <$> opt + mblabel <- option Nothing $ Just . untokenize <$> + try (spaces >> controlSeq "label" >> spaces >> braced) + bs <- env name blocks + number <- + if theoremNumber tspec + then do + let name' = fromMaybe name $ theoremSeries tspec + num <- getNextNumber + (maybe (DottedNum [0]) theoremLastNum . + M.lookup name' . sTheoremMap) + updateState $ \s -> + s{ sTheoremMap = + M.adjust + (\spec -> spec{ theoremLastNum = num }) + name' + (sTheoremMap s) + } + + case mblabel of + Just ident -> + updateState $ \s -> + s{ sLabels = M.insert ident + (B.toList $ + theoremName tspec <> "\160" <> + str (renderDottedNum num)) (sLabels s) } + Nothing -> return () + return $ space <> B.text (renderDottedNum num) + else return mempty + let titleEmph = case theoremStyle tspec of + PlainStyle -> B.strong + DefinitionStyle -> B.strong + RemarkStyle -> B.emph + let title = titleEmph (theoremName tspec <> number) + <> optTitle <> "." <> space + return $ divWith (fromMaybe "" mblabel, [name], []) $ addTitle title + $ case theoremStyle tspec of + PlainStyle -> walk italicize bs + _ -> bs + + + +proof :: PandocMonad m => LP m Blocks -> LP m Inlines -> LP m Blocks +proof blocks opt = do + title <- option (B.text "Proof") opt + bs <- env "proof" blocks + return $ + B.divWith ("", ["proof"], []) $ + addQed $ addTitle (B.emph (title <> ".")) bs + +addTitle :: Inlines -> Blocks -> Blocks +addTitle ils bs = + case B.toList bs of + (Para xs : rest) + -> B.fromList (Para (B.toList ils ++ (Space : xs)) : rest) + _ -> B.para ils <> bs + +addQed :: Blocks -> Blocks +addQed bs = + case Seq.viewr (B.unMany bs) of + s Seq.:> Para ils + -> B.Many (s Seq.|> Para (ils ++ B.toList qedSign)) + _ -> bs <> B.para qedSign + where + qedSign = B.str "\xa0\x25FB" + +italicize :: Block -> Block +italicize x@(Para [Image{}]) = x -- see #6925 +italicize (Para ils) = Para [Emph ils] +italicize (Plain ils) = Plain [Emph ils] +italicize x = x + + -- cgit v1.2.3 From 2463fbf61d2ea8636e70c44624dc5bc1668fa4fd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 21:43:11 -0800 Subject: LaTeX writer: use function instead of map for accent lookup. --- src/Text/Pandoc/Writers/LaTeX.hs | 52 +++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e8a187599..e31ec9d52 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -262,7 +262,7 @@ stringToLaTeX context zs = do isUrl = ctx == URLString mbAccentCmd = if writerPreferAscii opts && ctx == TextString - then uncons xs >>= \(c,_) -> M.lookup c accents + then uncons xs >>= \(c,_) -> lookupAccent c else Nothing emits s = case mbAccentCmd of @@ -350,32 +350,30 @@ stringToLaTeX context zs = do _ -> emitc x | otherwise -> emitc x -accents :: M.Map Char String -accents = M.fromList - [ ('\779' , "\\H") - , ('\768' , "\\`") - , ('\769' , "\\'") - , ('\770' , "\\^") - , ('\771' , "\\~") - , ('\776' , "\\\"") - , ('\775' , "\\.") - , ('\772' , "\\=") - , ('\781' , "\\|") - , ('\817' , "\\b") - , ('\807' , "\\c") - , ('\783' , "\\G") - , ('\777' , "\\h") - , ('\803' , "\\d") - , ('\785' , "\\f") - , ('\778' , "\\r") - , ('\865' , "\\t") - , ('\782' , "\\U") - , ('\780' , "\\v") - , ('\774' , "\\u") - , ('\808' , "\\k") - , ('\785' , "\\newtie") - , ('\8413', "\\textcircled") - ] +lookupAccent :: Char -> Maybe String +lookupAccent '\779' = Just "\\H" +lookupAccent '\768' = Just "\\`" +lookupAccent '\769' = Just "\\'" +lookupAccent '\770' = Just "\\^" +lookupAccent '\771' = Just "\\~" +lookupAccent '\776' = Just "\\\"" +lookupAccent '\775' = Just "\\." +lookupAccent '\772' = Just "\\=" +lookupAccent '\781' = Just "\\|" +lookupAccent '\817' = Just "\\b" +lookupAccent '\807' = Just "\\c" +lookupAccent '\783' = Just "\\G" +lookupAccent '\777' = Just "\\h" +lookupAccent '\803' = Just "\\d" +lookupAccent '\785' = Just "\\f" +lookupAccent '\778' = Just "\\r" +lookupAccent '\865' = Just "\\t" +lookupAccent '\782' = Just "\\U" +lookupAccent '\780' = Just "\\v" +lookupAccent '\774' = Just "\\u" +lookupAccent '\808' = Just "\\k" +lookupAccent '\8413' = Just "\\textcircled" +lookupAccent _ = Nothing toLabel :: PandocMonad m => Text -> LW m Text toLabel z = go `fmap` stringToLaTeX URLString z -- cgit v1.2.3 From 7e38b8e55a49f027b6aaa690d84225b0374eb057 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 22:51:26 -0800 Subject: T.P.Readers.LaTeX: Don't export tokenize, untokenize. [API change] These were only exported for testing, which seems the wrong thing to do. They don't belong in the public API and are not really usable as they are, without access to the Tok type which is not exported. Removed the tokenize/untokenize roundtrip test. We put a quickcheck property in the comments which may be used when this code is touched (if it is). --- src/Text/Pandoc/Readers/LaTeX.hs | 2 -- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 9 +++++++++ test/Tests/Readers/LaTeX.hs | 17 +---------------- 3 files changed, 10 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 772263578..e63fbc185 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -22,8 +22,6 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, rawLaTeXInline, rawLaTeXBlock, inlineCommand, - tokenize, - untokenize ) where import Control.Applicative (many, optional, (<|>)) diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index a5a39d3c9..db58b333d 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -292,6 +292,15 @@ applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> Left e -> Prelude.fail (show e) Right s' -> return s' +{- +When tokenize or untokenize change, test with this +QuickCheck property: + +> tokUntokRoundtrip :: String -> Bool +> tokUntokRoundtrip s = +> let t = T.pack s in untokenize (tokenize "random" t) == t +-} + tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) diff --git a/test/Tests/Readers/LaTeX.hs b/test/Tests/Readers/LaTeX.hs index 8385b751e..9388fd040 100644 --- a/test/Tests/Readers/LaTeX.hs +++ b/test/Tests/Readers/LaTeX.hs @@ -15,10 +15,8 @@ module Tests.Readers.LaTeX (tests) where import Data.Text (Text) import qualified Data.Text as T import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Pandoc.Readers.LaTeX (tokenize, untokenize) import Test.Tasty import Test.Tasty.HUnit -import Test.Tasty.QuickCheck import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () @@ -47,21 +45,8 @@ simpleTable' aligns rows where toRow = Row nullAttr . map simpleCell -tokUntokRt :: String -> Bool -tokUntokRt s = untokenize (tokenize "random" t) == t - where t = T.pack s - tests :: [TestTree] -tests = [ testGroup "tokenization" - [ testCase "tokenizer round trip on test case" $ do - orig <- UTF8.readFile "../test/latex-reader.latex" - let new = untokenize $ tokenize "../test/latex-reader.latex" - orig - assertEqual "untokenize . tokenize is identity" orig new - , testProperty "untokenize . tokenize is identity" tokUntokRt - ] - - , testGroup "basic" +tests = [ testGroup "basic" [ "simple" =: "word" =?> para "word" , "space" =: -- cgit v1.2.3 From 6a6291d9e3c1ce9c3453318d87239b6f9260d924 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 23:05:35 -0800 Subject: Change T.P.Readers.LaTeX.SIunitx to export a command map... instead of individual commands. --- src/Text/Pandoc/Readers/LaTeX.hs | 11 ++--------- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index e63fbc185..304584072 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47, setDefaultLanguage) -import Text.Pandoc.Readers.LaTeX.SIunitx +import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) import Text.Pandoc.Shared import qualified Text.Pandoc.Translations as Translations import Text.Pandoc.Walk @@ -426,6 +426,7 @@ inlineCommands = M.union inlineLanguageCommands $ M.union (accentCommands tok) $ M.union (citationCommands inline) $ + M.union (siunitxCommands tok) $ M.fromList [ ("emph", extractSpaces emph <$> tok) , ("textit", extractSpaces emph <$> tok) @@ -594,14 +595,6 @@ inlineCommands = , ("Acfp", doAcronymPlural "full") , ("Acsp", doAcronymPlural "abbrv") , ("Aclp", doAcronymPlural "long") - -- siuntix - , ("si", skipopts *> dosi tok) - , ("SI", doSI tok) - , ("SIrange", doSIrange True tok) - , ("numrange", doSIrange False tok) - , ("numlist", doSInumlist) - , ("num", doSInum) - , ("ang", doSIang) -- hyphenat , ("bshyp", lit "\\\173") , ("fshyp", lit "/\173") diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index db9c276e7..1952f4e1a 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -1,12 +1,6 @@ {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Readers.LaTeX.SIunitx - ( dosi - , doSI - , doSIrange - , doSInum - , doSInumlist - , doSIang - ) + ( siunitxCommands ) where import Text.Pandoc.Builder import Text.Pandoc.Readers.LaTeX.Parsing @@ -21,6 +15,19 @@ import Data.Text (Text) import qualified Data.Text as T import Data.List (intersperse) + +siunitxCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +siunitxCommands tok = M.fromList + [ ("si", skipopts *> dosi tok) + , ("SI", doSI tok) + , ("SIrange", doSIrange True tok) + , ("numrange", doSIrange False tok) + , ("numlist", doSInumlist) + , ("num", doSInum) + , ("ang", doSIang) + ] + dosi :: PandocMonad m => LP m Inlines -> LP m Inlines dosi tok = grouped (siUnit tok) <|> siUnit tok -- cgit v1.2.3 From 3793ed8beb5da0a8afbe8d3b121c2a1cd5bece44 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 28 Feb 2021 23:43:55 -0800 Subject: Removed unnecessary pragmas. --- src/Text/Pandoc/Readers/LaTeX.hs | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 304584072..9ad94e417 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,6 +1,4 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} -- cgit v1.2.3 From e1454fe0d0e2f1cb4e9c5753f095a1f0a8580ffe Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 1 Mar 2021 14:14:58 +0100 Subject: Jira writer: use Span identifiers as anchors Closes: tarleb/jira-wiki-markup#3. --- src/Text/Pandoc/Writers/Jira.hs | 4 +++- test/Tests/Writers/Jira.hs | 9 ++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index c21085a4f..131896201 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -292,7 +292,9 @@ quotedToJira qtype xs = do spanToJira :: PandocMonad m => Attr -> [Inline] -> JiraConverter m [Jira.Inline] -spanToJira (_, _classes, _) = toJiraInlines +spanToJira (ident, _classes, _attribs) inls = case ident of + "" -> toJiraInlines inls + _ -> (Jira.Anchor ident :) <$> toJiraInlines inls registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline] registerNotes contents = do diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index 93d830c94..aff8348d4 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -3,6 +3,7 @@ module Tests.Writers.Jira (tests) where import Data.Text (unpack) import Test.Tasty +import Test.Tasty.HUnit (HasCallStack) import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () @@ -12,7 +13,7 @@ jira :: (ToPandoc a) => a -> String jira = unpack . purely (writeJira def) . toPandoc infix 4 =: -(=:) :: (ToString a, ToPandoc a) +(=:) :: (ToString a, ToPandoc a, HasCallStack) => String -> (a, String) -> TestTree (=:) = test jira @@ -61,5 +62,11 @@ tests = linkWith ("", ["user-account"], []) "~johndoe" "" "~johndoe" =?> "[~johndoe]" ] + + , testGroup "spans" + [ "id is used as anchor" =: + spanWith ("unicorn", [], []) (str "Unicorn") =?> + "{anchor:unicorn}Unicorn" + ] ] ] -- cgit v1.2.3 From 382f0e23d22b15aaa9fe2aeb6117ef0a102e379d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 1 Mar 2021 08:55:42 -0800 Subject: Factor out T.P.Readers.LaTeX.Macro. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/LaTeX.hs | 141 +----------------------------- src/Text/Pandoc/Readers/LaTeX/Macro.hs | 153 +++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 139 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Macro.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 56a9491f2..c3317f24b 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -634,6 +634,7 @@ library Text.Pandoc.Readers.LaTeX.Citation, Text.Pandoc.Readers.LaTeX.Math, Text.Pandoc.Readers.LaTeX.Table, + Text.Pandoc.Readers.LaTeX.Macro, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, Text.Pandoc.Readers.Odt.StyleReader, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9ad94e417..fa77595b9 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -47,8 +47,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) -import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), - ArgSpec (..), Tok (..), TokType (..)) +import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites) @@ -58,6 +57,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, newtheorem, theoremstyle, proof, theoremEnvironment) import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) +import Text.Pandoc.Readers.LaTeX.Macro (macroDef) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47, setDefaultLanguage) import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) @@ -1027,143 +1027,6 @@ authors = try $ do egroup addMeta "author" (map trimInlines auths) -macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a -macroDef constructor = do - (_, s) <- withRaw (commandDef <|> environmentDef) - (constructor (untokenize s) <$ - guardDisabled Ext_latex_macros) - <|> return mempty - where commandDef = do - (name, macro') <- newcommand <|> letmacro <|> defmacro - guardDisabled Ext_latex_macros <|> - updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) }) - environmentDef = do - mbenv <- newenvironment - case mbenv of - Nothing -> return () - Just (name, macro1, macro2) -> - guardDisabled Ext_latex_macros <|> - do updateState $ \s -> s{ sMacros = - M.insert name macro1 (sMacros s) } - updateState $ \s -> s{ sMacros = - M.insert ("end" <> name) macro2 (sMacros s) } - -- @\newenvironment{envname}[n-args][default]{begin}{end}@ - -- is equivalent to - -- @\newcommand{\envname}[n-args][default]{begin}@ - -- @\newcommand{\endenvname}@ - -letmacro :: PandocMonad m => LP m (Text, Macro) -letmacro = do - controlSeq "let" - (name, contents) <- withVerbatimMode $ do - Tok _ (CtrlSeq name) _ <- anyControlSeq - optional $ symbol '=' - spaces - -- we first parse in verbatim mode, and then expand macros, - -- because we don't want \let\foo\bar to turn into - -- \let\foo hello if we have previously \def\bar{hello} - contents <- bracedOrToken - return (name, contents) - contents' <- doMacros' 0 contents - return (name, Macro ExpandWhenDefined [] Nothing contents') - -defmacro :: PandocMonad m => LP m (Text, Macro) -defmacro = try $ - -- we use withVerbatimMode, because macros are to be expanded - -- at point of use, not point of definition - withVerbatimMode $ do - controlSeq "def" - Tok _ (CtrlSeq name) _ <- anyControlSeq - argspecs <- many (argspecArg <|> argspecPattern) - contents <- bracedOrToken - return (name, Macro ExpandWhenUsed argspecs Nothing contents) - -argspecArg :: PandocMonad m => LP m ArgSpec -argspecArg = do - Tok _ (Arg i) _ <- satisfyTok isArgTok - return $ ArgNum i - -argspecPattern :: PandocMonad m => LP m ArgSpec -argspecPattern = - Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) -> - (toktype' == Symbol || toktype' == Word) && - (txt /= "{" && txt /= "\\" && txt /= "}"))) - -newcommand :: PandocMonad m => LP m (Text, Macro) -newcommand = do - pos <- getPosition - Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> - controlSeq "renewcommand" <|> - controlSeq "providecommand" <|> - controlSeq "DeclareMathOperator" <|> - controlSeq "DeclareRobustCommand" - withVerbatimMode $ do - Tok _ (CtrlSeq name) txt <- do - optional (symbol '*') - anyControlSeq <|> - (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') - spaces - numargs <- option 0 $ try bracketedNum - let argspecs = map ArgNum [1..numargs] - spaces - optarg <- option Nothing $ Just <$> try bracketedToks - spaces - contents' <- bracedOrToken - let contents = - case mtype of - "DeclareMathOperator" -> - Tok pos (CtrlSeq "mathop") "\\mathop" - : Tok pos Symbol "{" - : Tok pos (CtrlSeq "mathrm") "\\mathrm" - : Tok pos Symbol "{" - : (contents' ++ - [ Tok pos Symbol "}", Tok pos Symbol "}" ]) - _ -> contents' - macros <- sMacros <$> getState - case M.lookup name macros of - Just macro - | mtype == "newcommand" -> do - report $ MacroAlreadyDefined txt pos - return (name, macro) - | mtype == "providecommand" -> return (name, macro) - _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) - -newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) -newenvironment = do - pos <- getPosition - Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> - controlSeq "renewenvironment" <|> - controlSeq "provideenvironment" - withVerbatimMode $ do - optional $ symbol '*' - spaces - name <- untokenize <$> braced - spaces - numargs <- option 0 $ try bracketedNum - spaces - optarg <- option Nothing $ Just <$> try bracketedToks - let argspecs = map (\i -> ArgNum i) [1..numargs] - startcontents <- spaces >> bracedOrToken - endcontents <- spaces >> bracedOrToken - macros <- sMacros <$> getState - case M.lookup name macros of - Just _ - | mtype == "newenvironment" -> do - report $ MacroAlreadyDefined name pos - return Nothing - | mtype == "provideenvironment" -> - return Nothing - _ -> return $ Just (name, - Macro ExpandWhenUsed argspecs optarg startcontents, - Macro ExpandWhenUsed [] Nothing endcontents) - -bracketedNum :: PandocMonad m => LP m Int -bracketedNum = do - ds <- untokenize <$> bracketedToks - case safeRead ds of - Just i -> return i - _ -> return 0 - looseItem :: PandocMonad m => LP m Blocks looseItem = do inListItem <- sInListItem <$> getState diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs new file mode 100644 index 000000000..607f5438c --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs @@ -0,0 +1,153 @@ +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Macro + ( macroDef + ) +where +import Text.Pandoc.Extensions (Extension(..)) +import Text.Pandoc.Logging (LogMessage(MacroAlreadyDefined)) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Class +import Text.Pandoc.Shared (safeRead) +import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) +import Control.Applicative ((<|>), optional) +import qualified Data.Map as M +import Data.Text (Text) + +macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a +macroDef constructor = do + (_, s) <- withRaw (commandDef <|> environmentDef) + (constructor (untokenize s) <$ + guardDisabled Ext_latex_macros) + <|> return mempty + where commandDef = do + (name, macro') <- newcommand <|> letmacro <|> defmacro + guardDisabled Ext_latex_macros <|> + updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) }) + environmentDef = do + mbenv <- newenvironment + case mbenv of + Nothing -> return () + Just (name, macro1, macro2) -> + guardDisabled Ext_latex_macros <|> + do updateState $ \s -> s{ sMacros = + M.insert name macro1 (sMacros s) } + updateState $ \s -> s{ sMacros = + M.insert ("end" <> name) macro2 (sMacros s) } + -- @\newenvironment{envname}[n-args][default]{begin}{end}@ + -- is equivalent to + -- @\newcommand{\envname}[n-args][default]{begin}@ + -- @\newcommand{\endenvname}@ + +letmacro :: PandocMonad m => LP m (Text, Macro) +letmacro = do + controlSeq "let" + (name, contents) <- withVerbatimMode $ do + Tok _ (CtrlSeq name) _ <- anyControlSeq + optional $ symbol '=' + spaces + -- we first parse in verbatim mode, and then expand macros, + -- because we don't want \let\foo\bar to turn into + -- \let\foo hello if we have previously \def\bar{hello} + contents <- bracedOrToken + return (name, contents) + contents' <- doMacros' 0 contents + return (name, Macro ExpandWhenDefined [] Nothing contents') + +defmacro :: PandocMonad m => LP m (Text, Macro) +defmacro = try $ + -- we use withVerbatimMode, because macros are to be expanded + -- at point of use, not point of definition + withVerbatimMode $ do + controlSeq "def" + Tok _ (CtrlSeq name) _ <- anyControlSeq + argspecs <- many (argspecArg <|> argspecPattern) + contents <- bracedOrToken + return (name, Macro ExpandWhenUsed argspecs Nothing contents) + +argspecArg :: PandocMonad m => LP m ArgSpec +argspecArg = do + Tok _ (Arg i) _ <- satisfyTok isArgTok + return $ ArgNum i + +argspecPattern :: PandocMonad m => LP m ArgSpec +argspecPattern = + Pattern <$> many1 (satisfyTok (\(Tok _ toktype' txt) -> + (toktype' == Symbol || toktype' == Word) && + (txt /= "{" && txt /= "\\" && txt /= "}"))) + +newcommand :: PandocMonad m => LP m (Text, Macro) +newcommand = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + controlSeq "renewcommand" <|> + controlSeq "providecommand" <|> + controlSeq "DeclareMathOperator" <|> + controlSeq "DeclareRobustCommand" + withVerbatimMode $ do + Tok _ (CtrlSeq name) txt <- do + optional (symbol '*') + anyControlSeq <|> + (symbol '{' *> spaces *> anyControlSeq <* spaces <* symbol '}') + spaces + numargs <- option 0 $ try bracketedNum + let argspecs = map ArgNum [1..numargs] + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + spaces + contents' <- bracedOrToken + let contents = + case mtype of + "DeclareMathOperator" -> + Tok pos (CtrlSeq "mathop") "\\mathop" + : Tok pos Symbol "{" + : Tok pos (CtrlSeq "mathrm") "\\mathrm" + : Tok pos Symbol "{" + : (contents' ++ + [ Tok pos Symbol "}", Tok pos Symbol "}" ]) + _ -> contents' + macros <- sMacros <$> getState + case M.lookup name macros of + Just macro + | mtype == "newcommand" -> do + report $ MacroAlreadyDefined txt pos + return (name, macro) + | mtype == "providecommand" -> return (name, macro) + _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) + +newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) +newenvironment = do + pos <- getPosition + Tok _ (CtrlSeq mtype) _ <- controlSeq "newenvironment" <|> + controlSeq "renewenvironment" <|> + controlSeq "provideenvironment" + withVerbatimMode $ do + optional $ symbol '*' + spaces + name <- untokenize <$> braced + spaces + numargs <- option 0 $ try bracketedNum + spaces + optarg <- option Nothing $ Just <$> try bracketedToks + let argspecs = map (\i -> ArgNum i) [1..numargs] + startcontents <- spaces >> bracedOrToken + endcontents <- spaces >> bracedOrToken + macros <- sMacros <$> getState + case M.lookup name macros of + Just _ + | mtype == "newenvironment" -> do + report $ MacroAlreadyDefined name pos + return Nothing + | mtype == "provideenvironment" -> + return Nothing + _ -> return $ Just (name, + Macro ExpandWhenUsed argspecs optarg startcontents, + Macro ExpandWhenUsed [] Nothing endcontents) + +bracketedNum :: PandocMonad m => LP m Int +bracketedNum = do + ds <- untokenize <$> bracketedToks + case safeRead ds of + Just i -> return i + _ -> return 0 -- cgit v1.2.3 From 7f1b933aaacf4a01fb23fa4989f190098e96e702 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 1 Mar 2021 09:02:10 -0800 Subject: Make T.P.Readers.LaTeX.Types an unexported module. [API change] This is really an implementation detail that shouldn't be exposed in the public API. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index c3317f24b..ad325ee24 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -518,7 +518,6 @@ library Text.Pandoc.Readers, Text.Pandoc.Readers.HTML, Text.Pandoc.Readers.LaTeX, - Text.Pandoc.Readers.LaTeX.Types, Text.Pandoc.Readers.Markdown, Text.Pandoc.Readers.CommonMark, Text.Pandoc.Readers.Creole, @@ -627,6 +626,7 @@ library Text.Pandoc.Readers.HTML.Table, Text.Pandoc.Readers.HTML.TagCategories, Text.Pandoc.Readers.HTML.Types, + Text.Pandoc.Readers.LaTeX.Types, Text.Pandoc.Readers.LaTeX.Parsing, Text.Pandoc.Readers.LaTeX.Lang, Text.Pandoc.Readers.LaTeX.SIunitx, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index fa77595b9..fc85f0545 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -19,7 +19,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, applyMacros, rawLaTeXInline, rawLaTeXBlock, - inlineCommand, + inlineCommand ) where import Control.Applicative (many, optional, (<|>)) -- cgit v1.2.3 From 2097411e4f4da0f0cd2fb4fdbb4759b6da600289 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 2 Mar 2021 21:06:56 -0800 Subject: Split up T.P.Writers.Markdown... with T.P.Writers.Markdown.Types and T.P.Writers.Markdown.Inline. The module was difficult to compile on low-memory system.s --- pandoc.cabal | 2 + src/Text/Pandoc/Writers/Markdown.hs | 597 +--------------------------- src/Text/Pandoc/Writers/Markdown/Inline.hs | 601 +++++++++++++++++++++++++++++ src/Text/Pandoc/Writers/Markdown/Types.hs | 81 ++++ 4 files changed, 690 insertions(+), 591 deletions(-) create mode 100644 src/Text/Pandoc/Writers/Markdown/Inline.hs create mode 100644 src/Text/Pandoc/Writers/Markdown/Types.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index ad325ee24..3aa29b477 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -665,6 +665,8 @@ library Text.Pandoc.Writers.LaTeX.Notes, Text.Pandoc.Writers.LaTeX.Table, Text.Pandoc.Writers.LaTeX.Types, + Text.Pandoc.Writers.Markdown.Types, + Text.Pandoc.Writers.Markdown.Inline, Text.Pandoc.Writers.Roff, Text.Pandoc.Writers.Powerpoint.Presentation, Text.Pandoc.Writers.Powerpoint.Output, diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index d33246a63..533bcc071 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -22,15 +22,13 @@ module Text.Pandoc.Writers.Markdown ( writePlain) where import Control.Monad.Reader import Control.Monad.State.Strict -import Data.Char (isAlphaNum, isDigit) import Data.Default -import Data.List (find, intersperse, sortOn, transpose) +import Data.List (intersperse, sortOn, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import Network.HTTP (urlEncode) import Text.HTML.TagSoup (Tag (..), isTagText, parseTags) import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Definition @@ -44,59 +42,11 @@ import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Val(..), Context(..), FromContext(..)) import Text.Pandoc.Walk import Text.Pandoc.Writers.HTML (writeHtml5String) -import Text.Pandoc.Writers.Math (texMathToInlines) -import Text.Pandoc.XML (toHtml5Entities) -import Data.Coerce (coerce) - -type Notes = [[Block]] -type Ref = (Text, Target, Attr) -type Refs = [Ref] - -type MD m = ReaderT WriterEnv (StateT WriterState m) - -evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a -evalMD md env st = evalStateT (runReaderT md env) st - -data WriterEnv = WriterEnv { envInList :: Bool - , envVariant :: MarkdownVariant - , envRefShortcutable :: Bool - , envBlockLevel :: Int - , envEscapeSpaces :: Bool - } - -data MarkdownVariant = - PlainText - | Commonmark - | Markdown - deriving (Show, Eq) - -instance Default WriterEnv - where def = WriterEnv { envInList = False - , envVariant = Markdown - , envRefShortcutable = True - , envBlockLevel = 0 - , envEscapeSpaces = False - } - -data WriterState = WriterState { stNotes :: Notes - , stPrevRefs :: Refs - , stRefs :: Refs - , stKeys :: M.Map Key - (M.Map (Target, Attr) Int) - , stLastIdx :: Int - , stIds :: Set.Set Text - , stNoteNum :: Int - } - -instance Default WriterState - where def = WriterState{ stNotes = [] - , stPrevRefs = [] - , stRefs = [] - , stKeys = M.empty - , stLastIdx = 0 - , stIds = Set.empty - , stNoteNum = 1 - } +import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown) +import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), + WriterState(..), + WriterEnv(..), + Ref, Refs, MD, evalMD) -- | Convert Pandoc to Markdown. writeMarkdown :: PandocMonad m => WriterOptions -> Pandoc -> m Text @@ -297,49 +247,6 @@ noteToMarkdown opts num blocks = do then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents --- | Escape special characters for Markdown. -escapeText :: WriterOptions -> Text -> Text -escapeText opts = T.pack . go . T.unpack - where - go [] = [] - go (c:cs) = - case c of - '<' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '<' : go cs - | otherwise -> "<" ++ go cs - '>' | isEnabled Ext_all_symbols_escapable opts -> - '\\' : '>' : go cs - | otherwise -> ">" ++ go cs - '@' | isEnabled Ext_citations opts -> - case cs of - (d:_) - | isAlphaNum d || d == '_' - -> '\\':'@':go cs - _ -> '@':go cs - _ | c `elem` ['\\','`','*','_','[',']','#'] -> - '\\':c:go cs - '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs - '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs - '~' | isEnabled Ext_subscript opts || - isEnabled Ext_strikeout opts -> '\\':'~':go cs - '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs - '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs - '"' | isEnabled Ext_smart opts -> '\\':'"':go cs - '-' | isEnabled Ext_smart opts -> - case cs of - '-':_ -> '\\':'-':go cs - _ -> '-':go cs - '.' | isEnabled Ext_smart opts -> - case cs of - '.':'.':rest -> '\\':'.':'.':'.':go rest - _ -> '.':go cs - _ -> case cs of - '_':x:xs - | isEnabled Ext_intraword_underscores opts - , isAlphaNum c - , isAlphaNum x -> c : '_' : x : go xs - _ -> c : go cs - attrsToMarkdown :: Attr -> Doc Text attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] where attribId = case attribs of @@ -912,499 +819,7 @@ blockListToMarkdown opts blocks = do | otherwise = RawBlock "markdown" " \n" mconcat <$> mapM (blockToMarkdown opts) (fixBlocks blocks) -getKey :: Doc Text -> Key -getKey = toKey . render Nothing - -findUsableIndex :: [Text] -> Int -> Int -findUsableIndex lbls i = if tshow i `elem` lbls - then findUsableIndex lbls (i + 1) - else i - -getNextIndex :: PandocMonad m => MD m Int -getNextIndex = do - prevRefs <- gets stPrevRefs - refs <- gets stRefs - i <- (+ 1) <$> gets stLastIdx - modify $ \s -> s{ stLastIdx = i } - let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs - return $ findUsableIndex refLbls i - --- | Get reference for target; if none exists, create unique one and return. --- Prefer label if possible; otherwise, generate a unique key. -getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text -getReference attr label target = do - refs <- gets stRefs - case find (\(_,t,a) -> t == target && a == attr) refs of - Just (ref, _, _) -> return ref - Nothing -> do - keys <- gets stKeys - let key = getKey label - let rawkey = coerce key - case M.lookup key keys of - Nothing -> do -- no other refs with this label - (lab', idx) <- if T.null rawkey || - T.length rawkey > 999 || - T.any (\c -> c == '[' || c == ']') rawkey - then do - i <- getNextIndex - return (tshow i, i) - else - return (render Nothing label, 0) - modify (\s -> s{ - stRefs = (lab', target, attr) : refs, - stKeys = M.insert (getKey label) - (M.insert (target, attr) idx mempty) - (stKeys s) }) - return lab' - - Just km -> -- we have refs with this label - case M.lookup (target, attr) km of - Just i -> do - let lab' = render Nothing $ - label <> if i == 0 - then mempty - else literal (tshow i) - -- make sure it's in stRefs; it may be - -- a duplicate that was printed in a previous - -- block: - when ((lab', target, attr) `notElem` refs) $ - modify (\s -> s{ - stRefs = (lab', target, attr) : refs }) - return lab' - Nothing -> do -- but this one is to a new target - i <- getNextIndex - let lab' = tshow i - modify (\s -> s{ - stRefs = (lab', target, attr) : refs, - stKeys = M.insert key - (M.insert (target, attr) i km) - (stKeys s) }) - return lab' - --- | Convert list of Pandoc inline elements to markdown. -inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text) -inlineListToMarkdown opts lst = do - inlist <- asks envInList - go (if inlist then avoidBadWrapsInList lst else lst) - where go [] = return empty - go (x@Math{}:y@(Str t):zs) - | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058 - = liftM2 (<>) (inlineToMarkdown opts x) - (go (RawInline (Format "html") "<!-- -->" : y : zs)) - go (i:is) = case i of - Link {} -> case is of - -- If a link is followed by another link, or '[', '(' or ':' - -- then we don't shortcut - Link {}:_ -> unshortcutable - Space:Link {}:_ -> unshortcutable - Space:(Str(thead -> Just '[')):_ -> unshortcutable - Space:(RawInline _ (thead -> Just '[')):_ -> unshortcutable - Space:(Cite _ _):_ -> unshortcutable - SoftBreak:Link {}:_ -> unshortcutable - SoftBreak:(Str(thead -> Just '[')):_ -> unshortcutable - SoftBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable - SoftBreak:(Cite _ _):_ -> unshortcutable - LineBreak:Link {}:_ -> unshortcutable - LineBreak:(Str(thead -> Just '[')):_ -> unshortcutable - LineBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable - LineBreak:(Cite _ _):_ -> unshortcutable - (Cite _ _):_ -> unshortcutable - Str (thead -> Just '['):_ -> unshortcutable - Str (thead -> Just '('):_ -> unshortcutable - Str (thead -> Just ':'):_ -> unshortcutable - (RawInline _ (thead -> Just '[')):_ -> unshortcutable - (RawInline _ (thead -> Just '(')):_ -> unshortcutable - (RawInline _ (thead -> Just ':')):_ -> unshortcutable - (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable - _ -> shortcutable - _ -> shortcutable - where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) - unshortcutable = do - iMark <- local - (\env -> env { envRefShortcutable = False }) - (inlineToMarkdown opts i) - fmap (iMark <>) (go is) - thead = fmap fst . T.uncons - -isSp :: Inline -> Bool -isSp Space = True -isSp SoftBreak = True -isSp _ = False - -avoidBadWrapsInList :: [Inline] -> [Inline] -avoidBadWrapsInList [] = [] -avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s = - Str (" >" <> cs) : avoidBadWrapsInList xs -avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))] - | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]] -avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs) - | T.null cs && isSp s && c `elem` ['-','*','+'] = - Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList (s:Str cs:Space:xs) - | isSp s && isOrderedListMarker cs = - Str (" " <> cs) : Space : avoidBadWrapsInList xs -avoidBadWrapsInList [s, Str cs] - | isSp s && isOrderedListMarker cs = [Str $ " " <> cs] -avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs - -isOrderedListMarker :: Text -> Bool -isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) && - isRight (runParser (anyOrderedListMarker >> eof) - defaultParserState "" xs) - -isRight :: Either a b -> Bool -isRight (Right _) = True -isRight (Left _) = False - --- | Convert Pandoc inline element to markdown. -inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text) -inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = - case lookup "data-emoji" kvs of - Just emojiname | isEnabled Ext_emoji opts -> - return $ ":" <> literal emojiname <> ":" - _ -> inlineToMarkdown opts (Str s) -inlineToMarkdown opts (Span attrs ils) = do - variant <- asks envVariant - contents <- inlineListToMarkdown opts ils - return $ case attrs of - (_,["csl-block"],_) -> (cr <>) - (_,["csl-left-margin"],_) -> (cr <>) - (_,["csl-indent"],_) -> (cr <>) - _ -> id - $ case variant of - PlainText -> contents - _ | attrs == nullAttr -> contents - | isEnabled Ext_bracketed_spans opts -> - let attrs' = if attrs /= nullAttr - then attrsToMarkdown attrs - else empty - in "[" <> contents <> "]" <> attrs' - | isEnabled Ext_raw_html opts || - isEnabled Ext_native_spans opts -> - tagWithAttrs "span" attrs <> contents <> literal "</span>" - | otherwise -> contents -inlineToMarkdown _ (Emph []) = return empty -inlineToMarkdown opts (Emph lst) = do - variant <- asks envVariant - contents <- inlineListToMarkdown opts lst - return $ case variant of - PlainText - | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_" - | otherwise -> contents - _ -> "*" <> contents <> "*" -inlineToMarkdown _ (Underline []) = return empty -inlineToMarkdown opts (Underline lst) = do - variant <- asks envVariant - contents <- inlineListToMarkdown opts lst - case variant of - PlainText -> return contents - _ | isEnabled Ext_bracketed_spans opts -> - return $ "[" <> contents <> "]" <> "{.ul}" - | isEnabled Ext_native_spans opts -> - return $ tagWithAttrs "span" ("", ["underline"], []) - <> contents - <> literal "</span>" - | isEnabled Ext_raw_html opts -> - return $ "<u>" <> contents <> "</u>" - | otherwise -> inlineToMarkdown opts (Emph lst) -inlineToMarkdown _ (Strong []) = return empty -inlineToMarkdown opts (Strong lst) = do - variant <- asks envVariant - case variant of - PlainText -> - inlineListToMarkdown opts $ - if isEnabled Ext_gutenberg opts - then capitalize lst - else lst - _ -> do - contents <- inlineListToMarkdown opts lst - return $ "**" <> contents <> "**" -inlineToMarkdown _ (Strikeout []) = return empty -inlineToMarkdown opts (Strikeout lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_strikeout opts - then "~~" <> contents <> "~~" - else if isEnabled Ext_raw_html opts - then "<s>" <> contents <> "</s>" - else contents -inlineToMarkdown _ (Superscript []) = return empty -inlineToMarkdown opts (Superscript lst) = - local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do - contents <- inlineListToMarkdown opts lst - if isEnabled Ext_superscript opts - then return $ "^" <> contents <> "^" - else if isEnabled Ext_raw_html opts - then return $ "<sup>" <> contents <> "</sup>" - else - case traverse toSuperscriptInline lst of - Just xs' | not (writerPreferAscii opts) - -> inlineListToMarkdown opts xs' - _ -> do - let rendered = render Nothing contents - return $ - case mapM toSuperscript (T.unpack rendered) of - Just r -> literal $ T.pack r - Nothing -> literal $ "^(" <> rendered <> ")" -inlineToMarkdown _ (Subscript []) = return empty -inlineToMarkdown opts (Subscript lst) = - local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do - contents <- inlineListToMarkdown opts lst - if isEnabled Ext_subscript opts - then return $ "~" <> contents <> "~" - else if isEnabled Ext_raw_html opts - then return $ "<sub>" <> contents <> "</sub>" - else - case traverse toSubscriptInline lst of - Just xs' | not (writerPreferAscii opts) - -> inlineListToMarkdown opts xs' - _ -> do - let rendered = render Nothing contents - return $ - case mapM toSuperscript (T.unpack rendered) of - Just r -> literal $ T.pack r - Nothing -> literal $ "_(" <> rendered <> ")" -inlineToMarkdown opts (SmallCaps lst) = do - variant <- asks envVariant - if variant /= PlainText && - (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) - then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst) - else inlineListToMarkdown opts $ capitalize lst -inlineToMarkdown opts (Quoted SingleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_smart opts - then "'" <> contents <> "'" - else - if writerPreferAscii opts - then "‘" <> contents <> "’" - else "‘" <> contents <> "’" -inlineToMarkdown opts (Quoted DoubleQuote lst) = do - contents <- inlineListToMarkdown opts lst - return $ if isEnabled Ext_smart opts - then "\"" <> contents <> "\"" - else - if writerPreferAscii opts - then "“" <> contents <> "”" - else "“" <> contents <> "”" -inlineToMarkdown opts (Code attr str) = do - let tickGroups = filter (T.any (== '`')) $ T.group str - let longest = if null tickGroups - then 0 - else maximum $ map T.length tickGroups - let marker = T.replicate (longest + 1) "`" - let spacer = if longest == 0 then "" else " " - let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr - then attrsToMarkdown attr - else empty - variant <- asks envVariant - case variant of - PlainText -> return $ literal str - _ -> return $ literal - (marker <> spacer <> str <> spacer <> marker) <> attrs -inlineToMarkdown opts (Str str) = do - variant <- asks envVariant - let str' = (if writerPreferAscii opts - then toHtml5Entities - else id) . - (if isEnabled Ext_smart opts - then unsmartify opts - else id) . - (if variant == PlainText - then id - else escapeText opts) $ str - return $ literal str' -inlineToMarkdown opts (Math InlineMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> inlineToMarkdown opts - (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$" <> literal str <> "$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\(" <> literal str <> "\\)" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\(" <> literal str <> "\\\\)" - | otherwise -> do - variant <- asks envVariant - texMathToInlines InlineMath str >>= - inlineListToMarkdown opts . - (if variant == PlainText then makeMathPlainer else id) -inlineToMarkdown opts (Math DisplayMath str) = - case writerHTMLMathMethod opts of - WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` - inlineToMarkdown opts (Image nullAttr [Str str] - (url <> T.pack (urlEncode $ T.unpack str), str)) - _ | isEnabled Ext_tex_math_dollars opts -> - return $ "$$" <> literal str <> "$$" - | isEnabled Ext_tex_math_single_backslash opts -> - return $ "\\[" <> literal str <> "\\]" - | isEnabled Ext_tex_math_double_backslash opts -> - return $ "\\\\[" <> literal str <> "\\\\]" - | otherwise -> (\x -> cr <> x <> cr) `fmap` - (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) -inlineToMarkdown opts il@(RawInline f str) = do - let tickGroups = filter (T.any (== '`')) $ T.group str - let numticks = if null tickGroups - then 1 - else 1 + maximum (map T.length tickGroups) - variant <- asks envVariant - let Format fmt = f - let rawAttribInline = return $ - literal (T.replicate numticks "`") <> literal str <> - literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}" - let renderEmpty = mempty <$ report (InlineNotRendered il) - case variant of - PlainText -> renderEmpty - Commonmark - | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] - -> return $ literal str - Markdown - | f `elem` ["markdown", "markdown_github", "markdown_phpextra", - "markdown_mmd", "markdown_strict"] - -> return $ literal str - _ | isEnabled Ext_raw_attribute opts -> rawAttribInline - | f `elem` ["html", "html5", "html4"] - , isEnabled Ext_raw_html opts - -> return $ literal str - | f `elem` ["latex", "tex"] - , isEnabled Ext_raw_tex opts - -> return $ literal str - _ -> renderEmpty - - -inlineToMarkdown opts LineBreak = do - variant <- asks envVariant - if variant == PlainText || isEnabled Ext_hard_line_breaks opts - then return cr - else return $ - if isEnabled Ext_escaped_line_breaks opts - then "\\" <> cr - else " " <> cr -inlineToMarkdown _ Space = do - escapeSpaces <- asks envEscapeSpaces - return $ if escapeSpaces then "\\ " else space -inlineToMarkdown opts SoftBreak = do - escapeSpaces <- asks envEscapeSpaces - let space' = if escapeSpaces then "\\ " else space - return $ case writerWrapText opts of - WrapNone -> space' - WrapAuto -> space' - WrapPreserve -> cr -inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst -inlineToMarkdown opts (Cite (c:cs) lst) - | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst - | otherwise = - if citationMode c == AuthorInText - then do - suffs <- inlineListToMarkdown opts $ citationSuffix c - rest <- mapM convertOne cs - let inbr = suffs <+> joincits rest - br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ literal ("@" <> citationId c) <+> br - else do - cits <- mapM convertOne (c:cs) - return $ literal "[" <> joincits cits <> literal "]" - where - joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) - convertOne Citation { citationId = k - , citationPrefix = pinlines - , citationSuffix = sinlines - , citationMode = m } - = do - pdoc <- inlineListToMarkdown opts pinlines - sdoc <- inlineListToMarkdown opts sinlines - let k' = literal (modekey m <> "@" <> k) - r = case sinlines of - Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc - _ -> k' <+> sdoc - return $ pdoc <+> r - modekey SuppressAuthor = "-" - modekey _ = "" -inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do - variant <- asks envVariant - linktext <- inlineListToMarkdown opts txt - let linktitle = if T.null tit - then empty - else literal $ " \"" <> tit <> "\"" - let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) - let useAuto = isURI src && - case txt of - [Str s] | escapeURI s == srcSuffix -> True - _ -> False - let useRefLinks = writerReferenceLinks opts && not useAuto - shortcutable <- asks envRefShortcutable - let useShortcutRefLinks = shortcutable && - isEnabled Ext_shortcut_reference_links opts - reftext <- if useRefLinks - then literal <$> getReference attr linktext (src, tit) - else return mempty - case variant of - PlainText - | useAuto -> return $ literal srcSuffix - | otherwise -> return linktext - _ | useAuto -> return $ "<" <> literal srcSuffix <> ">" - | useRefLinks -> - let first = "[" <> linktext <> "]" - second = if getKey linktext == getKey reftext - then if useShortcutRefLinks - then "" - else "[]" - else "[" <> reftext <> "]" - in return $ first <> second - | isEnabled Ext_raw_html opts - , not (isEnabled Ext_link_attributes opts) - , attr /= nullAttr -> -- use raw HTML to render attributes - literal . T.strip <$> - writeHtml5String opts{ writerTemplate = Nothing } - (Pandoc nullMeta [Plain [lnk]]) - | otherwise -> return $ - "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <> - linkAttributes opts attr -inlineToMarkdown opts img@(Image attr alternate (source, tit)) - | isEnabled Ext_raw_html opts && - not (isEnabled Ext_link_attributes opts) && - attr /= nullAttr = -- use raw HTML - literal . T.strip <$> - writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) - | otherwise = do - variant <- asks envVariant - let txt = if null alternate || alternate == [Str source] - -- to prevent autolinks - then [Str ""] - else alternate - linkPart <- inlineToMarkdown opts (Link attr txt (source, tit)) - return $ case variant of - PlainText -> "[" <> linkPart <> "]" - _ -> "!" <> linkPart -inlineToMarkdown opts (Note contents) = do - modify (\st -> st{ stNotes = contents : stNotes st }) - st <- get - let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1) - if isEnabled Ext_footnotes opts - then return $ "[^" <> ref <> "]" - else return $ "[" <> ref <> "]" - -makeMathPlainer :: [Inline] -> [Inline] -makeMathPlainer = walk go - where - go (Emph xs) = Span nullAttr xs - go x = x - lineBreakToSpace :: Inline -> Inline lineBreakToSpace LineBreak = Space lineBreakToSpace SoftBreak = Space lineBreakToSpace x = x - -toSubscriptInline :: Inline -> Maybe Inline -toSubscriptInline Space = Just Space -toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils -toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s) -toSubscriptInline LineBreak = Just LineBreak -toSubscriptInline SoftBreak = Just SoftBreak -toSubscriptInline _ = Nothing - -toSuperscriptInline :: Inline -> Maybe Inline -toSuperscriptInline Space = Just Space -toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils -toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s) -toSuperscriptInline LineBreak = Just LineBreak -toSuperscriptInline SoftBreak = Just SoftBreak -toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs new file mode 100644 index 000000000..19157701e --- /dev/null +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -0,0 +1,601 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Writers.Markdown.Inline + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.Markdown.Inline ( + inlineListToMarkdown + ) where +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.Char (isAlphaNum, isDigit) +import Data.List (find, intersperse) +import qualified Data.Map as M +import Data.Maybe (fromMaybe) +import Data.Text (Text) +import qualified Data.Text as T +import Network.HTTP (urlEncode) +import Text.Pandoc.Class.PandocMonad (PandocMonad, report) +import Text.Pandoc.Definition +import Text.Pandoc.Logging +import Text.Pandoc.Options +import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space) +import Text.DocLayout +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Pandoc.Walk +import Text.Pandoc.Writers.HTML (writeHtml5String) +import Text.Pandoc.Writers.Math (texMathToInlines) +import Text.Pandoc.XML (toHtml5Entities) +import Data.Coerce (coerce) +import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), + WriterState(..), + WriterEnv(..), MD) + +-- | Escape special characters for Markdown. +escapeText :: WriterOptions -> Text -> Text +escapeText opts = T.pack . go . T.unpack + where + go [] = [] + go (c:cs) = + case c of + '<' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '<' : go cs + | otherwise -> "<" ++ go cs + '>' | isEnabled Ext_all_symbols_escapable opts -> + '\\' : '>' : go cs + | otherwise -> ">" ++ go cs + '@' | isEnabled Ext_citations opts -> + case cs of + (d:_) + | isAlphaNum d || d == '_' + -> '\\':'@':go cs + _ -> '@':go cs + _ | c `elem` ['\\','`','*','_','[',']','#'] -> + '\\':c:go cs + '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs + '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs + '~' | isEnabled Ext_subscript opts || + isEnabled Ext_strikeout opts -> '\\':'~':go cs + '$' | isEnabled Ext_tex_math_dollars opts -> '\\':'$':go cs + '\'' | isEnabled Ext_smart opts -> '\\':'\'':go cs + '"' | isEnabled Ext_smart opts -> '\\':'"':go cs + '-' | isEnabled Ext_smart opts -> + case cs of + '-':_ -> '\\':'-':go cs + _ -> '-':go cs + '.' | isEnabled Ext_smart opts -> + case cs of + '.':'.':rest -> '\\':'.':'.':'.':go rest + _ -> '.':go cs + _ -> case cs of + '_':x:xs + | isEnabled Ext_intraword_underscores opts + , isAlphaNum c + , isAlphaNum x -> c : '_' : x : go xs + _ -> c : go cs + +attrsToMarkdown :: Attr -> Doc Text +attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] + where attribId = case attribs of + ("",_,_) -> empty + (i,_,_) -> "#" <> escAttr i + attribClasses = case attribs of + (_,[],_) -> empty + (_,cs,_) -> hsep $ + map (escAttr . ("."<>)) + cs + attribKeys = case attribs of + (_,_,[]) -> empty + (_,_,ks) -> hsep $ + map (\(k,v) -> escAttr k + <> "=\"" <> + escAttr v <> "\"") ks + escAttr = mconcat . map escAttrChar . T.unpack + escAttrChar '"' = literal "\\\"" + escAttrChar '\\' = literal "\\\\" + escAttrChar c = literal $ T.singleton c + +linkAttributes :: WriterOptions -> Attr -> Doc Text +linkAttributes opts attr = + if isEnabled Ext_link_attributes opts && attr /= nullAttr + then attrsToMarkdown attr + else empty + +getKey :: Doc Text -> Key +getKey = toKey . render Nothing + +findUsableIndex :: [Text] -> Int -> Int +findUsableIndex lbls i = if tshow i `elem` lbls + then findUsableIndex lbls (i + 1) + else i + +getNextIndex :: PandocMonad m => MD m Int +getNextIndex = do + prevRefs <- gets stPrevRefs + refs <- gets stRefs + i <- (+ 1) <$> gets stLastIdx + modify $ \s -> s{ stLastIdx = i } + let refLbls = map (\(r,_,_) -> r) $ prevRefs ++ refs + return $ findUsableIndex refLbls i + +-- | Get reference for target; if none exists, create unique one and return. +-- Prefer label if possible; otherwise, generate a unique key. +getReference :: PandocMonad m => Attr -> Doc Text -> Target -> MD m Text +getReference attr label target = do + refs <- gets stRefs + case find (\(_,t,a) -> t == target && a == attr) refs of + Just (ref, _, _) -> return ref + Nothing -> do + keys <- gets stKeys + let key = getKey label + let rawkey = coerce key + case M.lookup key keys of + Nothing -> do -- no other refs with this label + (lab', idx) <- if T.null rawkey || + T.length rawkey > 999 || + T.any (\c -> c == '[' || c == ']') rawkey + then do + i <- getNextIndex + return (tshow i, i) + else + return (render Nothing label, 0) + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert (getKey label) + (M.insert (target, attr) idx mempty) + (stKeys s) }) + return lab' + + Just km -> -- we have refs with this label + case M.lookup (target, attr) km of + Just i -> do + let lab' = render Nothing $ + label <> if i == 0 + then mempty + else literal (tshow i) + -- make sure it's in stRefs; it may be + -- a duplicate that was printed in a previous + -- block: + when ((lab', target, attr) `notElem` refs) $ + modify (\s -> s{ + stRefs = (lab', target, attr) : refs }) + return lab' + Nothing -> do -- but this one is to a new target + i <- getNextIndex + let lab' = tshow i + modify (\s -> s{ + stRefs = (lab', target, attr) : refs, + stKeys = M.insert key + (M.insert (target, attr) i km) + (stKeys s) }) + return lab' + +-- | Convert list of Pandoc inline elements to markdown. +inlineListToMarkdown :: PandocMonad m => WriterOptions -> [Inline] -> MD m (Doc Text) +inlineListToMarkdown opts lst = do + inlist <- asks envInList + go (if inlist then avoidBadWrapsInList lst else lst) + where go [] = return empty + go (x@Math{}:y@(Str t):zs) + | T.all isDigit (T.take 1 t) -- starts with digit -- see #7058 + = liftM2 (<>) (inlineToMarkdown opts x) + (go (RawInline (Format "html") "<!-- -->" : y : zs)) + go (i:is) = case i of + Link {} -> case is of + -- If a link is followed by another link, or '[', '(' or ':' + -- then we don't shortcut + Link {}:_ -> unshortcutable + Space:Link {}:_ -> unshortcutable + Space:(Str(thead -> Just '[')):_ -> unshortcutable + Space:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + Space:(Cite _ _):_ -> unshortcutable + SoftBreak:Link {}:_ -> unshortcutable + SoftBreak:(Str(thead -> Just '[')):_ -> unshortcutable + SoftBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + SoftBreak:(Cite _ _):_ -> unshortcutable + LineBreak:Link {}:_ -> unshortcutable + LineBreak:(Str(thead -> Just '[')):_ -> unshortcutable + LineBreak:(RawInline _ (thead -> Just '[')):_ -> unshortcutable + LineBreak:(Cite _ _):_ -> unshortcutable + (Cite _ _):_ -> unshortcutable + Str (thead -> Just '['):_ -> unshortcutable + Str (thead -> Just '('):_ -> unshortcutable + Str (thead -> Just ':'):_ -> unshortcutable + (RawInline _ (thead -> Just '[')):_ -> unshortcutable + (RawInline _ (thead -> Just '(')):_ -> unshortcutable + (RawInline _ (thead -> Just ':')):_ -> unshortcutable + (RawInline _ (T.stripPrefix " [" -> Just _ )):_ -> unshortcutable + _ -> shortcutable + _ -> shortcutable + where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is) + unshortcutable = do + iMark <- local + (\env -> env { envRefShortcutable = False }) + (inlineToMarkdown opts i) + fmap (iMark <>) (go is) + thead = fmap fst . T.uncons + +isSp :: Inline -> Bool +isSp Space = True +isSp SoftBreak = True +isSp _ = False + +avoidBadWrapsInList :: [Inline] -> [Inline] +avoidBadWrapsInList [] = [] +avoidBadWrapsInList (s:Str (T.uncons -> Just ('>',cs)):xs) | isSp s = + Str (" >" <> cs) : avoidBadWrapsInList xs +avoidBadWrapsInList [s, Str (T.uncons -> Just (c, cs))] + | T.null cs && isSp s && c `elem` ['-','*','+'] = [Str $ T.pack [' ', c]] +avoidBadWrapsInList (s:Str (T.uncons -> Just (c, cs)):Space:xs) + | T.null cs && isSp s && c `elem` ['-','*','+'] = + Str (T.pack [' ', c]) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList (s:Str cs:Space:xs) + | isSp s && isOrderedListMarker cs = + Str (" " <> cs) : Space : avoidBadWrapsInList xs +avoidBadWrapsInList [s, Str cs] + | isSp s && isOrderedListMarker cs = [Str $ " " <> cs] +avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs + +isOrderedListMarker :: Text -> Bool +isOrderedListMarker xs = not (T.null xs) && (T.last xs `elem` ['.',')']) && + isRight (runParser (anyOrderedListMarker >> eof) + defaultParserState "" xs) + where + isRight (Right _) = True + isRight (Left _) = False + +-- | Convert Pandoc inline element to markdown. +inlineToMarkdown :: PandocMonad m => WriterOptions -> Inline -> MD m (Doc Text) +inlineToMarkdown opts (Span ("",["emoji"],kvs) [Str s]) = + case lookup "data-emoji" kvs of + Just emojiname | isEnabled Ext_emoji opts -> + return $ ":" <> literal emojiname <> ":" + _ -> inlineToMarkdown opts (Str s) +inlineToMarkdown opts (Span attrs ils) = do + variant <- asks envVariant + contents <- inlineListToMarkdown opts ils + return $ case attrs of + (_,["csl-block"],_) -> (cr <>) + (_,["csl-left-margin"],_) -> (cr <>) + (_,["csl-indent"],_) -> (cr <>) + _ -> id + $ case variant of + PlainText -> contents + _ | attrs == nullAttr -> contents + | isEnabled Ext_bracketed_spans opts -> + let attrs' = if attrs /= nullAttr + then attrsToMarkdown attrs + else empty + in "[" <> contents <> "]" <> attrs' + | isEnabled Ext_raw_html opts || + isEnabled Ext_native_spans opts -> + tagWithAttrs "span" attrs <> contents <> literal "</span>" + | otherwise -> contents +inlineToMarkdown _ (Emph []) = return empty +inlineToMarkdown opts (Emph lst) = do + variant <- asks envVariant + contents <- inlineListToMarkdown opts lst + return $ case variant of + PlainText + | isEnabled Ext_gutenberg opts -> "_" <> contents <> "_" + | otherwise -> contents + _ -> "*" <> contents <> "*" +inlineToMarkdown _ (Underline []) = return empty +inlineToMarkdown opts (Underline lst) = do + variant <- asks envVariant + contents <- inlineListToMarkdown opts lst + case variant of + PlainText -> return contents + _ | isEnabled Ext_bracketed_spans opts -> + return $ "[" <> contents <> "]" <> "{.ul}" + | isEnabled Ext_native_spans opts -> + return $ tagWithAttrs "span" ("", ["underline"], []) + <> contents + <> literal "</span>" + | isEnabled Ext_raw_html opts -> + return $ "<u>" <> contents <> "</u>" + | otherwise -> inlineToMarkdown opts (Emph lst) +inlineToMarkdown _ (Strong []) = return empty +inlineToMarkdown opts (Strong lst) = do + variant <- asks envVariant + case variant of + PlainText -> + inlineListToMarkdown opts $ + if isEnabled Ext_gutenberg opts + then capitalize lst + else lst + _ -> do + contents <- inlineListToMarkdown opts lst + return $ "**" <> contents <> "**" +inlineToMarkdown _ (Strikeout []) = return empty +inlineToMarkdown opts (Strikeout lst) = do + contents <- inlineListToMarkdown opts lst + return $ if isEnabled Ext_strikeout opts + then "~~" <> contents <> "~~" + else if isEnabled Ext_raw_html opts + then "<s>" <> contents <> "</s>" + else contents +inlineToMarkdown _ (Superscript []) = return empty +inlineToMarkdown opts (Superscript lst) = + local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do + contents <- inlineListToMarkdown opts lst + if isEnabled Ext_superscript opts + then return $ "^" <> contents <> "^" + else if isEnabled Ext_raw_html opts + then return $ "<sup>" <> contents <> "</sup>" + else + case traverse toSuperscriptInline lst of + Just xs' | not (writerPreferAscii opts) + -> inlineListToMarkdown opts xs' + _ -> do + let rendered = render Nothing contents + return $ + case mapM toSuperscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "^(" <> rendered <> ")" +inlineToMarkdown _ (Subscript []) = return empty +inlineToMarkdown opts (Subscript lst) = + local (\env -> env {envEscapeSpaces = envVariant env == Markdown}) $ do + contents <- inlineListToMarkdown opts lst + if isEnabled Ext_subscript opts + then return $ "~" <> contents <> "~" + else if isEnabled Ext_raw_html opts + then return $ "<sub>" <> contents <> "</sub>" + else + case traverse toSubscriptInline lst of + Just xs' | not (writerPreferAscii opts) + -> inlineListToMarkdown opts xs' + _ -> do + let rendered = render Nothing contents + return $ + case mapM toSuperscript (T.unpack rendered) of + Just r -> literal $ T.pack r + Nothing -> literal $ "_(" <> rendered <> ")" +inlineToMarkdown opts (SmallCaps lst) = do + variant <- asks envVariant + if variant /= PlainText && + (isEnabled Ext_raw_html opts || isEnabled Ext_native_spans opts) + then inlineToMarkdown opts (Span ("",["smallcaps"],[]) lst) + else inlineListToMarkdown opts $ capitalize lst +inlineToMarkdown opts (Quoted SingleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ if isEnabled Ext_smart opts + then "'" <> contents <> "'" + else + if writerPreferAscii opts + then "‘" <> contents <> "’" + else "‘" <> contents <> "’" +inlineToMarkdown opts (Quoted DoubleQuote lst) = do + contents <- inlineListToMarkdown opts lst + return $ if isEnabled Ext_smart opts + then "\"" <> contents <> "\"" + else + if writerPreferAscii opts + then "“" <> contents <> "”" + else "“" <> contents <> "”" +inlineToMarkdown opts (Code attr str) = do + let tickGroups = filter (T.any (== '`')) $ T.group str + let longest = if null tickGroups + then 0 + else maximum $ map T.length tickGroups + let marker = T.replicate (longest + 1) "`" + let spacer = if longest == 0 then "" else " " + let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr + then attrsToMarkdown attr + else empty + variant <- asks envVariant + case variant of + PlainText -> return $ literal str + _ -> return $ literal + (marker <> spacer <> str <> spacer <> marker) <> attrs +inlineToMarkdown opts (Str str) = do + variant <- asks envVariant + let str' = (if writerPreferAscii opts + then toHtml5Entities + else id) . + (if isEnabled Ext_smart opts + then unsmartify opts + else id) . + (if variant == PlainText + then id + else escapeText opts) $ str + return $ literal str' +inlineToMarkdown opts (Math InlineMath str) = + case writerHTMLMathMethod opts of + WebTeX url -> inlineToMarkdown opts + (Image nullAttr [Str str] (url <> T.pack (urlEncode $ T.unpack str), str)) + _ | isEnabled Ext_tex_math_dollars opts -> + return $ "$" <> literal str <> "$" + | isEnabled Ext_tex_math_single_backslash opts -> + return $ "\\(" <> literal str <> "\\)" + | isEnabled Ext_tex_math_double_backslash opts -> + return $ "\\\\(" <> literal str <> "\\\\)" + | otherwise -> do + variant <- asks envVariant + texMathToInlines InlineMath str >>= + inlineListToMarkdown opts . + (if variant == PlainText then makeMathPlainer else id) +inlineToMarkdown opts (Math DisplayMath str) = + case writerHTMLMathMethod opts of + WebTeX url -> (\x -> blankline <> x <> blankline) `fmap` + inlineToMarkdown opts (Image nullAttr [Str str] + (url <> T.pack (urlEncode $ T.unpack str), str)) + _ | isEnabled Ext_tex_math_dollars opts -> + return $ "$$" <> literal str <> "$$" + | isEnabled Ext_tex_math_single_backslash opts -> + return $ "\\[" <> literal str <> "\\]" + | isEnabled Ext_tex_math_double_backslash opts -> + return $ "\\\\[" <> literal str <> "\\\\]" + | otherwise -> (\x -> cr <> x <> cr) `fmap` + (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) +inlineToMarkdown opts il@(RawInline f str) = do + let tickGroups = filter (T.any (== '`')) $ T.group str + let numticks = if null tickGroups + then 1 + else 1 + maximum (map T.length tickGroups) + variant <- asks envVariant + let Format fmt = f + let rawAttribInline = return $ + literal (T.replicate numticks "`") <> literal str <> + literal (T.replicate numticks "`") <> literal "{=" <> literal fmt <> literal "}" + let renderEmpty = mempty <$ report (InlineNotRendered il) + case variant of + PlainText -> renderEmpty + Commonmark + | f `elem` ["gfm", "commonmark", "commonmark_x", "markdown"] + -> return $ literal str + Markdown + | f `elem` ["markdown", "markdown_github", "markdown_phpextra", + "markdown_mmd", "markdown_strict"] + -> return $ literal str + _ | isEnabled Ext_raw_attribute opts -> rawAttribInline + | f `elem` ["html", "html5", "html4"] + , isEnabled Ext_raw_html opts + -> return $ literal str + | f `elem` ["latex", "tex"] + , isEnabled Ext_raw_tex opts + -> return $ literal str + _ -> renderEmpty + + +inlineToMarkdown opts LineBreak = do + variant <- asks envVariant + if variant == PlainText || isEnabled Ext_hard_line_breaks opts + then return cr + else return $ + if isEnabled Ext_escaped_line_breaks opts + then "\\" <> cr + else " " <> cr +inlineToMarkdown _ Space = do + escapeSpaces <- asks envEscapeSpaces + return $ if escapeSpaces then "\\ " else space +inlineToMarkdown opts SoftBreak = do + escapeSpaces <- asks envEscapeSpaces + let space' = if escapeSpaces then "\\ " else space + return $ case writerWrapText opts of + WrapNone -> space' + WrapAuto -> space' + WrapPreserve -> cr +inlineToMarkdown opts (Cite [] lst) = inlineListToMarkdown opts lst +inlineToMarkdown opts (Cite (c:cs) lst) + | not (isEnabled Ext_citations opts) = inlineListToMarkdown opts lst + | otherwise = + if citationMode c == AuthorInText + then do + suffs <- inlineListToMarkdown opts $ citationSuffix c + rest <- mapM convertOne cs + let inbr = suffs <+> joincits rest + br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' + return $ literal ("@" <> citationId c) <+> br + else do + cits <- mapM convertOne (c:cs) + return $ literal "[" <> joincits cits <> literal "]" + where + joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) + convertOne Citation { citationId = k + , citationPrefix = pinlines + , citationSuffix = sinlines + , citationMode = m } + = do + pdoc <- inlineListToMarkdown opts pinlines + sdoc <- inlineListToMarkdown opts sinlines + let k' = literal (modekey m <> "@" <> k) + r = case sinlines of + Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc + _ -> k' <+> sdoc + return $ pdoc <+> r + modekey SuppressAuthor = "-" + modekey _ = "" +inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do + variant <- asks envVariant + linktext <- inlineListToMarkdown opts txt + let linktitle = if T.null tit + then empty + else literal $ " \"" <> tit <> "\"" + let srcSuffix = fromMaybe src (T.stripPrefix "mailto:" src) + let useAuto = isURI src && + case txt of + [Str s] | escapeURI s == srcSuffix -> True + _ -> False + let useRefLinks = writerReferenceLinks opts && not useAuto + shortcutable <- asks envRefShortcutable + let useShortcutRefLinks = shortcutable && + isEnabled Ext_shortcut_reference_links opts + reftext <- if useRefLinks + then literal <$> getReference attr linktext (src, tit) + else return mempty + case variant of + PlainText + | useAuto -> return $ literal srcSuffix + | otherwise -> return linktext + _ | useAuto -> return $ "<" <> literal srcSuffix <> ">" + | useRefLinks -> + let first = "[" <> linktext <> "]" + second = if getKey linktext == getKey reftext + then if useShortcutRefLinks + then "" + else "[]" + else "[" <> reftext <> "]" + in return $ first <> second + | isEnabled Ext_raw_html opts + , not (isEnabled Ext_link_attributes opts) + , attr /= nullAttr -> -- use raw HTML to render attributes + literal . T.strip <$> + writeHtml5String opts{ writerTemplate = Nothing } + (Pandoc nullMeta [Plain [lnk]]) + | otherwise -> return $ + "[" <> linktext <> "](" <> literal src <> linktitle <> ")" <> + linkAttributes opts attr +inlineToMarkdown opts img@(Image attr alternate (source, tit)) + | isEnabled Ext_raw_html opts && + not (isEnabled Ext_link_attributes opts) && + attr /= nullAttr = -- use raw HTML + literal . T.strip <$> + writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) + | otherwise = do + variant <- asks envVariant + let txt = if null alternate || alternate == [Str source] + -- to prevent autolinks + then [Str ""] + else alternate + linkPart <- inlineToMarkdown opts (Link attr txt (source, tit)) + return $ case variant of + PlainText -> "[" <> linkPart <> "]" + _ -> "!" <> linkPart +inlineToMarkdown opts (Note contents) = do + modify (\st -> st{ stNotes = contents : stNotes st }) + st <- get + let ref = literal $ writerIdentifierPrefix opts <> tshow (stNoteNum st + length (stNotes st) - 1) + if isEnabled Ext_footnotes opts + then return $ "[^" <> ref <> "]" + else return $ "[" <> ref <> "]" + +makeMathPlainer :: [Inline] -> [Inline] +makeMathPlainer = walk go + where + go (Emph xs) = Span nullAttr xs + go x = x + +toSubscriptInline :: Inline -> Maybe Inline +toSubscriptInline Space = Just Space +toSubscriptInline (Span attr ils) = Span attr <$> traverse toSubscriptInline ils +toSubscriptInline (Str s) = Str . T.pack <$> traverse toSubscript (T.unpack s) +toSubscriptInline LineBreak = Just LineBreak +toSubscriptInline SoftBreak = Just SoftBreak +toSubscriptInline _ = Nothing + +toSuperscriptInline :: Inline -> Maybe Inline +toSuperscriptInline Space = Just Space +toSuperscriptInline (Span attr ils) = Span attr <$> traverse toSuperscriptInline ils +toSuperscriptInline (Str s) = Str . T.pack <$> traverse toSuperscript (T.unpack s) +toSuperscriptInline LineBreak = Just LineBreak +toSuperscriptInline SoftBreak = Just SoftBreak +toSuperscriptInline _ = Nothing diff --git a/src/Text/Pandoc/Writers/Markdown/Types.hs b/src/Text/Pandoc/Writers/Markdown/Types.hs new file mode 100644 index 000000000..a1d0d14e4 --- /dev/null +++ b/src/Text/Pandoc/Writers/Markdown/Types.hs @@ -0,0 +1,81 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.Markdown.Types + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.Markdown.Types ( + MarkdownVariant(..), + WriterState(..), + WriterEnv(..), + Notes, + Ref, + Refs, + MD, + evalMD + ) where +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.Default +import qualified Data.Map as M +import qualified Data.Set as Set +import Data.Text (Text) +import Text.Pandoc.Parsing (Key) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition + +type Notes = [[Block]] +type Ref = (Text, Target, Attr) +type Refs = [Ref] + +type MD m = ReaderT WriterEnv (StateT WriterState m) + +evalMD :: PandocMonad m => MD m a -> WriterEnv -> WriterState -> m a +evalMD md env st = evalStateT (runReaderT md env) st + +data WriterEnv = WriterEnv { envInList :: Bool + , envVariant :: MarkdownVariant + , envRefShortcutable :: Bool + , envBlockLevel :: Int + , envEscapeSpaces :: Bool + } + +data MarkdownVariant = + PlainText + | Commonmark + | Markdown + deriving (Show, Eq) + +instance Default WriterEnv + where def = WriterEnv { envInList = False + , envVariant = Markdown + , envRefShortcutable = True + , envBlockLevel = 0 + , envEscapeSpaces = False + } + +data WriterState = WriterState { stNotes :: Notes + , stPrevRefs :: Refs + , stRefs :: Refs + , stKeys :: M.Map Key + (M.Map (Target, Attr) Int) + , stLastIdx :: Int + , stIds :: Set.Set Text + , stNoteNum :: Int + } + +instance Default WriterState + where def = WriterState{ stNotes = [] + , stPrevRefs = [] + , stRefs = [] + , stKeys = M.empty + , stLastIdx = 0 + , stIds = Set.empty + , stNoteNum = 1 + } + + -- cgit v1.2.3 From 827ecdd2de935d377399a68aaa52b3fb6ab5b607 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 2 Mar 2021 21:33:58 -0800 Subject: Split out T.P.Writers.LaTeX.Lang. --- pandoc.cabal | 1 + src/Text/Pandoc/Writers/LaTeX.hs | 192 +++------------------------------- src/Text/Pandoc/Writers/LaTeX/Lang.hs | 191 +++++++++++++++++++++++++++++++++ 3 files changed, 204 insertions(+), 180 deletions(-) create mode 100644 src/Text/Pandoc/Writers/LaTeX/Lang.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 3aa29b477..416641a96 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -664,6 +664,7 @@ library Text.Pandoc.Writers.LaTeX.Caption, Text.Pandoc.Writers.LaTeX.Notes, Text.Pandoc.Writers.LaTeX.Table, + Text.Pandoc.Writers.LaTeX.Lang, Text.Pandoc.Writers.LaTeX.Types, Text.Pandoc.Writers.Markdown.Types, Text.Pandoc.Writers.Markdown.Inline, diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e31ec9d52..6b1d44b23 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -45,6 +45,8 @@ import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.LaTeX.Caption (getCaption) import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX) import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState) +import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv, toPolyglossia, + toBabel) import Text.Pandoc.Writers.Shared import Text.Printf (printf) import qualified Data.Text.Normalize as Normalize @@ -926,6 +928,16 @@ labelFor ident = do ref <- literal `fmap` toLabel ident return $ text "\\label" <> braces ref +-- Determine listings language from list of class attributes. +getListingsLanguage :: [Text] -> Maybe Text +getListingsLanguage xs + = foldr ((<|>) . toListingsLanguage) Nothing xs + +mbBraced :: Text -> Text +mbBraced x = if not (T.all isAlphaNum x) + then "{" <> x <> "}" + else x + -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: PandocMonad m => [Inline] -- ^ Inlines to convert @@ -1363,16 +1375,6 @@ citationsToBiblatex (c:cs) citationsToBiblatex _ = return empty --- Determine listings language from list of class attributes. -getListingsLanguage :: [Text] -> Maybe Text -getListingsLanguage xs - = foldr ((<|>) . toListingsLanguage) Nothing xs - -mbBraced :: Text -> Text -mbBraced x = if not (T.all isAlphaNum x) - then "{" <> x <> "}" - else x - -- Extract a key from divs and spans extract :: Text -> Block -> [Text] extract key (Div attr _) = lookKey key attr @@ -1390,174 +1392,4 @@ extractInline _ _ = [] lookKey :: Text -> Attr -> [Text] lookKey key (_,_,kvs) = maybe [] T.words $ lookup key kvs --- In environments \Arabic instead of \arabic is used -toPolyglossiaEnv :: Lang -> (Text, Text) -toPolyglossiaEnv l = - case toPolyglossia l of - ("arabic", o) -> ("Arabic", o) - x -> x - --- Takes a list of the constituents of a BCP 47 language code and --- converts it to a Polyglossia (language, options) tuple --- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf -toPolyglossia :: Lang -> (Text, Text) -toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") -toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") -toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") -toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") -toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") -toPolyglossia (Lang "de" _ _ vars) - | "1901" `elem` vars = ("german", "spelling=old") -toPolyglossia (Lang "de" _ "AT" vars) - | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") -toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") -toPolyglossia (Lang "de" _ "CH" vars) - | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") -toPolyglossia (Lang "de" _ _ _) = ("german", "") -toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") -toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") -toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") -toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") -toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") -toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") -toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") -toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") -toPolyglossia (Lang "la" _ _ vars) - | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ "BR" _) = ("portuguese", "variant=brazilian") -toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") --- Takes a list of the constituents of a BCP 47 language code and --- converts it to a Babel language string. --- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf --- List of supported languages (slightly outdated): --- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf -toBabel :: Lang -> Text -toBabel (Lang "de" _ "AT" vars) - | "1901" `elem` vars = "austrian" - | otherwise = "naustrian" -toBabel (Lang "de" _ "CH" vars) - | "1901" `elem` vars = "swissgerman" - | otherwise = "nswissgerman" -toBabel (Lang "de" _ _ vars) - | "1901" `elem` vars = "german" - | otherwise = "ngerman" -toBabel (Lang "dsb" _ _ _) = "lowersorbian" -toBabel (Lang "el" _ _ vars) - | "polyton" `elem` vars = "polutonikogreek" -toBabel (Lang "en" _ "AU" _) = "australian" -toBabel (Lang "en" _ "CA" _) = "canadian" -toBabel (Lang "en" _ "GB" _) = "british" -toBabel (Lang "en" _ "NZ" _) = "newzealand" -toBabel (Lang "en" _ "UK" _) = "british" -toBabel (Lang "en" _ "US" _) = "american" -toBabel (Lang "fr" _ "CA" _) = "canadien" -toBabel (Lang "fra" _ _ vars) - | "aca" `elem` vars = "acadian" -toBabel (Lang "grc" _ _ _) = "polutonikogreek" -toBabel (Lang "hsb" _ _ _) = "uppersorbian" -toBabel (Lang "la" _ _ vars) - | "x-classic" `elem` vars = "classiclatin" -toBabel (Lang "pt" _ "BR" _) = "brazilian" -toBabel (Lang "sl" _ _ _) = "slovene" -toBabel x = commonFromBcp47 x - --- Takes a list of the constituents of a BCP 47 language code --- and converts it to a string shared by Babel and Polyglossia. --- https://tools.ietf.org/html/bcp47#section-2.1 -commonFromBcp47 :: Lang -> Text -commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" -commonFromBcp47 (Lang "zh" "Latn" _ vars) - | "pinyin" `elem` vars = "pinyin" -commonFromBcp47 (Lang l _ _ _) = fromIso l - where - fromIso "af" = "afrikaans" - fromIso "am" = "amharic" - fromIso "ar" = "arabic" - fromIso "as" = "assamese" - fromIso "ast" = "asturian" - fromIso "bg" = "bulgarian" - fromIso "bn" = "bengali" - fromIso "bo" = "tibetan" - fromIso "br" = "breton" - fromIso "ca" = "catalan" - fromIso "cy" = "welsh" - fromIso "cs" = "czech" - fromIso "cop" = "coptic" - fromIso "da" = "danish" - fromIso "dv" = "divehi" - fromIso "el" = "greek" - fromIso "en" = "english" - fromIso "eo" = "esperanto" - fromIso "es" = "spanish" - fromIso "et" = "estonian" - fromIso "eu" = "basque" - fromIso "fa" = "farsi" - fromIso "fi" = "finnish" - fromIso "fr" = "french" - fromIso "fur" = "friulan" - fromIso "ga" = "irish" - fromIso "gd" = "scottish" - fromIso "gez" = "ethiopic" - fromIso "gl" = "galician" - fromIso "he" = "hebrew" - fromIso "hi" = "hindi" - fromIso "hr" = "croatian" - fromIso "hu" = "magyar" - fromIso "hy" = "armenian" - fromIso "ia" = "interlingua" - fromIso "id" = "indonesian" - fromIso "ie" = "interlingua" - fromIso "is" = "icelandic" - fromIso "it" = "italian" - fromIso "ja" = "japanese" - fromIso "km" = "khmer" - fromIso "kmr" = "kurmanji" - fromIso "kn" = "kannada" - fromIso "ko" = "korean" - fromIso "la" = "latin" - fromIso "lo" = "lao" - fromIso "lt" = "lithuanian" - fromIso "lv" = "latvian" - fromIso "ml" = "malayalam" - fromIso "mn" = "mongolian" - fromIso "mr" = "marathi" - fromIso "nb" = "norsk" - fromIso "nl" = "dutch" - fromIso "nn" = "nynorsk" - fromIso "no" = "norsk" - fromIso "nqo" = "nko" - fromIso "oc" = "occitan" - fromIso "pa" = "panjabi" - fromIso "pl" = "polish" - fromIso "pms" = "piedmontese" - fromIso "pt" = "portuguese" - fromIso "rm" = "romansh" - fromIso "ro" = "romanian" - fromIso "ru" = "russian" - fromIso "sa" = "sanskrit" - fromIso "se" = "samin" - fromIso "sk" = "slovak" - fromIso "sq" = "albanian" - fromIso "sr" = "serbian" - fromIso "sv" = "swedish" - fromIso "syr" = "syriac" - fromIso "ta" = "tamil" - fromIso "te" = "telugu" - fromIso "th" = "thai" - fromIso "ti" = "ethiopic" - fromIso "tk" = "turkmen" - fromIso "tr" = "turkish" - fromIso "uk" = "ukrainian" - fromIso "ur" = "urdu" - fromIso "vi" = "vietnamese" - fromIso _ = "" diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs new file mode 100644 index 000000000..41aafee48 --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -0,0 +1,191 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Lang + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Lang + ( toPolyglossiaEnv, + toPolyglossia, + toBabel + ) where +import Data.Text (Text) +import Text.Pandoc.BCP47 (Lang (..)) + + +-- In environments \Arabic instead of \arabic is used +toPolyglossiaEnv :: Lang -> (Text, Text) +toPolyglossiaEnv l = + case toPolyglossia l of + ("arabic", o) -> ("Arabic", o) + x -> x + +-- Takes a list of the constituents of a BCP 47 language code and +-- converts it to a Polyglossia (language, options) tuple +-- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf +toPolyglossia :: Lang -> (Text, Text) +toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ "AT" vars) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ "CH" vars) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "pt" _ "BR" _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") +toPolyglossia x = (commonFromBcp47 x, "") + +-- Takes a list of the constituents of a BCP 47 language code and +-- converts it to a Babel language string. +-- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf +-- List of supported languages (slightly outdated): +-- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf +toBabel :: Lang -> Text +toBabel (Lang "de" _ "AT" vars) + | "1901" `elem` vars = "austrian" + | otherwise = "naustrian" +toBabel (Lang "de" _ "CH" vars) + | "1901" `elem` vars = "swissgerman" + | otherwise = "nswissgerman" +toBabel (Lang "de" _ _ vars) + | "1901" `elem` vars = "german" + | otherwise = "ngerman" +toBabel (Lang "dsb" _ _ _) = "lowersorbian" +toBabel (Lang "el" _ _ vars) + | "polyton" `elem` vars = "polutonikogreek" +toBabel (Lang "en" _ "AU" _) = "australian" +toBabel (Lang "en" _ "CA" _) = "canadian" +toBabel (Lang "en" _ "GB" _) = "british" +toBabel (Lang "en" _ "NZ" _) = "newzealand" +toBabel (Lang "en" _ "UK" _) = "british" +toBabel (Lang "en" _ "US" _) = "american" +toBabel (Lang "fr" _ "CA" _) = "canadien" +toBabel (Lang "fra" _ _ vars) + | "aca" `elem` vars = "acadian" +toBabel (Lang "grc" _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars) + | "x-classic" `elem` vars = "classiclatin" +toBabel (Lang "pt" _ "BR" _) = "brazilian" +toBabel (Lang "sl" _ _ _) = "slovene" +toBabel x = commonFromBcp47 x + +-- Takes a list of the constituents of a BCP 47 language code +-- and converts it to a string shared by Babel and Polyglossia. +-- https://tools.ietf.org/html/bcp47#section-2.1 +commonFromBcp47 :: Lang -> Text +commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" +commonFromBcp47 (Lang "zh" "Latn" _ vars) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _) = fromIso l + where + fromIso "af" = "afrikaans" + fromIso "am" = "amharic" + fromIso "ar" = "arabic" + fromIso "as" = "assamese" + fromIso "ast" = "asturian" + fromIso "bg" = "bulgarian" + fromIso "bn" = "bengali" + fromIso "bo" = "tibetan" + fromIso "br" = "breton" + fromIso "ca" = "catalan" + fromIso "cy" = "welsh" + fromIso "cs" = "czech" + fromIso "cop" = "coptic" + fromIso "da" = "danish" + fromIso "dv" = "divehi" + fromIso "el" = "greek" + fromIso "en" = "english" + fromIso "eo" = "esperanto" + fromIso "es" = "spanish" + fromIso "et" = "estonian" + fromIso "eu" = "basque" + fromIso "fa" = "farsi" + fromIso "fi" = "finnish" + fromIso "fr" = "french" + fromIso "fur" = "friulan" + fromIso "ga" = "irish" + fromIso "gd" = "scottish" + fromIso "gez" = "ethiopic" + fromIso "gl" = "galician" + fromIso "he" = "hebrew" + fromIso "hi" = "hindi" + fromIso "hr" = "croatian" + fromIso "hu" = "magyar" + fromIso "hy" = "armenian" + fromIso "ia" = "interlingua" + fromIso "id" = "indonesian" + fromIso "ie" = "interlingua" + fromIso "is" = "icelandic" + fromIso "it" = "italian" + fromIso "ja" = "japanese" + fromIso "km" = "khmer" + fromIso "kmr" = "kurmanji" + fromIso "kn" = "kannada" + fromIso "ko" = "korean" + fromIso "la" = "latin" + fromIso "lo" = "lao" + fromIso "lt" = "lithuanian" + fromIso "lv" = "latvian" + fromIso "ml" = "malayalam" + fromIso "mn" = "mongolian" + fromIso "mr" = "marathi" + fromIso "nb" = "norsk" + fromIso "nl" = "dutch" + fromIso "nn" = "nynorsk" + fromIso "no" = "norsk" + fromIso "nqo" = "nko" + fromIso "oc" = "occitan" + fromIso "pa" = "panjabi" + fromIso "pl" = "polish" + fromIso "pms" = "piedmontese" + fromIso "pt" = "portuguese" + fromIso "rm" = "romansh" + fromIso "ro" = "romanian" + fromIso "ru" = "russian" + fromIso "sa" = "sanskrit" + fromIso "se" = "samin" + fromIso "sk" = "slovak" + fromIso "sq" = "albanian" + fromIso "sr" = "serbian" + fromIso "sv" = "swedish" + fromIso "syr" = "syriac" + fromIso "ta" = "tamil" + fromIso "te" = "telugu" + fromIso "th" = "thai" + fromIso "ti" = "ethiopic" + fromIso "tk" = "turkmen" + fromIso "tr" = "turkish" + fromIso "uk" = "ukrainian" + fromIso "ur" = "urdu" + fromIso "vi" = "vietnamese" + fromIso _ = "" -- cgit v1.2.3 From fe483c653b34897346e3ab6e0e26de88ecee4447 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 2 Mar 2021 21:57:13 -0800 Subject: Split out T.P.Writers.LaTeX.Citation. --- pandoc.cabal | 1 + src/Text/Pandoc/Writers/LaTeX.hs | 148 +----------------------- src/Text/Pandoc/Writers/LaTeX/Citation.hs | 181 ++++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+), 142 deletions(-) create mode 100644 src/Text/Pandoc/Writers/LaTeX/Citation.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 416641a96..135ee2f26 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -666,6 +666,7 @@ library Text.Pandoc.Writers.LaTeX.Table, Text.Pandoc.Writers.LaTeX.Lang, Text.Pandoc.Writers.LaTeX.Types, + Text.Pandoc.Writers.LaTeX.Citation, Text.Pandoc.Writers.Markdown.Types, Text.Pandoc.Writers.Markdown.Inline, Text.Pandoc.Writers.Roff, diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 6b1d44b23..84c96a507 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -20,9 +20,8 @@ module Text.Pandoc.Writers.LaTeX ( ) where import Control.Applicative ((<|>)) import Control.Monad.State.Strict -import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, - isPunctuation, ord) -import Data.List (foldl', intersperse, nubBy, (\\), uncons) +import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, ord) +import Data.List (intersperse, nubBy, (\\), uncons) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) import qualified Data.Map as M import Data.Text (Text) @@ -44,6 +43,8 @@ import Text.Pandoc.Slides import Text.Pandoc.Walk (query, walk, walkM) import Text.Pandoc.Writers.LaTeX.Caption (getCaption) import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX) +import Text.Pandoc.Writers.LaTeX.Citation (citationsToNatbib, + citationsToBiblatex) import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState) import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv, toPolyglossia, toBabel) @@ -1026,8 +1027,8 @@ inlineToLaTeX (Cite cits lst) = do st <- get let opts = stOptions st case writerCiteMethod opts of - Natbib -> citationsToNatbib cits - Biblatex -> citationsToBiblatex cits + Natbib -> citationsToNatbib inlineListToLaTeX cits + Biblatex -> citationsToBiblatex inlineListToLaTeX cits _ -> inlineListToLaTeX lst inlineToLaTeX (Code (_,classes,kvs) str) = do @@ -1238,143 +1239,6 @@ protectCode x = [x] setEmptyLine :: PandocMonad m => Bool -> LW m () setEmptyLine b = modify $ \st -> st{ stEmptyLine = b } -citationsToNatbib :: PandocMonad m => [Citation] -> LW m (Doc Text) -citationsToNatbib - [one] - = citeCommand c p s k - where - Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } - = one - c = case m of - AuthorInText -> "citet" - SuppressAuthor -> "citeyearpar" - NormalCitation -> "citep" - -citationsToNatbib cits - | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits - = citeCommand "citep" p s ks - where - noPrefix = all (null . citationPrefix) - noSuffix = all (null . citationSuffix) - ismode m = all ((==) m . citationMode) - p = citationPrefix $ - head cits - s = citationSuffix $ - last cits - ks = T.intercalate ", " $ map citationId cits - -citationsToNatbib (c:cs) | citationMode c == AuthorInText = do - author <- citeCommand "citeauthor" [] [] (citationId c) - cits <- citationsToNatbib (c { citationMode = SuppressAuthor } : cs) - return $ author <+> cits - -citationsToNatbib cits = do - cits' <- mapM convertOne cits - return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" - where - combineTwo a b | isEmpty a = b - | otherwise = a <> text "; " <> b - convertOne Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } - = case m of - AuthorInText -> citeCommand "citealt" p s k - SuppressAuthor -> citeCommand "citeyear" p s k - NormalCitation -> citeCommand "citealp" p s k - -citeCommand :: PandocMonad m - => Text -> [Inline] -> [Inline] -> Text -> LW m (Doc Text) -citeCommand c p s k = do - args <- citeArguments p s k - return $ literal ("\\" <> c) <> args - -type Prefix = [Inline] -type Suffix = [Inline] -type CiteId = Text -data CiteGroup = CiteGroup Prefix Suffix [CiteId] - -citeArgumentsList :: PandocMonad m - => CiteGroup -> LW m (Doc Text) -citeArgumentsList (CiteGroup _ _ []) = return empty -citeArgumentsList (CiteGroup pfxs sfxs ids) = do - pdoc <- inlineListToLaTeX pfxs - sdoc <- inlineListToLaTeX sfxs' - return $ optargs pdoc sdoc <> - braces (literal (T.intercalate "," (reverse ids))) - where sfxs' = stripLocatorBraces $ case sfxs of - (Str t : r) -> case T.uncons t of - Just (x, xs) - | T.null xs - , isPunctuation x -> dropWhile (== Space) r - | isPunctuation x -> Str xs : r - _ -> sfxs - _ -> sfxs - optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of - (True, True ) -> empty - (True, False) -> brackets sdoc - (_ , _ ) -> brackets pdoc <> brackets sdoc - -citeArguments :: PandocMonad m - => [Inline] -> [Inline] -> Text -> LW m (Doc Text) -citeArguments p s k = citeArgumentsList (CiteGroup p s [k]) - --- strip off {} used to define locator in pandoc-citeproc; see #5722 -stripLocatorBraces :: [Inline] -> [Inline] -stripLocatorBraces = walk go - where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs - go x = x - -citationsToBiblatex :: PandocMonad m => [Citation] -> LW m (Doc Text) -citationsToBiblatex - [one] - = citeCommand cmd p s k - where - Citation { citationId = k - , citationPrefix = p - , citationSuffix = s - , citationMode = m - } = one - cmd = case m of - SuppressAuthor -> "autocite*" - AuthorInText -> "textcite" - NormalCitation -> "autocite" - -citationsToBiblatex (c:cs) - | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs) - = do - let cmd = case citationMode c of - SuppressAuthor -> "\\autocite*" - AuthorInText -> "\\textcite" - NormalCitation -> "\\autocite" - return $ text cmd <> - braces (literal (T.intercalate "," (map citationId (c:cs)))) - | otherwise - = do - let cmd = case citationMode c of - SuppressAuthor -> "\\autocites*" - AuthorInText -> "\\textcites" - NormalCitation -> "\\autocites" - - groups <- mapM citeArgumentsList (reverse (foldl' grouper [] (c:cs))) - - return $ text cmd <> mconcat groups - - where grouper prev cit = case prev of - ((CiteGroup oPfx oSfx ids):rest) - | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest - _ -> CiteGroup pfx sfx [cid] : prev - where pfx = citationPrefix cit - sfx = citationSuffix cit - cid = citationId cit - -citationsToBiblatex _ = return empty - -- Extract a key from divs and spans extract :: Text -> Block -> [Text] extract key (Div attr _) = lookKey key attr diff --git a/src/Text/Pandoc/Writers/LaTeX/Citation.hs b/src/Text/Pandoc/Writers/LaTeX/Citation.hs new file mode 100644 index 000000000..f48a43d7a --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Citation.hs @@ -0,0 +1,181 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Citation + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Citation + ( citationsToNatbib, + citationsToBiblatex + ) where + +import Data.Text (Text) +import Data.Char (isPunctuation) +import qualified Data.Text as T +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Data.List (foldl') +import Text.DocLayout (Doc, brackets, empty, (<+>), text, isEmpty, literal, + braces) +import Text.Pandoc.Walk +import Text.Pandoc.Writers.LaTeX.Types ( LW ) + +citationsToNatbib :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Citation] + -> LW m (Doc Text) +citationsToNatbib inlineListToLaTeX [one] + = citeCommand inlineListToLaTeX c p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = one + c = case m of + AuthorInText -> "citet" + SuppressAuthor -> "citeyearpar" + NormalCitation -> "citep" + +citationsToNatbib inlineListToLaTeX cits + | noPrefix (tail cits) && noSuffix (init cits) && ismode NormalCitation cits + = citeCommand inlineListToLaTeX "citep" p s ks + where + noPrefix = all (null . citationPrefix) + noSuffix = all (null . citationSuffix) + ismode m = all ((==) m . citationMode) + p = citationPrefix $ + head cits + s = citationSuffix $ + last cits + ks = T.intercalate ", " $ map citationId cits + +citationsToNatbib inlineListToLaTeX (c:cs) + | citationMode c == AuthorInText = do + author <- citeCommand inlineListToLaTeX "citeauthor" [] [] (citationId c) + cits <- citationsToNatbib inlineListToLaTeX + (c { citationMode = SuppressAuthor } : cs) + return $ author <+> cits + +citationsToNatbib inlineListToLaTeX cits = do + cits' <- mapM convertOne cits + return $ text "\\citetext{" <> foldl' combineTwo empty cits' <> text "}" + where + citeCommand' = citeCommand inlineListToLaTeX + combineTwo a b | isEmpty a = b + | otherwise = a <> text "; " <> b + convertOne Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } + = case m of + AuthorInText -> citeCommand' "citealt" p s k + SuppressAuthor -> citeCommand' "citeyear" p s k + NormalCitation -> citeCommand' "citealp" p s k + +citeCommand :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> Text + -> [Inline] + -> [Inline] + -> Text + -> LW m (Doc Text) +citeCommand inlineListToLaTeX c p s k = do + args <- citeArguments inlineListToLaTeX p s k + return $ literal ("\\" <> c) <> args + +type Prefix = [Inline] +type Suffix = [Inline] +type CiteId = Text +data CiteGroup = CiteGroup Prefix Suffix [CiteId] + +citeArgumentsList :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> CiteGroup + -> LW m (Doc Text) +citeArgumentsList _inlineListToLaTeX (CiteGroup _ _ []) = return empty +citeArgumentsList inlineListToLaTeX (CiteGroup pfxs sfxs ids) = do + pdoc <- inlineListToLaTeX pfxs + sdoc <- inlineListToLaTeX sfxs' + return $ optargs pdoc sdoc <> + braces (literal (T.intercalate "," (reverse ids))) + where sfxs' = stripLocatorBraces $ case sfxs of + (Str t : r) -> case T.uncons t of + Just (x, xs) + | T.null xs + , isPunctuation x -> dropWhile (== Space) r + | isPunctuation x -> Str xs : r + _ -> sfxs + _ -> sfxs + optargs pdoc sdoc = case (isEmpty pdoc, isEmpty sdoc) of + (True, True ) -> empty + (True, False) -> brackets sdoc + (_ , _ ) -> brackets pdoc <> brackets sdoc + +citeArguments :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Inline] + -> [Inline] + -> Text + -> LW m (Doc Text) +citeArguments inlineListToLaTeX p s k = + citeArgumentsList inlineListToLaTeX (CiteGroup p s [k]) + +-- strip off {} used to define locator in pandoc-citeproc; see #5722 +stripLocatorBraces :: [Inline] -> [Inline] +stripLocatorBraces = walk go + where go (Str xs) = Str $ T.filter (\c -> c /= '{' && c /= '}') xs + go x = x + +citationsToBiblatex :: PandocMonad m + => ([Inline] -> LW m (Doc Text)) + -> [Citation] -> LW m (Doc Text) +citationsToBiblatex inlineListToLaTeX [one] + = citeCommand inlineListToLaTeX cmd p s k + where + Citation { citationId = k + , citationPrefix = p + , citationSuffix = s + , citationMode = m + } = one + cmd = case m of + SuppressAuthor -> "autocite*" + AuthorInText -> "textcite" + NormalCitation -> "autocite" + +citationsToBiblatex inlineListToLaTeX (c:cs) + | all (\cit -> null (citationPrefix cit) && null (citationSuffix cit)) (c:cs) + = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocite*" + AuthorInText -> "\\textcite" + NormalCitation -> "\\autocite" + return $ text cmd <> + braces (literal (T.intercalate "," (map citationId (c:cs)))) + | otherwise + = do + let cmd = case citationMode c of + SuppressAuthor -> "\\autocites*" + AuthorInText -> "\\textcites" + NormalCitation -> "\\autocites" + + groups <- mapM (citeArgumentsList inlineListToLaTeX) + (reverse (foldl' grouper [] (c:cs))) + + return $ text cmd <> mconcat groups + + where grouper prev cit = case prev of + ((CiteGroup oPfx oSfx ids):rest) + | null oSfx && null pfx -> CiteGroup oPfx sfx (cid:ids) : rest + _ -> CiteGroup pfx sfx [cid] : prev + where pfx = citationPrefix cit + sfx = citationSuffix cit + cid = citationId cit + +citationsToBiblatex _ _ = return empty -- cgit v1.2.3 From e8e5ffe1f4ecaff6f4e21af04ba593d64f4061f4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 2 Mar 2021 22:36:13 -0800 Subject: Split out T.P.Writers.LaTeX.Util. --- pandoc.cabal | 1 + src/Text/Pandoc/Writers/LaTeX.hs | 254 ++----------------------------- src/Text/Pandoc/Writers/LaTeX/Util.hs | 274 ++++++++++++++++++++++++++++++++++ 3 files changed, 286 insertions(+), 243 deletions(-) create mode 100644 src/Text/Pandoc/Writers/LaTeX/Util.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 135ee2f26..d457a0620 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -667,6 +667,7 @@ library Text.Pandoc.Writers.LaTeX.Lang, Text.Pandoc.Writers.LaTeX.Types, Text.Pandoc.Writers.LaTeX.Citation, + Text.Pandoc.Writers.LaTeX.Util, Text.Pandoc.Writers.Markdown.Types, Text.Pandoc.Writers.Markdown.Inline, Text.Pandoc.Writers.Roff, diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 84c96a507..180aaa44d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -18,10 +18,9 @@ module Text.Pandoc.Writers.LaTeX ( writeLaTeX , writeBeamer ) where -import Control.Applicative ((<|>)) import Control.Monad.State.Strict -import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isSpace, ord) -import Data.List (intersperse, nubBy, (\\), uncons) +import Data.Char (isDigit) +import Data.List (intersperse, nubBy, (\\)) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) import qualified Data.Map as M import Data.Text (Text) @@ -33,7 +32,7 @@ import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, - styleToLaTeX, toListingsLanguage) + styleToLaTeX) import Text.Pandoc.ImageSize import Text.Pandoc.Logging import Text.Pandoc.Options @@ -46,11 +45,12 @@ import Text.Pandoc.Writers.LaTeX.Table (tableToLaTeX) import Text.Pandoc.Writers.LaTeX.Citation (citationsToNatbib, citationsToBiblatex) import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState (..), startingState) -import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv, toPolyglossia, - toBabel) +import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossia, toBabel) +import Text.Pandoc.Writers.LaTeX.Util (stringToLaTeX, StringContext(..), + toLabel, inCmd, + wrapDiv, hypertarget, labelFor, + getListingsLanguage, mbBraced) import Text.Pandoc.Writers.Shared -import Text.Printf (printf) -import qualified Data.Text.Normalize as Normalize import qualified Text.Pandoc.Writers.AnnotatedTable as Ann -- | Convert Pandoc to LaTeX. @@ -244,152 +244,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do Nothing -> main Just tpl -> renderTemplate tpl context' -data StringContext = TextString - | URLString - | CodeString - deriving (Eq) - --- escape things as needed for LaTeX -stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text -stringToLaTeX context zs = do - opts <- gets stOptions - return $ T.pack $ - foldr (go opts context) mempty $ T.unpack $ - if writerPreferAscii opts - then Normalize.normalize Normalize.NFD zs - else zs - where - go :: WriterOptions -> StringContext -> Char -> String -> String - go opts ctx x xs = - let ligatures = isEnabled Ext_smart opts && ctx == TextString - isUrl = ctx == URLString - mbAccentCmd = - if writerPreferAscii opts && ctx == TextString - then uncons xs >>= \(c,_) -> lookupAccent c - else Nothing - emits s = - case mbAccentCmd of - Just cmd -> - cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent - Nothing -> s <> xs - emitc c = - case mbAccentCmd of - Just cmd -> - cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent - Nothing -> c : xs - emitcseq cs = - case xs of - c:_ | isLetter c - , ctx == TextString - -> cs <> " " <> xs - | isSpace c -> cs <> "{}" <> xs - | ctx == TextString - -> cs <> xs - _ -> cs <> "{}" <> xs - emitquote cs = - case xs of - '`':_ -> cs <> "\\," <> xs -- add thin space - '\'':_ -> cs <> "\\," <> xs -- add thin space - _ -> cs <> xs - in case x of - '?' | ligatures -> -- avoid ?` ligature - case xs of - '`':_ -> emits "?{}" - _ -> emitc x - '!' | ligatures -> -- avoid !` ligature - case xs of - '`':_ -> emits "!{}" - _ -> emitc x - '{' -> emits "\\{" - '}' -> emits "\\}" - '`' | ctx == CodeString -> emitcseq "\\textasciigrave" - '$' | not isUrl -> emits "\\$" - '%' -> emits "\\%" - '&' -> emits "\\&" - '_' | not isUrl -> emits "\\_" - '#' -> emits "\\#" - '-' | not isUrl -> case xs of - -- prevent adjacent hyphens from forming ligatures - ('-':_) -> emits "-\\/" - _ -> emitc '-' - '~' | not isUrl -> emitcseq "\\textasciitilde" - '^' -> emits "\\^{}" - '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows - | otherwise -> emitcseq "\\textbackslash" - '|' | not isUrl -> emitcseq "\\textbar" - '<' -> emitcseq "\\textless" - '>' -> emitcseq "\\textgreater" - '[' -> emits "{[}" -- to avoid interpretation as - ']' -> emits "{]}" -- optional arguments - '\'' | ctx == CodeString -> emitcseq "\\textquotesingle" - '\160' -> emits "~" - '\x200B' -> emits "\\hspace{0pt}" -- zero-width space - '\x202F' -> emits "\\," - '\x2026' -> emitcseq "\\ldots" - '\x2018' | ligatures -> emitquote "`" - '\x2019' | ligatures -> emitquote "'" - '\x201C' | ligatures -> emitquote "``" - '\x201D' | ligatures -> emitquote "''" - '\x2014' | ligatures -> emits "---" - '\x2013' | ligatures -> emits "--" - _ | writerPreferAscii opts - -> case x of - 'ı' -> emitcseq "\\i" - 'ȷ' -> emitcseq "\\j" - 'å' -> emitcseq "\\aa" - 'Å' -> emitcseq "\\AA" - 'ß' -> emitcseq "\\ss" - 'ø' -> emitcseq "\\o" - 'Ø' -> emitcseq "\\O" - 'Ł' -> emitcseq "\\L" - 'ł' -> emitcseq "\\l" - 'æ' -> emitcseq "\\ae" - 'Æ' -> emitcseq "\\AE" - 'œ' -> emitcseq "\\oe" - 'Œ' -> emitcseq "\\OE" - '£' -> emitcseq "\\pounds" - '€' -> emitcseq "\\euro" - '©' -> emitcseq "\\copyright" - _ -> emitc x - | otherwise -> emitc x - -lookupAccent :: Char -> Maybe String -lookupAccent '\779' = Just "\\H" -lookupAccent '\768' = Just "\\`" -lookupAccent '\769' = Just "\\'" -lookupAccent '\770' = Just "\\^" -lookupAccent '\771' = Just "\\~" -lookupAccent '\776' = Just "\\\"" -lookupAccent '\775' = Just "\\." -lookupAccent '\772' = Just "\\=" -lookupAccent '\781' = Just "\\|" -lookupAccent '\817' = Just "\\b" -lookupAccent '\807' = Just "\\c" -lookupAccent '\783' = Just "\\G" -lookupAccent '\777' = Just "\\h" -lookupAccent '\803' = Just "\\d" -lookupAccent '\785' = Just "\\f" -lookupAccent '\778' = Just "\\r" -lookupAccent '\865' = Just "\\t" -lookupAccent '\782' = Just "\\U" -lookupAccent '\780' = Just "\\v" -lookupAccent '\774' = Just "\\u" -lookupAccent '\808' = Just "\\k" -lookupAccent '\8413' = Just "\\textcircled" -lookupAccent _ = Nothing - -toLabel :: PandocMonad m => Text -> LW m Text -toLabel z = go `fmap` stringToLaTeX URLString z - where - go = T.concatMap $ \x -> case x of - _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x - | x `elemText` "_-+=:;." -> T.singleton x - | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) - --- | Puts contents into LaTeX command. -inCmd :: Text -> Doc Text -> Doc Text -inCmd cmd contents = char '\\' <> literal cmd <> braces contents - toSlides :: PandocMonad m => [Block] -> LW m [Block] toSlides bs = do opts <- gets stOptions @@ -854,91 +708,6 @@ sectionHeader classes ident level lst = do braces txtNoNotes else empty -mapAlignment :: Text -> Text -mapAlignment a = case a of - "top" -> "T" - "top-baseline" -> "t" - "bottom" -> "b" - "center" -> "c" - _ -> a - -wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text) -wrapDiv (_,classes,kvs) t = do - beamer <- gets stBeamer - let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir - lang <- toLang $ lookup "lang" kvs - let wrapColumns = if beamer && "columns" `elem` classes - then \contents -> - let valign = maybe "T" mapAlignment (lookup "align" kvs) - totalwidth = maybe [] (\x -> ["totalwidth=" <> x]) - (lookup "totalwidth" kvs) - onlytextwidth = filter ("onlytextwidth" ==) classes - options = text $ T.unpack $ T.intercalate "," $ - valign : totalwidth ++ onlytextwidth - in inCmd "begin" "columns" <> brackets options - $$ contents - $$ inCmd "end" "columns" - else id - wrapColumn = if beamer && "column" `elem` classes - then \contents -> - let valign = - maybe "" - (brackets . text . T.unpack . mapAlignment) - (lookup "align" kvs) - w = maybe "0.48" fromPct (lookup "width" kvs) - in inCmd "begin" "column" <> - valign <> - braces (literal w <> "\\textwidth") - $$ contents - $$ inCmd "end" "column" - else id - fromPct xs = - case T.unsnoc xs of - Just (ds, '%') -> case safeRead ds of - Just digits -> showFl (digits / 100 :: Double) - Nothing -> xs - _ -> xs - wrapDir = case lookup "dir" kvs of - Just "rtl" -> align "RTL" - Just "ltr" -> align "LTR" - _ -> id - wrapLang txt = case lang of - Just lng -> let (l, o) = toPolyglossiaEnv lng - ops = if T.null o - then "" - else brackets $ literal o - in inCmd "begin" (literal l) <> ops - $$ blankline <> txt <> blankline - $$ inCmd "end" (literal l) - Nothing -> txt - return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t - -hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) -hypertarget _ "" x = return x -hypertarget addnewline ident x = do - ref <- literal `fmap` toLabel ident - return $ text "\\hypertarget" - <> braces ref - <> braces ((if addnewline && not (isEmpty x) - then "%" <> cr - else empty) <> x) - -labelFor :: PandocMonad m => Text -> LW m (Doc Text) -labelFor "" = return empty -labelFor ident = do - ref <- literal `fmap` toLabel ident - return $ text "\\label" <> braces ref - --- Determine listings language from list of class attributes. -getListingsLanguage :: [Text] -> Maybe Text -getListingsLanguage xs - = foldr ((<|>) . toListingsLanguage) Nothing xs - -mbBraced :: Text -> Text -mbBraced x = if not (T.all isAlphaNum x) - then "{" <> x <> "}" - else x - -- | Convert list of inline elements to LaTeX. inlineListToLaTeX :: PandocMonad m => [Inline] -- ^ Inlines to convert @@ -963,10 +732,6 @@ inlineListToLaTeX lst = hcat <$> fixInitialLineBreaks xs fixInitialLineBreaks xs = xs -isQuoted :: Inline -> Bool -isQuoted (Quoted _ _) = True -isQuoted _ = False - -- | Convert inline element to LaTeX inlineToLaTeX :: PandocMonad m => Inline -- ^ Inline to convert @@ -1102,6 +867,9 @@ inlineToLaTeX (Quoted qt lst) = do if isEnabled Ext_smart opts then char '`' <> inner <> char '\'' else char '\x2018' <> inner <> char '\x2019' + where + isQuoted (Quoted _ _) = True + isQuoted _ = False inlineToLaTeX (Str str) = do setEmptyLine False liftM literal $ stringToLaTeX TextString str diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs new file mode 100644 index 000000000..56bb792ae --- /dev/null +++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs @@ -0,0 +1,274 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Writers.LaTeX.Util + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Writers.LaTeX.Util ( + stringToLaTeX + , StringContext(..) + , toLabel + , inCmd + , wrapDiv + , hypertarget + , labelFor + , getListingsLanguage + , mbBraced + ) +where + +import Control.Applicative ((<|>)) +import Text.Pandoc.Class (PandocMonad, toLang) +import Text.Pandoc.Options (WriterOptions(..), isEnabled) +import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..)) +import Text.Pandoc.Writers.LaTeX.Lang (toPolyglossiaEnv) +import Text.Pandoc.Highlighting (toListingsLanguage) +import Text.DocLayout +import Text.Pandoc.Definition +import Text.Pandoc.ImageSize (showFl) +import Control.Monad.State.Strict (gets) +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Extensions (Extension(Ext_smart)) +import Data.Char (isLetter, isSpace, isDigit, isAscii, ord, isAlphaNum) +import Text.Printf (printf) +import Text.Pandoc.Shared (safeRead, elemText) +import qualified Data.Text.Normalize as Normalize +import Data.List (uncons) + +data StringContext = TextString + | URLString + | CodeString + deriving (Eq) + +-- escape things as needed for LaTeX +stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text +stringToLaTeX context zs = do + opts <- gets stOptions + return $ T.pack $ + foldr (go opts context) mempty $ T.unpack $ + if writerPreferAscii opts + then Normalize.normalize Normalize.NFD zs + else zs + where + go :: WriterOptions -> StringContext -> Char -> String -> String + go opts ctx x xs = + let ligatures = isEnabled Ext_smart opts && ctx == TextString + isUrl = ctx == URLString + mbAccentCmd = + if writerPreferAscii opts && ctx == TextString + then uncons xs >>= \(c,_) -> lookupAccent c + else Nothing + emits s = + case mbAccentCmd of + Just cmd -> + cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent + Nothing -> s <> xs + emitc c = + case mbAccentCmd of + Just cmd -> + cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent + Nothing -> c : xs + emitcseq cs = + case xs of + c:_ | isLetter c + , ctx == TextString + -> cs <> " " <> xs + | isSpace c -> cs <> "{}" <> xs + | ctx == TextString + -> cs <> xs + _ -> cs <> "{}" <> xs + emitquote cs = + case xs of + '`':_ -> cs <> "\\," <> xs -- add thin space + '\'':_ -> cs <> "\\," <> xs -- add thin space + _ -> cs <> xs + in case x of + '?' | ligatures -> -- avoid ?` ligature + case xs of + '`':_ -> emits "?{}" + _ -> emitc x + '!' | ligatures -> -- avoid !` ligature + case xs of + '`':_ -> emits "!{}" + _ -> emitc x + '{' -> emits "\\{" + '}' -> emits "\\}" + '`' | ctx == CodeString -> emitcseq "\\textasciigrave" + '$' | not isUrl -> emits "\\$" + '%' -> emits "\\%" + '&' -> emits "\\&" + '_' | not isUrl -> emits "\\_" + '#' -> emits "\\#" + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures + ('-':_) -> emits "-\\/" + _ -> emitc '-' + '~' | not isUrl -> emitcseq "\\textasciitilde" + '^' -> emits "\\^{}" + '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows + | otherwise -> emitcseq "\\textbackslash" + '|' | not isUrl -> emitcseq "\\textbar" + '<' -> emitcseq "\\textless" + '>' -> emitcseq "\\textgreater" + '[' -> emits "{[}" -- to avoid interpretation as + ']' -> emits "{]}" -- optional arguments + '\'' | ctx == CodeString -> emitcseq "\\textquotesingle" + '\160' -> emits "~" + '\x200B' -> emits "\\hspace{0pt}" -- zero-width space + '\x202F' -> emits "\\," + '\x2026' -> emitcseq "\\ldots" + '\x2018' | ligatures -> emitquote "`" + '\x2019' | ligatures -> emitquote "'" + '\x201C' | ligatures -> emitquote "``" + '\x201D' | ligatures -> emitquote "''" + '\x2014' | ligatures -> emits "---" + '\x2013' | ligatures -> emits "--" + _ | writerPreferAscii opts + -> case x of + 'ı' -> emitcseq "\\i" + 'ȷ' -> emitcseq "\\j" + 'å' -> emitcseq "\\aa" + 'Å' -> emitcseq "\\AA" + 'ß' -> emitcseq "\\ss" + 'ø' -> emitcseq "\\o" + 'Ø' -> emitcseq "\\O" + 'Ł' -> emitcseq "\\L" + 'ł' -> emitcseq "\\l" + 'æ' -> emitcseq "\\ae" + 'Æ' -> emitcseq "\\AE" + 'œ' -> emitcseq "\\oe" + 'Œ' -> emitcseq "\\OE" + '£' -> emitcseq "\\pounds" + '€' -> emitcseq "\\euro" + '©' -> emitcseq "\\copyright" + _ -> emitc x + | otherwise -> emitc x + +lookupAccent :: Char -> Maybe String +lookupAccent '\779' = Just "\\H" +lookupAccent '\768' = Just "\\`" +lookupAccent '\769' = Just "\\'" +lookupAccent '\770' = Just "\\^" +lookupAccent '\771' = Just "\\~" +lookupAccent '\776' = Just "\\\"" +lookupAccent '\775' = Just "\\." +lookupAccent '\772' = Just "\\=" +lookupAccent '\781' = Just "\\|" +lookupAccent '\817' = Just "\\b" +lookupAccent '\807' = Just "\\c" +lookupAccent '\783' = Just "\\G" +lookupAccent '\777' = Just "\\h" +lookupAccent '\803' = Just "\\d" +lookupAccent '\785' = Just "\\f" +lookupAccent '\778' = Just "\\r" +lookupAccent '\865' = Just "\\t" +lookupAccent '\782' = Just "\\U" +lookupAccent '\780' = Just "\\v" +lookupAccent '\774' = Just "\\u" +lookupAccent '\808' = Just "\\k" +lookupAccent '\8413' = Just "\\textcircled" +lookupAccent _ = Nothing + +toLabel :: PandocMonad m => Text -> LW m Text +toLabel z = go `fmap` stringToLaTeX URLString z + where + go = T.concatMap $ \x -> case x of + _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x + | x `elemText` "_-+=:;." -> T.singleton x + | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) + +-- | Puts contents into LaTeX command. +inCmd :: Text -> Doc Text -> Doc Text +inCmd cmd contents = char '\\' <> literal cmd <> braces contents + +mapAlignment :: Text -> Text +mapAlignment a = case a of + "top" -> "T" + "top-baseline" -> "t" + "bottom" -> "b" + "center" -> "c" + _ -> a + +wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text) +wrapDiv (_,classes,kvs) t = do + beamer <- gets stBeamer + let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir + lang <- toLang $ lookup "lang" kvs + let wrapColumns = if beamer && "columns" `elem` classes + then \contents -> + let valign = maybe "T" mapAlignment (lookup "align" kvs) + totalwidth = maybe [] (\x -> ["totalwidth=" <> x]) + (lookup "totalwidth" kvs) + onlytextwidth = filter ("onlytextwidth" ==) classes + options = text $ T.unpack $ T.intercalate "," $ + valign : totalwidth ++ onlytextwidth + in inCmd "begin" "columns" <> brackets options + $$ contents + $$ inCmd "end" "columns" + else id + wrapColumn = if beamer && "column" `elem` classes + then \contents -> + let valign = + maybe "" + (brackets . text . T.unpack . mapAlignment) + (lookup "align" kvs) + w = maybe "0.48" fromPct (lookup "width" kvs) + in inCmd "begin" "column" <> + valign <> + braces (literal w <> "\\textwidth") + $$ contents + $$ inCmd "end" "column" + else id + fromPct xs = + case T.unsnoc xs of + Just (ds, '%') -> case safeRead ds of + Just digits -> showFl (digits / 100 :: Double) + Nothing -> xs + _ -> xs + wrapDir = case lookup "dir" kvs of + Just "rtl" -> align "RTL" + Just "ltr" -> align "LTR" + _ -> id + wrapLang txt = case lang of + Just lng -> let (l, o) = toPolyglossiaEnv lng + ops = if T.null o + then "" + else brackets $ literal o + in inCmd "begin" (literal l) <> ops + $$ blankline <> txt <> blankline + $$ inCmd "end" (literal l) + Nothing -> txt + return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t + +hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) +hypertarget _ "" x = return x +hypertarget addnewline ident x = do + ref <- literal `fmap` toLabel ident + return $ text "\\hypertarget" + <> braces ref + <> braces ((if addnewline && not (isEmpty x) + then "%" <> cr + else empty) <> x) + +labelFor :: PandocMonad m => Text -> LW m (Doc Text) +labelFor "" = return empty +labelFor ident = do + ref <- literal `fmap` toLabel ident + return $ text "\\label" <> braces ref + +-- Determine listings language from list of class attributes. +getListingsLanguage :: [Text] -> Maybe Text +getListingsLanguage xs + = foldr ((<|>) . toListingsLanguage) Nothing xs + +mbBraced :: Text -> Text +mbBraced x = if not (T.all isAlphaNum x) + then "{" <> x <> "}" + else x + + -- cgit v1.2.3 From bbcc1501a5fa6b40ded88f6738d35ce7a8079313 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 3 Mar 2021 10:05:46 -0800 Subject: Split out T.P.Readers.LaTeX.Inline. --- pandoc.cabal | 11 +- src/Text/Pandoc/Readers/LaTeX.hs | 474 ++++++++++---------------------- src/Text/Pandoc/Readers/LaTeX/Inline.hs | 275 ++++++++++++++++++ 3 files changed, 419 insertions(+), 341 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Inline.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index d457a0620..211327642 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -626,15 +626,16 @@ library Text.Pandoc.Readers.HTML.Table, Text.Pandoc.Readers.HTML.TagCategories, Text.Pandoc.Readers.HTML.Types, - Text.Pandoc.Readers.LaTeX.Types, - Text.Pandoc.Readers.LaTeX.Parsing, - Text.Pandoc.Readers.LaTeX.Lang, - Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Accent, + Text.Pandoc.Readers.LaTeX.Inline, Text.Pandoc.Readers.LaTeX.Citation, + Text.Pandoc.Readers.LaTeX.Lang, + Text.Pandoc.Readers.LaTeX.Macro, Text.Pandoc.Readers.LaTeX.Math, + Text.Pandoc.Readers.LaTeX.Parsing, + Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.LaTeX.Table, - Text.Pandoc.Readers.LaTeX.Macro, + Text.Pandoc.Readers.LaTeX.Types, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, Text.Pandoc.Readers.Odt.StyleReader, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index fc85f0545..a27135fd2 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -35,13 +35,13 @@ import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) import Text.Pandoc.BCP47 (Lang (..), renderLang) -import Text.Pandoc.Builder +import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, - readFileFromDirs, report, setResourcePath, - translateTerm) + readFileFromDirs, report, + setResourcePath) import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) -import Text.Pandoc.Highlighting (fromListingsLanguage, languagesByExtension) +import Text.Pandoc.Highlighting (languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging import Text.Pandoc.Options @@ -61,10 +61,12 @@ import Text.Pandoc.Readers.LaTeX.Macro (macroDef) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47, setDefaultLanguage) import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) +import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, + nameCommands, charCommands, + verbCommands, rawInlineOr, + listingsLanguage) import Text.Pandoc.Shared -import qualified Text.Pandoc.Translations as Translations import Text.Pandoc.Walk -import qualified Text.Pandoc.Builder as B import Safe -- for debugging: @@ -317,76 +319,6 @@ blockquote cvariant mblang = do optional $ symbolIn (".:;?!" :: [Char]) -- currently ignored return $ blockQuote . langdiv $ (bs <> citepar) -doAcronym :: PandocMonad m => Text -> LP m Inlines -doAcronym form = do - acro <- braced - return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), - ("acronym-form", "singular+" <> form)]) - $ str $ untokenize acro] - -doAcronymPlural :: PandocMonad m => Text -> LP m Inlines -doAcronymPlural form = do - acro <- braced - plural <- lit "s" - return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), - ("acronym-form", "plural+" <> form)]) $ - mconcat [str $ untokenize acro, plural]] - -doverb :: PandocMonad m => LP m Inlines -doverb = do - Tok _ Symbol t <- anySymbol - marker <- case T.uncons t of - Just (c, ts) | T.null ts -> return c - _ -> mzero - withVerbatimMode $ - code . untokenize <$> - manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) - -verbTok :: PandocMonad m => Char -> LP m Tok -verbTok stopchar = do - t@(Tok pos toktype txt) <- anyTok - case T.findIndex (== stopchar) txt of - Nothing -> return t - Just i -> do - let (t1, t2) = T.splitAt i txt - inp <- getInput - setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar) - : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp - return $ Tok pos toktype t1 - -listingsLanguage :: [(Text, Text)] -> Maybe Text -listingsLanguage opts = - case lookup "language" opts of - Nothing -> Nothing - Just l -> fromListingsLanguage l `mplus` Just l - -dolstinline :: PandocMonad m => LP m Inlines -dolstinline = do - options <- option [] keyvals - let classes = maybeToList $ listingsLanguage options - doinlinecode classes - -domintinline :: PandocMonad m => LP m Inlines -domintinline = do - skipopts - cls <- untokenize <$> braced - doinlinecode [cls] - -doinlinecode :: PandocMonad m => [Text] -> LP m Inlines -doinlinecode classes = do - Tok _ Symbol t <- anySymbol - marker <- case T.uncons t of - Just (c, ts) | T.null ts -> return c - _ -> mzero - let stopchar = if marker == '{' then '}' else marker - withVerbatimMode $ - codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$> - manyTill (verbTok stopchar) (symbol stopchar) - -nlToSpace :: Char -> Char -nlToSpace '\n' = ' ' -nlToSpace x = x - inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do Tok _ (CtrlSeq name) cmd <- anyControlSeq @@ -405,9 +337,6 @@ inlineCommand' = try $ do tok :: PandocMonad m => LP m Inlines tok = tokWith inline -inBrackets :: Inlines -> Inlines -inBrackets x = str "[" <> x <> str "]" - unescapeURL :: Text -> Text unescapeURL = T.concat . go . T.splitOn "\\" where @@ -420,234 +349,136 @@ unescapeURL = T.concat . go . T.splitOn "\\" | otherwise = "\\" <> t inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) -inlineCommands = - M.union inlineLanguageCommands $ - M.union (accentCommands tok) $ - M.union (citationCommands inline) $ - M.union (siunitxCommands tok) $ - M.fromList - [ ("emph", extractSpaces emph <$> tok) - , ("textit", extractSpaces emph <$> tok) - , ("textsl", extractSpaces emph <$> tok) - , ("textsc", extractSpaces smallcaps <$> tok) - , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok) - , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) - , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) - , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) - , ("texttt", ttfamily) - , ("sout", extractSpaces strikeout <$> tok) - , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer - , ("lq", return (str "‘")) - , ("rq", return (str "’")) - , ("textquoteleft", return (str "‘")) - , ("textquoteright", return (str "’")) - , ("textquotedblleft", return (str "“")) - , ("textquotedblright", return (str "”")) - , ("textsuperscript", extractSpaces superscript <$> tok) - , ("textsubscript", extractSpaces subscript <$> tok) - , ("textbackslash", lit "\\") - , ("backslash", lit "\\") - , ("slash", lit "/") - , ("textbf", extractSpaces strong <$> tok) - , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) - , ("underline", underline <$> tok) - , ("ldots", lit "…") - , ("vdots", lit "\8942") - , ("dots", lit "…") - , ("mdots", lit "…") - , ("sim", lit "~") - , ("sep", lit ",") - , ("label", rawInlineOr "label" dolabel) - , ("ref", rawInlineOr "ref" $ doref "ref") - , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty - , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty - , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty - , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) - , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) - , ("lettrine", rawInlineOr "lettrine" lettrine) - , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) - , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) - , ("ensuremath", mathInline . untokenize <$> braced) - , ("texorpdfstring", const <$> tok <*> tok) - , ("P", lit "¶") - , ("S", lit "§") - , ("$", lit "$") - , ("%", lit "%") - , ("&", lit "&") - , ("#", lit "#") - , ("_", lit "_") - , ("{", lit "{") - , ("}", lit "}") - , ("qed", lit "\a0\x25FB") - -- old TeX commands - , ("em", extractSpaces emph <$> inlines) - , ("it", extractSpaces emph <$> inlines) - , ("sl", extractSpaces emph <$> inlines) - , ("bf", extractSpaces strong <$> inlines) - , ("tt", code . stringify . toList <$> inlines) - , ("rm", inlines) - , ("itshape", extractSpaces emph <$> inlines) - , ("slshape", extractSpaces emph <$> inlines) - , ("scshape", extractSpaces smallcaps <$> inlines) - , ("bfseries", extractSpaces strong <$> inlines) - , ("MakeUppercase", makeUppercase <$> tok) - , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase - , ("uppercase", makeUppercase <$> tok) - , ("MakeLowercase", makeLowercase <$> tok) - , ("MakeTextLowercase", makeLowercase <$> tok) - , ("lowercase", makeLowercase <$> tok) - , ("/", pure mempty) -- italic correction - , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState - guard $ not inTableCell - optional rawopt - spaces)) - , (",", lit "\8198") - , ("@", pure mempty) - , (" ", lit "\160") - , ("ps", pure $ str "PS." <> space) - , ("TeX", lit "TeX") - , ("LaTeX", lit "LaTeX") - , ("bar", lit "|") - , ("textless", lit "<") - , ("textgreater", lit ">") - , ("thanks", skipopts >> note <$> grouped block) - , ("footnote", skipopts >> note <$> grouped block) - , ("passthrough", tok) -- \passthrough macro used by latex writer - -- for listings - , ("verb", doverb) - , ("lstinline", dolstinline) - , ("mintinline", domintinline) - , ("Verb", doverb) - , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$> - bracedUrl) - , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl) - , ("href", do url <- bracedUrl - sp - link (unescapeURL $ untokenize url) "" <$> tok) - , ("includegraphics", do options <- option [] keyvals - src <- braced - mkImage options . unescapeURL . removeDoubleQuotes $ - untokenize src) - , ("enquote*", enquote True Nothing) - , ("enquote", enquote False Nothing) - -- foreignquote is supposed to use native quote marks - , ("foreignquote*", braced >>= enquote True . Just . untokenize) - , ("foreignquote", braced >>= enquote False . Just . untokenize) - -- hypehnquote uses regular quotes - , ("hyphenquote*", braced >>= enquote True . Just . untokenize) - , ("hyphenquote", braced >>= enquote False . Just . untokenize) - , ("figurename", doTerm Translations.Figure) - , ("prefacename", doTerm Translations.Preface) - , ("refname", doTerm Translations.References) - , ("bibname", doTerm Translations.Bibliography) - , ("chaptername", doTerm Translations.Chapter) - , ("partname", doTerm Translations.Part) - , ("contentsname", doTerm Translations.Contents) - , ("listfigurename", doTerm Translations.ListOfFigures) - , ("listtablename", doTerm Translations.ListOfTables) - , ("indexname", doTerm Translations.Index) - , ("abstractname", doTerm Translations.Abstract) - , ("tablename", doTerm Translations.Table) - , ("enclname", doTerm Translations.Encl) - , ("ccname", doTerm Translations.Cc) - , ("headtoname", doTerm Translations.To) - , ("pagename", doTerm Translations.Page) - , ("seename", doTerm Translations.See) - , ("seealsoname", doTerm Translations.SeeAlso) - , ("proofname", doTerm Translations.Proof) - , ("glossaryname", doTerm Translations.Glossary) - , ("lstlistingname", doTerm Translations.Listing) - , ("hyperlink", hyperlink) - , ("hypertarget", hypertargetInline) - -- glossaries package - , ("gls", doAcronym "short") - , ("Gls", doAcronym "short") - , ("glsdesc", doAcronym "long") - , ("Glsdesc", doAcronym "long") - , ("GLSdesc", doAcronym "long") - , ("acrlong", doAcronym "long") - , ("Acrlong", doAcronym "long") - , ("acrfull", doAcronym "full") - , ("Acrfull", doAcronym "full") - , ("acrshort", doAcronym "abbrv") - , ("Acrshort", doAcronym "abbrv") - , ("glspl", doAcronymPlural "short") - , ("Glspl", doAcronymPlural "short") - , ("glsdescplural", doAcronymPlural "long") - , ("Glsdescplural", doAcronymPlural "long") - , ("GLSdescplural", doAcronymPlural "long") - -- acronyms package - , ("ac", doAcronym "short") - , ("acf", doAcronym "full") - , ("acs", doAcronym "abbrv") - , ("acl", doAcronym "long") - , ("acp", doAcronymPlural "short") - , ("acfp", doAcronymPlural "full") - , ("acsp", doAcronymPlural "abbrv") - , ("aclp", doAcronymPlural "long") - , ("Ac", doAcronym "short") - , ("Acf", doAcronym "full") - , ("Acs", doAcronym "abbrv") - , ("Acl", doAcronym "long") - , ("Acp", doAcronymPlural "short") - , ("Acfp", doAcronymPlural "full") - , ("Acsp", doAcronymPlural "abbrv") - , ("Aclp", doAcronymPlural "long") - -- hyphenat - , ("bshyp", lit "\\\173") - , ("fshyp", lit "/\173") - , ("dothyp", lit ".\173") - , ("colonhyp", lit ":\173") - , ("hyp", lit "-") - , ("nohyphens", tok) - , ("textnhtt", ttfamily) - , ("nhttfamily", ttfamily) - -- LaTeX colors - , ("textcolor", coloredInline "color") - , ("colorbox", coloredInline "background-color") - -- fontawesome - , ("faCheck", lit "\10003") - , ("faClose", lit "\10007") - -- xspace - , ("xspace", doxspace) - -- etoolbox - , ("ifstrequal", ifstrequal) - , ("newtoggle", braced >>= newToggle) - , ("toggletrue", braced >>= setToggle True) - , ("togglefalse", braced >>= setToggle False) - , ("iftoggle", try $ ifToggle >> inline) - -- biblatex misc - , ("RN", romanNumeralUpper) - , ("Rn", romanNumeralLower) - -- babel - , ("foreignlanguage", foreignlanguage) - -- include - , ("input", rawInlineOr "input" $ include "input") - -- soul package - , ("ul", underline <$> tok) - -- ulem package - , ("uline", underline <$> tok) - -- plain tex stuff that should just be passed through as raw tex - , ("ifdim", ifdim) - -- bibtex - , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok) - , ("mkbibemph", spanWith nullAttr . emph <$> tok) - , ("mkbibitalic", spanWith nullAttr . emph <$> tok) - , ("mkbibbold", spanWith nullAttr . strong <$> tok) - , ("mkbibparens", - spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok) - , ("mkbibbrackets", - spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok) - , ("autocap", spanWith nullAttr <$> tok) - , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) - , ("bibstring", - (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize - <$> braced) - , ("adddot", pure (str ".")) - , ("adddotspace", pure (spanWith nullAttr (str "." <> space))) - , ("addabbrvspace", pure space) - , ("hyphen", pure (str "-")) - ] +inlineCommands = M.unions + [ inlineLanguageCommands + , accentCommands tok + , citationCommands inline + , siunitxCommands tok + , acronymCommands + , refCommands + , nameCommands + , verbCommands + , charCommands + , rest ] + where + rest = M.fromList + [ ("emph", extractSpaces emph <$> tok) + , ("textit", extractSpaces emph <$> tok) + , ("textsl", extractSpaces emph <$> tok) + , ("textsc", extractSpaces smallcaps <$> tok) + , ("textsf", extractSpaces (spanWith ("",["sans-serif"],[])) <$> tok) + , ("textmd", extractSpaces (spanWith ("",["medium"],[])) <$> tok) + , ("textrm", extractSpaces (spanWith ("",["roman"],[])) <$> tok) + , ("textup", extractSpaces (spanWith ("",["upright"],[])) <$> tok) + , ("texttt", ttfamily) + , ("sout", extractSpaces strikeout <$> tok) + , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer + , ("lq", return (str "‘")) + , ("rq", return (str "’")) + , ("textquoteleft", return (str "‘")) + , ("textquoteright", return (str "’")) + , ("textquotedblleft", return (str "“")) + , ("textquotedblright", return (str "”")) + , ("textsuperscript", extractSpaces superscript <$> tok) + , ("textsubscript", extractSpaces subscript <$> tok) + , ("textbf", extractSpaces strong <$> tok) + , ("textnormal", extractSpaces (spanWith ("",["nodecor"],[])) <$> tok) + , ("underline", underline <$> tok) + , ("mbox", rawInlineOr "mbox" $ processHBox <$> tok) + , ("hbox", rawInlineOr "hbox" $ processHBox <$> tok) + , ("lettrine", rawInlineOr "lettrine" lettrine) + , ("(", mathInline . untokenize <$> manyTill anyTok (controlSeq ")")) + , ("[", mathDisplay . untokenize <$> manyTill anyTok (controlSeq "]")) + , ("ensuremath", mathInline . untokenize <$> braced) + , ("texorpdfstring", const <$> tok <*> tok) + -- old TeX commands + , ("em", extractSpaces emph <$> inlines) + , ("it", extractSpaces emph <$> inlines) + , ("sl", extractSpaces emph <$> inlines) + , ("bf", extractSpaces strong <$> inlines) + , ("tt", code . stringify . toList <$> inlines) + , ("rm", inlines) + , ("itshape", extractSpaces emph <$> inlines) + , ("slshape", extractSpaces emph <$> inlines) + , ("scshape", extractSpaces smallcaps <$> inlines) + , ("bfseries", extractSpaces strong <$> inlines) + , ("MakeUppercase", makeUppercase <$> tok) + , ("MakeTextUppercase", makeUppercase <$> tok) -- textcase + , ("uppercase", makeUppercase <$> tok) + , ("MakeLowercase", makeLowercase <$> tok) + , ("MakeTextLowercase", makeLowercase <$> tok) + , ("lowercase", makeLowercase <$> tok) + , ("thanks", skipopts >> note <$> grouped block) + , ("footnote", skipopts >> note <$> grouped block) + , ("passthrough", tok) -- \passthrough macro used by latex writer + -- for listings + , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$> + bracedUrl) + , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl) + , ("href", do url <- bracedUrl + sp + link (unescapeURL $ untokenize url) "" <$> tok) + , ("includegraphics", do options <- option [] keyvals + src <- braced + mkImage options . unescapeURL . removeDoubleQuotes $ + untokenize src) + , ("enquote*", enquote True Nothing) + , ("enquote", enquote False Nothing) + -- foreignquote is supposed to use native quote marks + , ("foreignquote*", braced >>= enquote True . Just . untokenize) + , ("foreignquote", braced >>= enquote False . Just . untokenize) + -- hypehnquote uses regular quotes + , ("hyphenquote*", braced >>= enquote True . Just . untokenize) + , ("hyphenquote", braced >>= enquote False . Just . untokenize) + , ("hyperlink", hyperlink) + , ("hypertarget", hypertargetInline) + -- hyphenat + , ("nohyphens", tok) + , ("textnhtt", ttfamily) + , ("nhttfamily", ttfamily) + -- LaTeX colors + , ("textcolor", coloredInline "color") + , ("colorbox", coloredInline "background-color") + -- xspace + , ("xspace", doxspace) + -- etoolbox + , ("ifstrequal", ifstrequal) + , ("newtoggle", braced >>= newToggle) + , ("toggletrue", braced >>= setToggle True) + , ("togglefalse", braced >>= setToggle False) + , ("iftoggle", try $ ifToggle >> inline) + -- biblatex misc + , ("RN", romanNumeralUpper) + , ("Rn", romanNumeralLower) + -- babel + , ("foreignlanguage", foreignlanguage) + -- include + , ("input", rawInlineOr "input" $ include "input") + -- soul package + , ("ul", underline <$> tok) + -- ulem package + , ("uline", underline <$> tok) + -- plain tex stuff that should just be passed through as raw tex + , ("ifdim", ifdim) + -- bibtex + , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok) + , ("mkbibemph", spanWith nullAttr . emph <$> tok) + , ("mkbibitalic", spanWith nullAttr . emph <$> tok) + , ("mkbibbold", spanWith nullAttr . strong <$> tok) + , ("mkbibparens", + spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok) + , ("mkbibbrackets", + spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok) + , ("autocap", spanWith nullAttr <$> tok) + , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) + , ("bibstring", + (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize + <$> braced) + , ("adddot", pure (str ".")) + , ("adddotspace", pure (spanWith nullAttr (str "." <> space))) + , ("addabbrvspace", pure space) + , ("hyphen", pure (str "-")) + ] lettrine :: PandocMonad m => LP m Inlines lettrine = do @@ -766,9 +597,6 @@ ifToggle = do report $ UndefinedToggle name' pos return () -doTerm :: PandocMonad m => Translations.Term -> LP m Inlines -doTerm term = str <$> translateTerm term - ifstrequal :: (PandocMonad m, Monoid a) => LP m a ifstrequal = do str1 <- tok @@ -789,13 +617,6 @@ coloredInline stylename = do ttfamily :: PandocMonad m => LP m Inlines ttfamily = code . stringify . toList <$> tok -rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines -rawInlineOr name' fallback = do - parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions - if parseRaw - then rawInline "latex" <$> getRawCommand name' ("\\" <> name') - else fallback - processHBox :: Inlines -> Inlines processHBox = walk convert where @@ -846,25 +667,6 @@ treatAsInline = Set.fromList , "pagebreak" ] -dolabel :: PandocMonad m => LP m Inlines -dolabel = do - v <- braced - let refstr = untokenize v - updateState $ \st -> - st{ sLastLabel = Just refstr } - return $ spanWith (refstr,[],[("label", refstr)]) - $ inBrackets $ str $ untokenize v - -doref :: PandocMonad m => Text -> LP m Inlines -doref cls = do - v <- braced - let refstr = untokenize v - return $ linkWith ("",[],[ ("reference-type", cls) - , ("reference", refstr)]) - ("#" <> refstr) - "" - (inBrackets $ str refstr) - lookupListDefault :: (Ord k) => v -> [k] -> M.Map k v -> v lookupListDefault d = (fromMaybe d .) . lookupList where lookupList l m = msum $ map (`M.lookup` m) l diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs new file mode 100644 index 000000000..66014a77f --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -0,0 +1,275 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.LaTeX.Inline + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Readers.LaTeX.Inline + ( acronymCommands + , verbCommands + , charCommands + , nameCommands + , refCommands + , rawInlineOr + , listingsLanguage + ) +where + +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Builder +import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) +import Control.Applicative (optional) +import Control.Monad (guard, mzero, mplus) +import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) +import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, + manyTill, getInput, setInput, incSourceColumn, + option) +import Text.Pandoc.Highlighting (fromListingsLanguage,) +import Data.Maybe (maybeToList) +import Text.Pandoc.Options (ReaderOptions(..)) +import qualified Text.Pandoc.Translations as Translations + +rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines +rawInlineOr name' fallback = do + parseRaw <- extensionEnabled Ext_raw_tex <$> getOption readerExtensions + if parseRaw + then rawInline "latex" <$> getRawCommand name' ("\\" <> name') + else fallback + +dolabel :: PandocMonad m => LP m Inlines +dolabel = do + v <- braced + let refstr = untokenize v + updateState $ \st -> + st{ sLastLabel = Just refstr } + return $ spanWith (refstr,[],[("label", refstr)]) + $ inBrackets $ str $ untokenize v + +doref :: PandocMonad m => Text -> LP m Inlines +doref cls = do + v <- braced + let refstr = untokenize v + return $ linkWith ("",[],[ ("reference-type", cls) + , ("reference", refstr)]) + ("#" <> refstr) + "" + (inBrackets $ str refstr) + +inBrackets :: Inlines -> Inlines +inBrackets x = str "[" <> x <> str "]" + +doTerm :: PandocMonad m => Translations.Term -> LP m Inlines +doTerm term = str <$> translateTerm term + +lit :: Text -> LP m Inlines +lit = pure . str + +doverb :: PandocMonad m => LP m Inlines +doverb = do + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + withVerbatimMode $ + code . untokenize <$> + manyTill (notFollowedBy newlineTok >> verbTok marker) (symbol marker) + +verbTok :: PandocMonad m => Char -> LP m Tok +verbTok stopchar = do + t@(Tok pos toktype txt) <- anyTok + case T.findIndex (== stopchar) txt of + Nothing -> return t + Just i -> do + let (t1, t2) = T.splitAt i txt + inp <- getInput + setInput $ Tok (incSourceColumn pos i) Symbol (T.singleton stopchar) + : totoks (incSourceColumn pos (i + 1)) (T.drop 1 t2) ++ inp + return $ Tok pos toktype t1 + +listingsLanguage :: [(Text, Text)] -> Maybe Text +listingsLanguage opts = + case lookup "language" opts of + Nothing -> Nothing + Just l -> fromListingsLanguage l `mplus` Just l + +dolstinline :: PandocMonad m => LP m Inlines +dolstinline = do + options <- option [] keyvals + let classes = maybeToList $ listingsLanguage options + doinlinecode classes + +domintinline :: PandocMonad m => LP m Inlines +domintinline = do + skipopts + cls <- untokenize <$> braced + doinlinecode [cls] + +doinlinecode :: PandocMonad m => [Text] -> LP m Inlines +doinlinecode classes = do + Tok _ Symbol t <- anySymbol + marker <- case T.uncons t of + Just (c, ts) | T.null ts -> return c + _ -> mzero + let stopchar = if marker == '{' then '}' else marker + withVerbatimMode $ + codeWith ("",classes,[]) . T.map nlToSpace . untokenize <$> + manyTill (verbTok stopchar) (symbol stopchar) + +nlToSpace :: Char -> Char +nlToSpace '\n' = ' ' +nlToSpace x = x + + + +verbCommands :: PandocMonad m => M.Map Text (LP m Inlines) +verbCommands = M.fromList + [ ("verb", doverb) + , ("lstinline", dolstinline) + , ("mintinline", domintinline) + , ("Verb", doverb) + ] + + + +charCommands :: PandocMonad m => M.Map Text (LP m Inlines) +charCommands = M.fromList + [ ("ldots", lit "…") + , ("vdots", lit "\8942") + , ("dots", lit "…") + , ("mdots", lit "…") + , ("sim", lit "~") + , ("sep", lit ",") + , ("P", lit "¶") + , ("S", lit "§") + , ("$", lit "$") + , ("%", lit "%") + , ("&", lit "&") + , ("#", lit "#") + , ("_", lit "_") + , ("{", lit "{") + , ("}", lit "}") + , ("qed", lit "\a0\x25FB") + , ("/", pure mempty) -- italic correction + , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState + guard $ not inTableCell + optional rawopt + spaces)) + , (",", lit "\8198") + , ("@", pure mempty) + , (" ", lit "\160") + , ("ps", pure $ str "PS." <> space) + , ("TeX", lit "TeX") + , ("LaTeX", lit "LaTeX") + , ("bar", lit "|") + , ("textless", lit "<") + , ("textgreater", lit ">") + , ("textbackslash", lit "\\") + , ("backslash", lit "\\") + , ("slash", lit "/") + -- fontawesome + , ("faCheck", lit "\10003") + , ("faClose", lit "\10007") + -- hyphenat + , ("bshyp", lit "\\\173") + , ("fshyp", lit "/\173") + , ("dothyp", lit ".\173") + , ("colonhyp", lit ":\173") + , ("hyp", lit "-") + ] + +nameCommands :: PandocMonad m => M.Map Text (LP m Inlines) +nameCommands = M.fromList + [ ("figurename", doTerm Translations.Figure) + , ("prefacename", doTerm Translations.Preface) + , ("refname", doTerm Translations.References) + , ("bibname", doTerm Translations.Bibliography) + , ("chaptername", doTerm Translations.Chapter) + , ("partname", doTerm Translations.Part) + , ("contentsname", doTerm Translations.Contents) + , ("listfigurename", doTerm Translations.ListOfFigures) + , ("listtablename", doTerm Translations.ListOfTables) + , ("indexname", doTerm Translations.Index) + , ("abstractname", doTerm Translations.Abstract) + , ("tablename", doTerm Translations.Table) + , ("enclname", doTerm Translations.Encl) + , ("ccname", doTerm Translations.Cc) + , ("headtoname", doTerm Translations.To) + , ("pagename", doTerm Translations.Page) + , ("seename", doTerm Translations.See) + , ("seealsoname", doTerm Translations.SeeAlso) + , ("proofname", doTerm Translations.Proof) + , ("glossaryname", doTerm Translations.Glossary) + , ("lstlistingname", doTerm Translations.Listing) + ] + +refCommands :: PandocMonad m => M.Map Text (LP m Inlines) +refCommands = M.fromList + [ ("label", rawInlineOr "label" dolabel) + , ("ref", rawInlineOr "ref" $ doref "ref") + , ("cref", rawInlineOr "cref" $ doref "ref") -- from cleveref.sty + , ("vref", rawInlineOr "vref" $ doref "ref+page") -- from varioref.sty + , ("eqref", rawInlineOr "eqref" $ doref "eqref") -- from amsmath.sty + ] + +acronymCommands :: PandocMonad m => M.Map Text (LP m Inlines) +acronymCommands = M.fromList + -- glossaries package + [ ("gls", doAcronym "short") + , ("Gls", doAcronym "short") + , ("glsdesc", doAcronym "long") + , ("Glsdesc", doAcronym "long") + , ("GLSdesc", doAcronym "long") + , ("acrlong", doAcronym "long") + , ("Acrlong", doAcronym "long") + , ("acrfull", doAcronym "full") + , ("Acrfull", doAcronym "full") + , ("acrshort", doAcronym "abbrv") + , ("Acrshort", doAcronym "abbrv") + , ("glspl", doAcronymPlural "short") + , ("Glspl", doAcronymPlural "short") + , ("glsdescplural", doAcronymPlural "long") + , ("Glsdescplural", doAcronymPlural "long") + , ("GLSdescplural", doAcronymPlural "long") + -- acronyms package + , ("ac", doAcronym "short") + , ("acf", doAcronym "full") + , ("acs", doAcronym "abbrv") + , ("acl", doAcronym "long") + , ("acp", doAcronymPlural "short") + , ("acfp", doAcronymPlural "full") + , ("acsp", doAcronymPlural "abbrv") + , ("aclp", doAcronymPlural "long") + , ("Ac", doAcronym "short") + , ("Acf", doAcronym "full") + , ("Acs", doAcronym "abbrv") + , ("Acl", doAcronym "long") + , ("Acp", doAcronymPlural "short") + , ("Acfp", doAcronymPlural "full") + , ("Acsp", doAcronymPlural "abbrv") + , ("Aclp", doAcronymPlural "long") + ] + +doAcronym :: PandocMonad m => Text -> LP m Inlines +doAcronym form = do + acro <- braced + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "singular+" <> form)]) + $ str $ untokenize acro] + +doAcronymPlural :: PandocMonad m => Text -> LP m Inlines +doAcronymPlural form = do + acro <- braced + let plural = str "s" + return . mconcat $ [spanWith ("",[],[("acronym-label", untokenize acro), + ("acronym-form", "plural+" <> form)]) $ + mconcat [str $ untokenize acro, plural]] + + -- cgit v1.2.3 From 044bc44fc621e421b74367765022f108494b4e2e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 3 Mar 2021 11:08:02 -0800 Subject: Moved more into T.P.Readers.LaTeX.Lang. --- src/Text/Pandoc/Readers/LaTeX.hs | 84 +++------------------------------ src/Text/Pandoc/Readers/LaTeX/Inline.hs | 65 +++++++++++++++++++++++-- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 30 +++++++++++- 3 files changed, 97 insertions(+), 82 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index a27135fd2..4ec038b94 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -34,7 +34,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) -import Text.Pandoc.BCP47 (Lang (..), renderLang) +import Text.Pandoc.BCP47 (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, @@ -58,11 +58,12 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, theoremEnvironment) import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Macro (macroDef) -import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, +import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands, babelLangToBCP47, setDefaultLanguage) import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, nameCommands, charCommands, + biblatexInlineCommands, verbCommands, rawInlineOr, listingsLanguage) import Text.Pandoc.Shared @@ -350,8 +351,7 @@ unescapeURL = T.concat . go . T.splitOn "\\" inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) inlineCommands = M.unions - [ inlineLanguageCommands - , accentCommands tok + [ accentCommands tok , citationCommands inline , siunitxCommands tok , acronymCommands @@ -359,6 +359,8 @@ inlineCommands = M.unions , nameCommands , verbCommands , charCommands + , inlineLanguageCommands tok + , biblatexInlineCommands tok , rest ] where rest = M.fromList @@ -373,12 +375,6 @@ inlineCommands = M.unions , ("texttt", ttfamily) , ("sout", extractSpaces strikeout <$> tok) , ("alert", skipopts >> spanWith ("",["alert"],[]) <$> tok) -- beamer - , ("lq", return (str "‘")) - , ("rq", return (str "’")) - , ("textquoteleft", return (str "‘")) - , ("textquoteright", return (str "’")) - , ("textquotedblleft", return (str "“")) - , ("textquotedblright", return (str "”")) , ("textsuperscript", extractSpaces superscript <$> tok) , ("textsubscript", extractSpaces subscript <$> tok) , ("textbf", extractSpaces strong <$> tok) @@ -447,11 +443,6 @@ inlineCommands = M.unions , ("toggletrue", braced >>= setToggle True) , ("togglefalse", braced >>= setToggle False) , ("iftoggle", try $ ifToggle >> inline) - -- biblatex misc - , ("RN", romanNumeralUpper) - , ("Rn", romanNumeralLower) - -- babel - , ("foreignlanguage", foreignlanguage) -- include , ("input", rawInlineOr "input" $ include "input") -- soul package @@ -460,24 +451,6 @@ inlineCommands = M.unions , ("uline", underline <$> tok) -- plain tex stuff that should just be passed through as raw tex , ("ifdim", ifdim) - -- bibtex - , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok) - , ("mkbibemph", spanWith nullAttr . emph <$> tok) - , ("mkbibitalic", spanWith nullAttr . emph <$> tok) - , ("mkbibbold", spanWith nullAttr . strong <$> tok) - , ("mkbibparens", - spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok) - , ("mkbibbrackets", - spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok) - , ("autocap", spanWith nullAttr <$> tok) - , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) - , ("bibstring", - (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize - <$> braced) - , ("adddot", pure (str ".")) - , ("adddotspace", pure (spanWith nullAttr (str "." <> space))) - , ("addabbrvspace", pure space) - , ("hyphen", pure (str "-")) ] lettrine :: PandocMonad m => LP m Inlines @@ -502,26 +475,6 @@ alterStr :: (Text -> Text) -> Inline -> Inline alterStr f (Str xs) = Str (f xs) alterStr _ x = x -foreignlanguage :: PandocMonad m => LP m Inlines -foreignlanguage = do - babelLang <- untokenize <$> braced - case babelLangToBCP47 babelLang of - Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok - _ -> tok - -inlineLanguageCommands :: PandocMonad m => M.Map Text (LP m Inlines) -inlineLanguageCommands = M.fromList $ mk <$> M.toList polyglossiaLangToBCP47 - where - mk (polyglossia, bcp47Func) = - ("text" <> polyglossia, inlineLanguage bcp47Func) - -inlineLanguage :: PandocMonad m => (Text -> Lang) -> LP m Inlines -inlineLanguage bcp47Func = do - o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') - <$> rawopt - let lang = renderLang $ bcp47Func o - extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok - hyperlink :: PandocMonad m => LP m Inlines hyperlink = try $ do src <- untokenize <$> braced @@ -542,31 +495,6 @@ hypertargetInline = try $ do ils <- grouped inline return $ spanWith (ref, [], []) ils -romanNumeralUpper :: (PandocMonad m) => LP m Inlines -romanNumeralUpper = - str . toRomanNumeral <$> romanNumeralArg - -romanNumeralLower :: (PandocMonad m) => LP m Inlines -romanNumeralLower = - str . T.toLower . toRomanNumeral <$> romanNumeralArg - -romanNumeralArg :: (PandocMonad m) => LP m Int -romanNumeralArg = spaces *> (parser <|> inBraces) - where - inBraces = do - symbol '{' - spaces - res <- parser - spaces - symbol '}' - return res - parser = do - s <- untokenize <$> many1 (satisfyTok isWordTok) - let (digits, rest) = T.span isDigit s - unless (T.null rest) $ - Prelude.fail "Non-digits in argument to \\Rn or \\RN" - safeRead digits - newToggle :: (Monoid a, PandocMonad m) => [Tok] -> LP m a newToggle name = do updateState $ \st -> diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs index 66014a77f..37c29188e 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -13,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline , verbCommands , charCommands , nameCommands + , biblatexInlineCommands , refCommands , rawInlineOr , listingsLanguage @@ -23,15 +24,17 @@ import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Builder +import Text.Pandoc.Shared (toRomanNumeral, safeRead) import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) -import Control.Applicative (optional) -import Control.Monad (guard, mzero, mplus) +import Control.Applicative (optional, (<|>)) +import Control.Monad (guard, mzero, mplus, unless) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), translateTerm) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, manyTill, getInput, setInput, incSourceColumn, - option) + option, many1) +import Data.Char (isDigit) import Text.Pandoc.Highlighting (fromListingsLanguage,) import Data.Maybe (maybeToList) import Text.Pandoc.Options (ReaderOptions(..)) @@ -127,6 +130,31 @@ nlToSpace :: Char -> Char nlToSpace '\n' = ' ' nlToSpace x = x +romanNumeralUpper :: (PandocMonad m) => LP m Inlines +romanNumeralUpper = + str . toRomanNumeral <$> romanNumeralArg + +romanNumeralLower :: (PandocMonad m) => LP m Inlines +romanNumeralLower = + str . T.toLower . toRomanNumeral <$> romanNumeralArg + +romanNumeralArg :: (PandocMonad m) => LP m Int +romanNumeralArg = spaces *> (parser <|> inBraces) + where + inBraces = do + symbol '{' + spaces + res <- parser + spaces + symbol '}' + return res + parser = do + s <- untokenize <$> many1 (satisfyTok isWordTok) + let (digits, rest) = T.span isDigit s + unless (T.null rest) $ + Prelude.fail "Non-digits in argument to \\Rn or \\RN" + safeRead digits + verbCommands :: PandocMonad m => M.Map Text (LP m Inlines) @@ -157,6 +185,12 @@ charCommands = M.fromList , ("{", lit "{") , ("}", lit "}") , ("qed", lit "\a0\x25FB") + , ("lq", return (str "‘")) + , ("rq", return (str "’")) + , ("textquoteleft", return (str "‘")) + , ("textquoteright", return (str "’")) + , ("textquotedblleft", return (str "“")) + , ("textquotedblright", return (str "”")) , ("/", pure mempty) -- italic correction , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState guard $ not inTableCell @@ -185,6 +219,31 @@ charCommands = M.fromList , ("hyp", lit "-") ] +biblatexInlineCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +biblatexInlineCommands tok = M.fromList + -- biblatex misc + [ ("RN", romanNumeralUpper) + , ("Rn", romanNumeralLower) + , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok) + , ("mkbibemph", spanWith nullAttr . emph <$> tok) + , ("mkbibitalic", spanWith nullAttr . emph <$> tok) + , ("mkbibbold", spanWith nullAttr . strong <$> tok) + , ("mkbibparens", + spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok) + , ("mkbibbrackets", + spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok) + , ("autocap", spanWith nullAttr <$> tok) + , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok) + , ("bibstring", + (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize + <$> braced) + , ("adddot", pure (str ".")) + , ("adddotspace", pure (spanWith nullAttr (str "." <> space))) + , ("addabbrvspace", pure space) + , ("hyphen", pure (str "-")) + ] + nameCommands :: PandocMonad m => M.Map Text (LP m Inlines) nameCommands = M.fromList [ ("figurename", doTerm Translations.Figure) diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index adbeaa6d4..24acbdbe4 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -15,15 +15,43 @@ module Text.Pandoc.Readers.LaTeX.Lang ( setDefaultLanguage , polyglossiaLangToBCP47 , babelLangToBCP47 + , inlineLanguageCommands ) where import qualified Data.Map as M +import Data.Text (Text) import qualified Data.Text as T +import Text.Pandoc.Shared (extractSpaces) import Text.Pandoc.BCP47 (Lang(..), renderLang) import Text.Pandoc.Class (PandocMonad(..), setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Parsing (updateState, option) -import Text.Pandoc.Builder (Blocks, setMeta, str) +import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith) + +foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines +foreignlanguage tok = do + babelLang <- untokenize <$> braced + case babelLangToBCP47 babelLang of + Just lang -> spanWith ("", [], [("lang", renderLang lang)]) <$> tok + _ -> tok + +inlineLanguageCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +inlineLanguageCommands tok = + M.fromList $ + ("foreignlanguage", foreignlanguage tok) : + (mk <$> M.toList polyglossiaLangToBCP47) + where + mk (polyglossia, bcp47Func) = + ("text" <> polyglossia, inlineLanguage tok bcp47Func) + +inlineLanguage :: PandocMonad m + => LP m Inlines -> (Text -> Lang) -> LP m Inlines +inlineLanguage tok bcp47Func = do + o <- option "" $ T.filter (\c -> c /= '[' && c /= ']') + <$> rawopt + let lang = renderLang $ bcp47Func o + extractSpaces (spanWith ("", [], [("lang", lang)])) <$> tok setDefaultLanguage :: PandocMonad m => LP m Blocks setDefaultLanguage = do -- cgit v1.2.3 From da5e9e5956aae3ac83edef7831939553360b8964 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 3 Mar 2021 11:22:42 -0800 Subject: Move enquote commands to T.P.LaTeX.Lang. --- src/Text/Pandoc/Readers/LaTeX.hs | 22 ++------------------- src/Text/Pandoc/Readers/LaTeX/Inline.hs | 2 -- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 34 +++++++++++++++++++++++++++++++-- 3 files changed, 34 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 4ec038b94..a4261bbeb 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -59,6 +59,7 @@ import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, import Text.Pandoc.Readers.LaTeX.Table (tableEnvironments) import Text.Pandoc.Readers.LaTeX.Macro (macroDef) import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands, + enquoteCommands, babelLangToBCP47, setDefaultLanguage) import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, @@ -293,18 +294,6 @@ quoted' f starter ender = do lit :: Text -> LP m Inlines lit = pure . str -enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines -enquote starred mblang = do - skipopts - let lang = mblang >>= babelLangToBCP47 - let langspan = case lang of - Nothing -> id - Just l -> spanWith ("",[],[("lang", renderLang l)]) - quoteContext <- sQuoteContext <$> getState - if starred || quoteContext == InDoubleQuote - then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok - else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok - blockquote :: PandocMonad m => Bool -> Maybe Text -> LP m Blocks blockquote cvariant mblang = do citepar <- if cvariant @@ -359,6 +348,7 @@ inlineCommands = M.unions , nameCommands , verbCommands , charCommands + , enquoteCommands tok , inlineLanguageCommands tok , biblatexInlineCommands tok , rest ] @@ -418,14 +408,6 @@ inlineCommands = M.unions src <- braced mkImage options . unescapeURL . removeDoubleQuotes $ untokenize src) - , ("enquote*", enquote True Nothing) - , ("enquote", enquote False Nothing) - -- foreignquote is supposed to use native quote marks - , ("foreignquote*", braced >>= enquote True . Just . untokenize) - , ("foreignquote", braced >>= enquote False . Just . untokenize) - -- hypehnquote uses regular quotes - , ("hyphenquote*", braced >>= enquote True . Just . untokenize) - , ("hyphenquote", braced >>= enquote False . Just . untokenize) , ("hyperlink", hyperlink) , ("hypertarget", hypertargetInline) -- hyphenat diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs index 37c29188e..8bdff58f7 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -155,8 +155,6 @@ romanNumeralArg = spaces *> (parser <|> inBraces) Prelude.fail "Non-digits in argument to \\Rn or \\RN" safeRead digits - - verbCommands :: PandocMonad m => M.Map Text (LP m Inlines) verbCommands = M.fromList [ ("verb", doverb) diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 24acbdbe4..08e217bdb 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -15,6 +15,7 @@ module Text.Pandoc.Readers.LaTeX.Lang ( setDefaultLanguage , polyglossiaLangToBCP47 , babelLangToBCP47 + , enquoteCommands , inlineLanguageCommands ) where @@ -25,8 +26,37 @@ import Text.Pandoc.Shared (extractSpaces) import Text.Pandoc.BCP47 (Lang(..), renderLang) import Text.Pandoc.Class (PandocMonad(..), setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Parsing (updateState, option) -import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith) +import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), + withQuoteContext) +import Text.Pandoc.Builder (Blocks, Inlines, setMeta, str, spanWith, + singleQuoted, doubleQuoted) + +enquote :: PandocMonad m + => LP m Inlines + -> Bool -> Maybe Text -> LP m Inlines +enquote tok starred mblang = do + skipopts + let lang = mblang >>= babelLangToBCP47 + let langspan = case lang of + Nothing -> id + Just l -> spanWith ("",[],[("lang", renderLang l)]) + quoteContext <- sQuoteContext <$> getState + if starred || quoteContext == InDoubleQuote + then singleQuoted . langspan <$> withQuoteContext InSingleQuote tok + else doubleQuoted . langspan <$> withQuoteContext InDoubleQuote tok + +enquoteCommands :: PandocMonad m + => LP m Inlines -> M.Map Text (LP m Inlines) +enquoteCommands tok = M.fromList + [ ("enquote*", enquote tok True Nothing) + , ("enquote", enquote tok False Nothing) + -- foreignquote is supposed to use native quote marks + , ("foreignquote*", braced >>= enquote tok True . Just . untokenize) + , ("foreignquote", braced >>= enquote tok False . Just . untokenize) + -- hypehnquote uses regular quotes + , ("hyphenquote*", braced >>= enquote tok True . Just . untokenize) + , ("hyphenquote", braced >>= enquote tok False . Just . untokenize) + ] foreignlanguage :: PandocMonad m => LP m Inlines -> LP m Inlines foreignlanguage tok = do -- cgit v1.2.3 From 33e4c8dd6c2bbc8109880f43b379d074ceb38391 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 3 Mar 2021 18:21:32 -0800 Subject: Remove T.P.Readers.LaTeX.Accent. Incorporate accentCommands into T.P.Readers.LaTeX.Inline. --- pandoc.cabal | 1 - src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 --------------------------------- src/Text/Pandoc/Readers/LaTeX/Inline.hs | 71 ++++++++++++++++++++++++++++-- 4 files changed, 69 insertions(+), 83 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Accent.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 211327642..090c28287 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -626,7 +626,6 @@ library Text.Pandoc.Readers.HTML.Table, Text.Pandoc.Readers.HTML.TagCategories, Text.Pandoc.Readers.HTML.Types, - Text.Pandoc.Readers.LaTeX.Accent, Text.Pandoc.Readers.LaTeX.Inline, Text.Pandoc.Readers.LaTeX.Citation, Text.Pandoc.Readers.LaTeX.Lang, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index a4261bbeb..552411db8 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -49,7 +49,6 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) import Text.Pandoc.Readers.LaTeX.Types (Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) import Text.Pandoc.Readers.LaTeX.Citation (citationCommands, cites) import Text.Pandoc.Readers.LaTeX.Math (dollarsMath, inlineEnvironments, inlineEnvironment, @@ -64,6 +63,7 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands, import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, nameCommands, charCommands, + accentCommands, biblatexInlineCommands, verbCommands, rawInlineOr, listingsLanguage) diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs deleted file mode 100644 index f8c53491c..000000000 --- a/src/Text/Pandoc/Readers/LaTeX/Accent.hs +++ /dev/null @@ -1,78 +0,0 @@ -{-# LANGUAGE ViewPatterns #-} -{-# LANGUAGE OverloadedStrings #-} -module Text.Pandoc.Readers.LaTeX.Accent - ( accentCommands ) -where - -import Text.Pandoc.Class -import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Builder as B -import qualified Data.Map as M -import Data.Text (Text) -import Data.Maybe (fromMaybe) -import Text.Pandoc.Parsing -import qualified Data.Text as T -import qualified Data.Text.Normalize as Normalize - -accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) -accentCommands tok = - let accent = accentWith tok - lit = pure . str - in M.fromList - [ ("aa", lit "å") - , ("AA", lit "Å") - , ("ss", lit "ß") - , ("o", lit "ø") - , ("O", lit "Ø") - , ("L", lit "Ł") - , ("l", lit "ł") - , ("ae", lit "æ") - , ("AE", lit "Æ") - , ("oe", lit "œ") - , ("OE", lit "Œ") - , ("pounds", lit "£") - , ("euro", lit "€") - , ("copyright", lit "©") - , ("textasciicircum", lit "^") - , ("textasciitilde", lit "~") - , ("H", accent '\779' Nothing) -- hungarumlaut - , ("`", accent '\768' (Just '`')) -- grave - , ("'", accent '\769' (Just '\'')) -- acute - , ("^", accent '\770' (Just '^')) -- circ - , ("~", accent '\771' (Just '~')) -- tilde - , ("\"", accent '\776' Nothing) -- umlaut - , (".", accent '\775' Nothing) -- dot - , ("=", accent '\772' Nothing) -- macron - , ("|", accent '\781' Nothing) -- vertical line above - , ("b", accent '\817' Nothing) -- macron below - , ("c", accent '\807' Nothing) -- cedilla - , ("G", accent '\783' Nothing) -- doublegrave - , ("h", accent '\777' Nothing) -- hookabove - , ("d", accent '\803' Nothing) -- dotbelow - , ("f", accent '\785' Nothing) -- inverted breve - , ("r", accent '\778' Nothing) -- ringabove - , ("t", accent '\865' Nothing) -- double inverted breve - , ("U", accent '\782' Nothing) -- double vertical line above - , ("v", accent '\780' Nothing) -- hacek - , ("u", accent '\774' Nothing) -- breve - , ("k", accent '\808' Nothing) -- ogonek - , ("textogonekcentered", accent '\808' Nothing) -- ogonek - , ("i", lit "ı") -- dotless i - , ("j", lit "ȷ") -- dotless j - , ("newtie", accent '\785' Nothing) -- inverted breve - , ("textcircled", accent '\8413' Nothing) -- combining circle - ] - -accentWith :: PandocMonad m - => LP m Inlines -> Char -> Maybe Char -> LP m Inlines -accentWith tok combiningAccent fallBack = try $ do - ils <- tok - case toList ils of - (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ - -- try to normalize to the combined character: - Str (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent]) <> xs) : ys - [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - _ -> return ils - diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs index 8bdff58f7..7b8bca4af 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.LaTeX.Inline Copyright : Copyright (C) 2006-2021 John MacFarlane @@ -12,6 +13,7 @@ module Text.Pandoc.Readers.LaTeX.Inline ( acronymCommands , verbCommands , charCommands + , accentCommands , nameCommands , biblatexInlineCommands , refCommands @@ -33,11 +35,12 @@ import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, manyTill, getInput, setInput, incSourceColumn, - option, many1) + option, many1, try) import Data.Char (isDigit) import Text.Pandoc.Highlighting (fromListingsLanguage,) -import Data.Maybe (maybeToList) +import Data.Maybe (maybeToList, fromMaybe) import Text.Pandoc.Options (ReaderOptions(..)) +import qualified Data.Text.Normalize as Normalize import qualified Text.Pandoc.Translations as Translations rawInlineOr :: PandocMonad m => Text -> LP m Inlines -> LP m Inlines @@ -155,6 +158,22 @@ romanNumeralArg = spaces *> (parser <|> inBraces) Prelude.fail "Non-digits in argument to \\Rn or \\RN" safeRead digits +accentWith :: PandocMonad m + => LP m Inlines -> Char -> Maybe Char -> LP m Inlines +accentWith tok combiningAccent fallBack = try $ do + ils <- tok + case toList ils of + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ + -- try to normalize to the combined character: + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> return $ str $ T.singleton + $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton + $ fromMaybe combiningAccent fallBack + _ -> return ils + + verbCommands :: PandocMonad m => M.Map Text (LP m Inlines) verbCommands = M.fromList [ ("verb", doverb) @@ -163,7 +182,53 @@ verbCommands = M.fromList , ("Verb", doverb) ] - +accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +accentCommands tok = + let accent = accentWith tok + in M.fromList + [ ("aa", lit "å") + , ("AA", lit "Å") + , ("ss", lit "ß") + , ("o", lit "ø") + , ("O", lit "Ø") + , ("L", lit "Ł") + , ("l", lit "ł") + , ("ae", lit "æ") + , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") + , ("pounds", lit "£") + , ("euro", lit "€") + , ("copyright", lit "©") + , ("textasciicircum", lit "^") + , ("textasciitilde", lit "~") + , ("H", accent '\779' Nothing) -- hungarumlaut + , ("`", accent '\768' (Just '`')) -- grave + , ("'", accent '\769' (Just '\'')) -- acute + , ("^", accent '\770' (Just '^')) -- circ + , ("~", accent '\771' (Just '~')) -- tilde + , ("\"", accent '\776' Nothing) -- umlaut + , (".", accent '\775' Nothing) -- dot + , ("=", accent '\772' Nothing) -- macron + , ("|", accent '\781' Nothing) -- vertical line above + , ("b", accent '\817' Nothing) -- macron below + , ("c", accent '\807' Nothing) -- cedilla + , ("G", accent '\783' Nothing) -- doublegrave + , ("h", accent '\777' Nothing) -- hookabove + , ("d", accent '\803' Nothing) -- dotbelow + , ("f", accent '\785' Nothing) -- inverted breve + , ("r", accent '\778' Nothing) -- ringabove + , ("t", accent '\865' Nothing) -- double inverted breve + , ("U", accent '\782' Nothing) -- double vertical line above + , ("v", accent '\780' Nothing) -- hacek + , ("u", accent '\774' Nothing) -- breve + , ("k", accent '\808' Nothing) -- ogonek + , ("textogonekcentered", accent '\808' Nothing) -- ogonek + , ("i", lit "ı") -- dotless i + , ("j", lit "ȷ") -- dotless j + , ("newtie", accent '\785' Nothing) -- inverted breve + , ("textcircled", accent '\8413' Nothing) -- combining circle + ] charCommands :: PandocMonad m => M.Map Text (LP m Inlines) charCommands = M.fromList -- cgit v1.2.3 From b569b0226d4bd5e0699077089d54fb03d4394b7d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 3 Mar 2021 18:47:17 -0800 Subject: Add T.P.Readers.LaTeX.Include. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/LaTeX.hs | 57 ++++----------------------- src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 ++++++++++++++++++++++++++++++++ src/Text/Pandoc/Readers/LaTeX/Inline.hs | 15 +++++++- 4 files changed, 87 insertions(+), 52 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 090c28287..11d34a19a 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -626,6 +626,7 @@ library Text.Pandoc.Readers.HTML.Table, Text.Pandoc.Readers.HTML.TagCategories, Text.Pandoc.Readers.HTML.Types, + Text.Pandoc.Readers.LaTeX.Include, Text.Pandoc.Readers.LaTeX.Inline, Text.Pandoc.Readers.LaTeX.Citation, Text.Pandoc.Readers.LaTeX.Lang, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 552411db8..dd6c2a1fa 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,9 +38,8 @@ import Text.Pandoc.BCP47 (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, - readFileFromDirs, report, - setResourcePath) -import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) + report, setResourcePath) +import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Highlighting (languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -61,6 +60,8 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands, enquoteCommands, babelLangToBCP47, setDefaultLanguage) import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) +import Text.Pandoc.Readers.LaTeX.Include (insertIncluded, + readFileFromTexinputs) import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, nameCommands, charCommands, accentCommands, @@ -235,19 +236,10 @@ mkImage options (T.unpack -> src) = do _ -> return src return $ imageWith attr (T.pack src') "" alt -doxspace :: PandocMonad m => LP m Inlines -doxspace = - (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty - where startsWithLetter (Tok _ Word t) = - case T.uncons t of - Just (c, _) | isLetter c -> True - _ -> False - startsWithLetter _ = False - removeDoubleQuotes :: Text -> Text removeDoubleQuotes t = - Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" + fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" doubleQuote :: PandocMonad m => LP m Inlines doubleQuote = @@ -406,8 +398,8 @@ inlineCommands = M.unions link (unescapeURL $ untokenize url) "" <$> tok) , ("includegraphics", do options <- option [] keyvals src <- braced - mkImage options . unescapeURL . removeDoubleQuotes $ - untokenize src) + mkImage options . unescapeURL . + removeDoubleQuotes $ untokenize src) , ("hyperlink", hyperlink) , ("hypertarget", hypertargetInline) -- hyphenat @@ -417,8 +409,6 @@ inlineCommands = M.unions -- LaTeX colors , ("textcolor", coloredInline "color") , ("colorbox", coloredInline "background-color") - -- xspace - , ("xspace", doxspace) -- etoolbox , ("ifstrequal", ifstrequal) , ("newtoggle", braced >>= newToggle) @@ -698,39 +688,6 @@ include name = do mapM_ (insertIncluded defaultExt) fs return mempty -readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text) -readFileFromTexinputs fp = do - fileContentsMap <- sFileContents <$> getState - case M.lookup (T.pack fp) fileContentsMap of - Just t -> return (Just t) - Nothing -> do - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." - <$> lookupEnv "TEXINPUTS" - readFileFromDirs dirs fp - -insertIncluded :: PandocMonad m - => FilePath - -> FilePath - -> LP m () -insertIncluded defaultExtension f' = do - let f = case takeExtension f' of - ".tex" -> f' - ".sty" -> f' - _ -> addExtension f' defaultExtension - pos <- getPosition - containers <- getIncludeFiles <$> getState - when (T.pack f `elem` containers) $ - throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos - updateState $ addIncludeFile $ T.pack f - mbcontents <- readFileFromTexinputs f - contents <- case mbcontents of - Just s -> return s - Nothing -> do - report $ CouldNotLoadIncludeFile (T.pack f) pos - return "" - getInput >>= setInput . (tokenize f contents ++) - updateState dropLatestIncludeFile - authors :: PandocMonad m => LP m () authors = try $ do bgroup diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs new file mode 100644 index 000000000..618a89284 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Include.hs @@ -0,0 +1,66 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Readers.LaTeX.Include + Copyright : Copyright (C) 2006-2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable +-} +module Text.Pandoc.Readers.LaTeX.Include + ( readFileFromTexinputs + , insertIncluded + ) +where + +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import Text.Pandoc.Shared (splitTextBy) +import System.FilePath (takeExtension, addExtension) +import Control.Monad (when) +import Control.Monad.Except (throwError) +import Text.Pandoc.Error (PandocError(PandocParseError)) +import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile)) +import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report) +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Parsing (updateState, getState, getInput, setInput, + getPosition, addIncludeFile, getIncludeFiles, + dropLatestIncludeFile) +import Data.Maybe (fromMaybe) + +readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text) +readFileFromTexinputs fp = do + fileContentsMap <- sFileContents <$> getState + case M.lookup (T.pack fp) fileContentsMap of + Just t -> return (Just t) + Nothing -> do + dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." + <$> lookupEnv "TEXINPUTS" + readFileFromDirs dirs fp + +insertIncluded :: PandocMonad m + => FilePath + -> FilePath + -> LP m () +insertIncluded defaultExtension f' = do + let f = case takeExtension f' of + ".tex" -> f' + ".sty" -> f' + _ -> addExtension f' defaultExtension + pos <- getPosition + containers <- getIncludeFiles <$> getState + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos + updateState $ addIncludeFile $ T.pack f + mbcontents <- readFileFromTexinputs f + contents <- case mbcontents of + Just s -> return s + Nothing -> do + report $ CouldNotLoadIncludeFile (T.pack f) pos + return "" + getInput >>= setInput . (tokenize f contents ++) + updateState dropLatestIncludeFile + + diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs index 7b8bca4af..3b37ee50e 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, manyTill, getInput, setInput, incSourceColumn, - option, many1, try) -import Data.Char (isDigit) + option, many1, try, lookAhead) +import Data.Char (isDigit, isLetter) import Text.Pandoc.Highlighting (fromListingsLanguage,) import Data.Maybe (maybeToList, fromMaybe) import Text.Pandoc.Options (ReaderOptions(..)) @@ -50,6 +50,15 @@ rawInlineOr name' fallback = do then rawInline "latex" <$> getRawCommand name' ("\\" <> name') else fallback +doxspace :: PandocMonad m => LP m Inlines +doxspace = + (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False + dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced @@ -280,6 +289,8 @@ charCommands = M.fromList , ("dothyp", lit ".\173") , ("colonhyp", lit ":\173") , ("hyp", lit "-") + -- xspace + , ("xspace", doxspace) ] biblatexInlineCommands :: PandocMonad m -- cgit v1.2.3 From 92ea8a0cb63241dbc8f89e73a359ac5efca2ab87 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 3 Mar 2021 19:07:16 -0800 Subject: Revert "Add T.P.Readers.LaTeX.Include." This reverts commit b569b0226d4bd5e0699077089d54fb03d4394b7d. Memory usage improvement in compilation wasn't very significant. --- pandoc.cabal | 1 - src/Text/Pandoc/Readers/LaTeX.hs | 57 +++++++++++++++++++++++---- src/Text/Pandoc/Readers/LaTeX/Include.hs | 66 -------------------------------- src/Text/Pandoc/Readers/LaTeX/Inline.hs | 15 +------- 4 files changed, 52 insertions(+), 87 deletions(-) delete mode 100644 src/Text/Pandoc/Readers/LaTeX/Include.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 11d34a19a..090c28287 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -626,7 +626,6 @@ library Text.Pandoc.Readers.HTML.Table, Text.Pandoc.Readers.HTML.TagCategories, Text.Pandoc.Readers.HTML.Types, - Text.Pandoc.Readers.LaTeX.Include, Text.Pandoc.Readers.LaTeX.Inline, Text.Pandoc.Readers.LaTeX.Citation, Text.Pandoc.Readers.LaTeX.Lang, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index dd6c2a1fa..552411db8 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -38,8 +38,9 @@ import Text.Pandoc.BCP47 (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, - report, setResourcePath) -import Text.Pandoc.Error (PandocError (PandocParsecError)) + readFileFromDirs, report, + setResourcePath) +import Text.Pandoc.Error (PandocError (PandocParseError, PandocParsecError)) import Text.Pandoc.Highlighting (languagesByExtension) import Text.Pandoc.ImageSize (numUnit, showFl) import Text.Pandoc.Logging @@ -60,8 +61,6 @@ import Text.Pandoc.Readers.LaTeX.Lang (inlineLanguageCommands, enquoteCommands, babelLangToBCP47, setDefaultLanguage) import Text.Pandoc.Readers.LaTeX.SIunitx (siunitxCommands) -import Text.Pandoc.Readers.LaTeX.Include (insertIncluded, - readFileFromTexinputs) import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, nameCommands, charCommands, accentCommands, @@ -236,10 +235,19 @@ mkImage options (T.unpack -> src) = do _ -> return src return $ imageWith attr (T.pack src') "" alt +doxspace :: PandocMonad m => LP m Inlines +doxspace = + (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty + where startsWithLetter (Tok _ Word t) = + case T.uncons t of + Just (c, _) | isLetter c -> True + _ -> False + startsWithLetter _ = False + removeDoubleQuotes :: Text -> Text removeDoubleQuotes t = - fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" + Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" doubleQuote :: PandocMonad m => LP m Inlines doubleQuote = @@ -398,8 +406,8 @@ inlineCommands = M.unions link (unescapeURL $ untokenize url) "" <$> tok) , ("includegraphics", do options <- option [] keyvals src <- braced - mkImage options . unescapeURL . - removeDoubleQuotes $ untokenize src) + mkImage options . unescapeURL . removeDoubleQuotes $ + untokenize src) , ("hyperlink", hyperlink) , ("hypertarget", hypertargetInline) -- hyphenat @@ -409,6 +417,8 @@ inlineCommands = M.unions -- LaTeX colors , ("textcolor", coloredInline "color") , ("colorbox", coloredInline "background-color") + -- xspace + , ("xspace", doxspace) -- etoolbox , ("ifstrequal", ifstrequal) , ("newtoggle", braced >>= newToggle) @@ -688,6 +698,39 @@ include name = do mapM_ (insertIncluded defaultExt) fs return mempty +readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text) +readFileFromTexinputs fp = do + fileContentsMap <- sFileContents <$> getState + case M.lookup (T.pack fp) fileContentsMap of + Just t -> return (Just t) + Nothing -> do + dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." + <$> lookupEnv "TEXINPUTS" + readFileFromDirs dirs fp + +insertIncluded :: PandocMonad m + => FilePath + -> FilePath + -> LP m () +insertIncluded defaultExtension f' = do + let f = case takeExtension f' of + ".tex" -> f' + ".sty" -> f' + _ -> addExtension f' defaultExtension + pos <- getPosition + containers <- getIncludeFiles <$> getState + when (T.pack f `elem` containers) $ + throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos + updateState $ addIncludeFile $ T.pack f + mbcontents <- readFileFromTexinputs f + contents <- case mbcontents of + Just s -> return s + Nothing -> do + report $ CouldNotLoadIncludeFile (T.pack f) pos + return "" + getInput >>= setInput . (tokenize f contents ++) + updateState dropLatestIncludeFile + authors :: PandocMonad m => LP m () authors = try $ do bgroup diff --git a/src/Text/Pandoc/Readers/LaTeX/Include.hs b/src/Text/Pandoc/Readers/LaTeX/Include.hs deleted file mode 100644 index 618a89284..000000000 --- a/src/Text/Pandoc/Readers/LaTeX/Include.hs +++ /dev/null @@ -1,66 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.Readers.LaTeX.Include - Copyright : Copyright (C) 2006-2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable --} -module Text.Pandoc.Readers.LaTeX.Include - ( readFileFromTexinputs - , insertIncluded - ) -where - -import qualified Data.Map as M -import Data.Text (Text) -import qualified Data.Text as T -import Text.Pandoc.Shared (splitTextBy) -import System.FilePath (takeExtension, addExtension) -import Control.Monad (when) -import Control.Monad.Except (throwError) -import Text.Pandoc.Error (PandocError(PandocParseError)) -import Text.Pandoc.Logging (LogMessage(CouldNotLoadIncludeFile)) -import Text.Pandoc.Class (PandocMonad (..), readFileFromDirs, report) -import Text.Pandoc.Readers.LaTeX.Parsing -import Text.Pandoc.Parsing (updateState, getState, getInput, setInput, - getPosition, addIncludeFile, getIncludeFiles, - dropLatestIncludeFile) -import Data.Maybe (fromMaybe) - -readFileFromTexinputs :: PandocMonad m => FilePath -> LP m (Maybe Text) -readFileFromTexinputs fp = do - fileContentsMap <- sFileContents <$> getState - case M.lookup (T.pack fp) fileContentsMap of - Just t -> return (Just t) - Nothing -> do - dirs <- map T.unpack . splitTextBy (==':') . fromMaybe "." - <$> lookupEnv "TEXINPUTS" - readFileFromDirs dirs fp - -insertIncluded :: PandocMonad m - => FilePath - -> FilePath - -> LP m () -insertIncluded defaultExtension f' = do - let f = case takeExtension f' of - ".tex" -> f' - ".sty" -> f' - _ -> addExtension f' defaultExtension - pos <- getPosition - containers <- getIncludeFiles <$> getState - when (T.pack f `elem` containers) $ - throwError $ PandocParseError $ T.pack $ "Include file loop at " ++ show pos - updateState $ addIncludeFile $ T.pack f - mbcontents <- readFileFromTexinputs f - contents <- case mbcontents of - Just s -> return s - Nothing -> do - report $ CouldNotLoadIncludeFile (T.pack f) pos - return "" - getInput >>= setInput . (tokenize f contents ++) - updateState dropLatestIncludeFile - - diff --git a/src/Text/Pandoc/Readers/LaTeX/Inline.hs b/src/Text/Pandoc/Readers/LaTeX/Inline.hs index 3b37ee50e..7b8bca4af 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Inline.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Inline.hs @@ -35,8 +35,8 @@ import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Extensions (extensionEnabled, Extension(..)) import Text.Pandoc.Parsing (getOption, updateState, getState, notFollowedBy, manyTill, getInput, setInput, incSourceColumn, - option, many1, try, lookAhead) -import Data.Char (isDigit, isLetter) + option, many1, try) +import Data.Char (isDigit) import Text.Pandoc.Highlighting (fromListingsLanguage,) import Data.Maybe (maybeToList, fromMaybe) import Text.Pandoc.Options (ReaderOptions(..)) @@ -50,15 +50,6 @@ rawInlineOr name' fallback = do then rawInline "latex" <$> getRawCommand name' ("\\" <> name') else fallback -doxspace :: PandocMonad m => LP m Inlines -doxspace = - (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty - where startsWithLetter (Tok _ Word t) = - case T.uncons t of - Just (c, _) | isLetter c -> True - _ -> False - startsWithLetter _ = False - dolabel :: PandocMonad m => LP m Inlines dolabel = do v <- braced @@ -289,8 +280,6 @@ charCommands = M.fromList , ("dothyp", lit ".\173") , ("colonhyp", lit ":\173") , ("hyp", lit "-") - -- xspace - , ("xspace", doxspace) ] biblatexInlineCommands :: PandocMonad m -- cgit v1.2.3 From e461b7dd45f717f3317216c7d3207a1d24bf1c85 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 4 Mar 2021 16:01:40 -0800 Subject: Relax `--abbreviations` rules so that a period isn't required. Partially addresses #7124. --- MANUAL.txt | 6 +++--- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 52b82a3d0..667a784e0 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -691,9 +691,9 @@ header when requesting a document from a URL: system default, use `pandoc --print-default-data-file=abbreviations`. The only use pandoc makes of this list is in the Markdown reader. - Strings ending in a period that are found in this list will - be followed by a nonbreaking space, so that the period will - not produce sentence-ending space in formats like LaTeX. + Strings found in this list will be followed by a nonbreaking + space, and the period will not produce sentence-ending space + in formats like LaTeX. The strings may not contain spaces. [`pandocfilters`]: https://github.com/jgm/pandocfilters [PHP]: https://github.com/vinai/pandocfilters-php diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b1b99dfe5..34edbcc17 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1725,7 +1725,7 @@ str = do updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations - if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs + if result `Set.member` abbrevs then try (do ils <- whitespace notFollowedBy (() <$ cite <|> () <$ note) -- ?? lookAhead alphaNum -- cgit v1.2.3 From 916ce4d51121e0529b938fda71f37e947882abe5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 4 Mar 2021 16:22:08 -0800 Subject: Revert "Relax `--abbreviations` rules so that a period isn't required." This reverts commit e461b7dd45f717f3317216c7d3207a1d24bf1c85. Ill-advised change. This doesn't work because we parse strings in chunks. --- MANUAL.txt | 6 +++--- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 667a784e0..52b82a3d0 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -691,9 +691,9 @@ header when requesting a document from a URL: system default, use `pandoc --print-default-data-file=abbreviations`. The only use pandoc makes of this list is in the Markdown reader. - Strings found in this list will be followed by a nonbreaking - space, and the period will not produce sentence-ending space - in formats like LaTeX. The strings may not contain spaces. + Strings ending in a period that are found in this list will + be followed by a nonbreaking space, so that the period will + not produce sentence-ending space in formats like LaTeX. [`pandocfilters`]: https://github.com/jgm/pandocfilters [PHP]: https://github.com/vinai/pandocfilters-php diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 34edbcc17..b1b99dfe5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1725,7 +1725,7 @@ str = do updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations - if result `Set.member` abbrevs + if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs then try (do ils <- whitespace notFollowedBy (() <$ cite <|> () <$ note) -- ?? lookAhead alphaNum -- cgit v1.2.3 From 030209fc292bd51700189653fd38ae5b2a723ef1 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 4 Mar 2021 16:25:13 -0800 Subject: Revert "Revert "Relax `--abbreviations` rules so that a period isn't required. This reverts commit 916ce4d51121e0529b938fda71f37e947882abe5. I was confused in thinking it wouldn't work. --- MANUAL.txt | 6 +++--- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 52b82a3d0..667a784e0 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -691,9 +691,9 @@ header when requesting a document from a URL: system default, use `pandoc --print-default-data-file=abbreviations`. The only use pandoc makes of this list is in the Markdown reader. - Strings ending in a period that are found in this list will - be followed by a nonbreaking space, so that the period will - not produce sentence-ending space in formats like LaTeX. + Strings found in this list will be followed by a nonbreaking + space, and the period will not produce sentence-ending space + in formats like LaTeX. The strings may not contain spaces. [`pandocfilters`]: https://github.com/jgm/pandocfilters [PHP]: https://github.com/vinai/pandocfilters-php diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index b1b99dfe5..34edbcc17 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1725,7 +1725,7 @@ str = do updateLastStrPos (do guardEnabled Ext_smart abbrevs <- getOption readerAbbreviations - if not (T.null result) && T.last result == '.' && result `Set.member` abbrevs + if result `Set.member` abbrevs then try (do ils <- whitespace notFollowedBy (() <$ cite <|> () <$ note) -- ?? lookAhead alphaNum -- cgit v1.2.3 From 5f9327cfc8143902bbd3fdb9d97a7995a19fd217 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 5 Mar 2021 10:20:16 -0800 Subject: Shared: Change defaultUserDataDirs -> defaultUserDataDir. Rationale: the manual says that the XDG data directory will be used if it exists, otherwise the legacy data directory. So we should just determine this and use this directory, rather than having a search path which could cause some things to be taken from one data directory and others from others. [API change] --- MANUAL.txt | 12 ++++++------ src/Text/Pandoc/Shared.hs | 20 ++++++++++++-------- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 667a784e0..d97cbcbc9 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -361,15 +361,15 @@ header when requesting a document from a URL: will be used. On \*nix and macOS systems this will be the `pandoc` subdirectory of the XDG data directory (by default, `$HOME/.local/share`, overridable by setting the `XDG_DATA_HOME` - environment variable). If that directory does not exist, - `$HOME/.pandoc` will be used (for backwards compatibility). - In Windows the default user data directory is + environment variable). If that directory does not exist and + `$HOME/.pandoc` exists, it will be used (for backwards compatibility). + On Windows the default user data directory is `C:\Users\USERNAME\AppData\Roaming\pandoc`. You can find the default user data directory on your system by looking at the output of `pandoc --version`. - A `reference.odt`, `reference.docx`, `epub.css`, `templates`, - `slidy`, `slideous`, or `s5` directory - placed in this directory will override pandoc's normal defaults. + Data files placed in this directory (for example, `reference.odt`, + `reference.docx`, `epub.css`, `templates`) will override + pandoc's normal defaults. `-d` *FILE*, `--defaults=`*FILE* diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 922df7922..2aba9b2e1 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -95,7 +95,7 @@ module Text.Pandoc.Shared ( safeRead, safeStrRead, -- * User data directory - defaultUserDataDirs, + defaultUserDataDir, -- * Version pandocVersion ) where @@ -1012,12 +1012,16 @@ safeStrRead s = case reads s of -- -- | Return appropriate user data directory for platform. We use --- XDG_DATA_HOME (or its default value), but fall back to the --- legacy user data directory ($HOME/.pandoc on *nix) if this is --- missing. -defaultUserDataDirs :: IO [FilePath] -defaultUserDataDirs = E.catch (do +-- XDG_DATA_HOME (or its default value), but for backwards compatibility, +-- we fall back to the legacy user data directory ($HOME/.pandoc on *nix) +-- if the XDG_DATA_HOME is missing and this exists. If neither directory +-- is present, we return the XDG data directory. +defaultUserDataDir :: IO FilePath +defaultUserDataDir = do xdgDir <- getXdgDirectory XdgData "pandoc" legacyDir <- getAppUserDataDirectory "pandoc" - return $ ordNub [xdgDir, legacyDir]) - (\(_ :: E.SomeException) -> return []) + xdgExists <- doesDirectoryExist xdgDir + legacyDirExists <- doesDirectoryExist legacyDir + if not xdgExists && legacyDirExists + then return legacyDir + else return xdgDir -- cgit v1.2.3 From ccc530c5884f9c36d63fb8a63ce6fadce166015c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 5 Mar 2021 10:28:31 -0800 Subject: Logging: Add EnvironmentVariableUndefined constructor to LogMessage. [API change] --- src/Text/Pandoc/Logging.hs | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 3a3acedc7..1d7bc9d66 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -100,6 +100,7 @@ data LogMessage = | FilterCompleted FilePath Integer | CiteprocWarning Text | ATXHeadingInLHS Int Text + | EnvironmentVariableUndefined Text deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -229,6 +230,8 @@ instance ToJSON LogMessage where ATXHeadingInLHS lvl contents -> ["level" .= lvl ,"contents" .= contents] + EnvironmentVariableUndefined var -> + ["variable" .= var ] showPos :: SourcePos -> Text showPos pos = Text.pack $ sn ++ "line " ++ @@ -345,6 +348,8 @@ showLogMessage msg = if lvl < 3 then " Consider using --markdown-headings=setext." else "" + EnvironmentVariableUndefined var -> + "Undefined environment variable " <> var <> " in defaults file." messageVerbosity :: LogMessage -> Verbosity messageVerbosity msg = @@ -391,3 +396,4 @@ messageVerbosity msg = FilterCompleted{} -> INFO CiteprocWarning{} -> WARNING ATXHeadingInLHS{} -> WARNING + EnvironmentVariableUndefined{}-> WARNING -- cgit v1.2.3 From a8324690061d561583f2cb583cfb28591f500181 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 5 Mar 2021 10:42:33 -0800 Subject: Add fields for CSL optinos to Opt. * Add `optCSL`, `optBibliography`, `optCitationAbbreviations` to `Opt` [API change]. * Move `addMeta` from T.P.App.Opt to T.P.App.CommandLineOptions. --- src/Text/Pandoc/App/CommandLineOptions.hs | 31 +++++++++++--- src/Text/Pandoc/App/Opt.hs | 68 +++++++++---------------------- 2 files changed, 46 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index 0a8193f6c..ac92db0ae 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -48,12 +48,13 @@ import System.FilePath import System.IO (stdout) import Text.DocTemplates (Context (..), ToContext (toVal), Val (..)) import Text.Pandoc +import Text.Pandoc.Builder (setMeta) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), IpynbOutput (..), - DefaultsState (..), addMeta, applyDefaults, + DefaultsState (..), applyDefaults, fullDefaultsPath) import Text.Pandoc.Filter (Filter (..)) import Text.Pandoc.Highlighting (highlightingStyles) -import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDirs) +import Text.Pandoc.Shared (ordNub, elemText, safeStrRead, defaultUserDataDir) import Text.Printf #ifdef EMBED_DATA_FILES @@ -939,13 +940,12 @@ options = (NoArg (\_ -> do prg <- getProgName - defaultDatadirs <- defaultUserDataDirs + defaultDatadir <- defaultUserDataDir UTF8.hPutStrLn stdout $ T.pack $ prg ++ " " ++ T.unpack pandocVersion ++ compileInfo ++ - "\nUser data directory: " ++ - intercalate " or " defaultDatadirs ++ + "\nUser data directory: " ++ defaultDatadir ++ ('\n':copyrightMessage) exitSuccess )) "" -- "Print version" @@ -1053,6 +1053,27 @@ setVariable key val (Context ctx) = Context $ M.alter go key ctx go (Just (ListVal xs)) = Just $ ListVal $ xs ++ [toVal val] go (Just x) = Just $ ListVal [x, toVal val] +addMeta :: String -> String -> Meta -> Meta +addMeta k v meta = + case lookupMeta k' meta of + Nothing -> setMeta k' v' meta + Just (MetaList xs) -> + setMeta k' (MetaList (xs ++ [v'])) meta + Just x -> setMeta k' (MetaList [x, v']) meta + where + v' = readMetaValue v + k' = T.pack k + +readMetaValue :: String -> MetaValue +readMetaValue s + | s == "true" = MetaBool True + | s == "True" = MetaBool True + | s == "TRUE" = MetaBool True + | s == "false" = MetaBool False + | s == "False" = MetaBool False + | s == "FALSE" = MetaBool False + | otherwise = MetaString $ T.pack s + -- On Windows with ghc 8.6+, we need to rewrite paths -- beginning with \\ to \\?\UNC\. -- See #5127. normalizePath :: FilePath -> FilePath diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index 0d96ab67c..b69e4e51e 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -22,7 +22,6 @@ module Text.Pandoc.App.Opt ( , IpynbOutput (..) , DefaultsState (..) , defaultOpts - , addMeta , applyDefaults , fullDefaultsPath ) where @@ -32,7 +31,6 @@ import System.FilePath ( addExtension, (</>), takeExtension ) import Data.Char (isLower, toLower) import Data.Maybe (fromMaybe) import GHC.Generics hiding (Meta) -import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Filter (Filter (..)) import Text.Pandoc.Logging (Verbosity (WARNING)) import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault), @@ -52,7 +50,7 @@ import Data.Text (Text, unpack) import Data.Default (def) import qualified Data.Text as T import qualified Data.Map as M -import Text.Pandoc.Definition (Meta(..), MetaValue(..), lookupMeta) +import Text.Pandoc.Definition (Meta(..), MetaValue(..)) import Data.Aeson (defaultOptions, Options(..)) import Data.Aeson.TH (deriveJSON) import Control.Applicative ((<|>)) @@ -156,6 +154,9 @@ data Opt = Opt , optNoCheckCertificate :: Bool -- ^ Disable certificate validation , optEol :: LineEnding -- ^ Style of line-endings to use , optStripComments :: Bool -- ^ Skip HTML comments + , optCSL :: Maybe FilePath -- ^ CSL stylesheet + , optBibliography :: [FilePath] -- ^ Bibliography files + , optCitationAbbreviations :: Maybe FilePath -- ^ Citation abbreviations } deriving (Generic, Show) instance FromYAML (Opt -> Opt) where @@ -428,26 +429,18 @@ doOpt (k',v) = do (parseYAML v >>= \x -> return (\o -> o{ optCss = optCss o <> [unpack x] })) "bibliography" -> - do let addItem x o = o{ optMetadata = - addMeta "bibliography" (T.unpack x) - (optMetadata o) } - (parseYAML v >>= \(xs :: [Text]) -> return $ \o -> - foldr addItem o xs) - <|> - (parseYAML v >>= \(x :: Text) -> return $ \o -> addItem x o) + (parseYAML v >>= \x -> return (\o -> + o{ optBibliography = optBibliography o <> + map unpack x })) + <|> + (parseYAML v >>= \x -> return (\o -> + o{ optBibliography = optBibliography o <> + [unpack x] })) "csl" -> - do let addItem x o = o{ optMetadata = - addMeta "csl" (T.unpack x) - (optMetadata o) } - (parseYAML v >>= \(xs :: [Text]) -> return $ \o -> - foldr addItem o xs) - <|> - (parseYAML v >>= \(x :: Text) -> return $ \o -> addItem x o) + parseYAML v >>= \x -> return (\o -> o{ optCSL = unpack <$> x }) "citation-abbreviations" -> - parseYAML v >>= \x -> - return (\o -> o{ optMetadata = - addMeta "citation-abbreviations" (T.unpack x) - (optMetadata o) }) + parseYAML v >>= \x -> return (\o -> o{ optCitationAbbreviations = + unpack <$> x }) "ipynb-output" -> parseYAML v >>= \x -> return (\o -> o{ optIpynbOutput = x }) "include-before-body" -> @@ -562,6 +555,9 @@ defaultOpts = Opt , optNoCheckCertificate = False , optEol = Native , optStripComments = False + , optCSL = Nothing + , optBibliography = [] + , optCitationAbbreviations = Nothing } parseStringKey :: Node Pos -> Parser Text @@ -579,27 +575,6 @@ yamlToMeta (Mapping _ _ m) = >>= fmap (Meta . flip P.runF def) yamlToMeta _ = return mempty -addMeta :: String -> String -> Meta -> Meta -addMeta k v meta = - case lookupMeta k' meta of - Nothing -> setMeta k' v' meta - Just (MetaList xs) -> - setMeta k' (MetaList (xs ++ [v'])) meta - Just x -> setMeta k' (MetaList [x, v']) meta - where - v' = readMetaValue v - k' = T.pack k - -readMetaValue :: String -> MetaValue -readMetaValue s - | s == "true" = MetaBool True - | s == "True" = MetaBool True - | s == "TRUE" = MetaBool True - | s == "false" = MetaBool False - | s == "False" = MetaBool False - | s == "FALSE" = MetaBool False - | otherwise = MetaString $ T.pack s - -- | Apply defaults from --defaults file. applyDefaults :: (PandocMonad m, MonadIO m) => Opt @@ -625,12 +600,9 @@ fullDefaultsPath dataDir file = do let fp = if null (takeExtension file) then addExtension file "yaml" else file - dataDirs <- liftIO defaultUserDataDirs - let fps = fp : case dataDir of - Nothing -> map (</> ("defaults" </> fp)) - dataDirs - Just dd -> [dd </> "defaults" </> fp] - fromMaybe fp <$> findM fileExists fps + defaultDataDir <- liftIO defaultUserDataDir + let defaultFp = fromMaybe defaultDataDir dataDir </> "defaults" </> fp + fromMaybe fp <$> findM fileExists [fp, defaultFp] -- | In a list of lists, append another list in front of every list which -- starts with specific element. -- cgit v1.2.3 From 6dd7520cc4b3816ae13ec486ce0909b9b881d240 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 5 Mar 2021 10:44:28 -0800 Subject: Implement environment variable interpolation in defaults files. This allows the syntax `${HOME}` to be used, in fields that expect file paths only. Any environment variable may be interpolated in this way. A warning will be raised for undefined variables. The special variable `USERDATA` is automatically set to the user data directory in force when the defaults file is parsed. (Note: it may be different from the eventual user data directory, if the defaults file or further command line options change that.) Closes #5982. Closes #5977. Closes #6108 (path not taken). --- MANUAL.txt | 15 ++++++ src/Text/Pandoc/App.hs | 29 +++++++---- src/Text/Pandoc/App/Opt.hs | 117 ++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 147 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index d97cbcbc9..f06293dd3 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1672,6 +1672,21 @@ one line: verbosity: INFO ``` +In fields that expect a file path (or list of file paths), the +following syntax may be used to interpolate environment variables: + +``` yaml +csl: ${HOME}/mycsldir/special.csl +``` + +`${USERDATA}` may also be used; this will always resolve to the +user data directory that is current when the defaults file is +parsed, regardless of the setting of the environment +variable `USERDATA`. + +This environment variable interpolation syntax *only* works in +fields that expect file paths. + Default files can be placed in the `defaults` subdirectory of the user data directory and used from any directory. For example, one could create a file specifying defaults for writing diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 40fb34834..6b45e5418 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -47,10 +47,11 @@ import System.FilePath ( takeBaseName, takeExtension ) import System.IO (nativeNewline, stdout) import qualified System.IO as IO (Newline (..)) import Text.Pandoc +import Text.Pandoc.Builder (setMeta) import Text.Pandoc.MIME (getCharset) import Text.Pandoc.App.FormatHeuristics (formatFromFilePaths) import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, - IpynbOutput (..) ) + IpynbOutput (..)) import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) @@ -60,7 +61,7 @@ import Text.Pandoc.PDF (makePDF) import Text.Pandoc.SelfContained (makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput, - defaultUserDataDirs, tshow, findM) + defaultUserDataDir, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) import Text.Pandoc.Readers.Markdown (yamlToMeta) import qualified Text.Pandoc.UTF8 as UTF8 @@ -71,6 +72,15 @@ import System.Posix.Terminal (queryTerminal) convertWithOpts :: Opt -> IO () convertWithOpts opts = do + datadir <- case optDataDir opts of + Nothing -> do + d <- defaultUserDataDir + exists <- doesDirectoryExist d + return $ if exists + then Just d + else Nothing + Just _ -> return $ optDataDir opts + let outputFile = fromMaybe "-" (optOutputFile opts) let filters = optFilters opts let verbosity = optVerbosity opts @@ -85,12 +95,6 @@ convertWithOpts opts = do Just xs | not (optIgnoreArgs opts) -> xs _ -> ["-"] - datadir <- case optDataDir opts of - Nothing -> do - ds <- defaultUserDataDirs - findM doesDirectoryExist ds - Just _ -> return $ optDataDir opts - let runIO' :: PandocIO a -> IO a runIO' f = do (res, reports) <- runIOorExplode $ do @@ -275,12 +279,21 @@ convertWithOpts opts = do report $ Deprecated "pandoc-citeproc filter" "Use --citeproc instead." + let cslMetadata = + maybe id (setMeta "csl") (optCSL opts) . + (case optBibliography opts of + [] -> id + xs -> setMeta "bibliography" xs) . + maybe id (setMeta "citation-abbreviations") + (optCitationAbbreviations opts) $ mempty + doc <- sourceToDoc sources >>= ( (if isJust (optExtractMedia opts) then fillMediaBag else return) >=> return . adjustMetadata (metadataFromFile <>) >=> return . adjustMetadata (<> optMetadata opts) + >=> return . adjustMetadata (<> cslMetadata) >=> applyTransforms transforms >=> applyFilters readerOpts filters [T.unpack format] >=> maybe return extractMedia (optExtractMedia opts) diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index b69e4e51e..b56b2c377 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -32,16 +32,18 @@ import Data.Char (isLower, toLower) import Data.Maybe (fromMaybe) import GHC.Generics hiding (Meta) import Text.Pandoc.Filter (Filter (..)) -import Text.Pandoc.Logging (Verbosity (WARNING)) +import Text.Pandoc.Logging (Verbosity (WARNING), LogMessage(..)) import Text.Pandoc.Options (TopLevelDivision (TopLevelDefault), TrackChanges (AcceptChanges), WrapOption (WrapAuto), HTMLMathMethod (PlainMath), ReferenceLocation (EndOfDocument), ObfuscationMethod (NoObfuscation), CiteMethod (Citeproc)) -import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, PandocMonad) +import Text.Pandoc.Class (readFileLazy, fileExists, setVerbosity, report, + PandocMonad(lookupEnv), getUserDataDir) import Text.Pandoc.Error (PandocError (PandocParseError, PandocSomeError)) -import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDirs, findM, ordNub) +import Text.Pandoc.Shared (camelCaseStrToHyphenated, defaultUserDataDir, + findM, ordNub) import qualified Text.Pandoc.Parsing as P import Text.Pandoc.Readers.Metadata (yamlMap) import Text.Pandoc.Class.PandocPure @@ -176,17 +178,120 @@ instance (PandocMonad m, MonadIO m) dataDir <- case M.lookup "data-dir" opts of Nothing -> return Nothing Just v -> Just . unpack <$> parseYAML v - f <- parseOptions $ M.toList m + f <- parseOptions (M.toList m) case M.lookup "defaults" opts of Just v -> do g <- parseDefaults v dataDir - return $ g >=> f - Nothing -> return f + return $ g >=> f >=> resolveVarsInOpt + Nothing -> return $ f >=> resolveVarsInOpt where toText (Scalar _ (SStr s)) = s toText _ = "" parseYAML n = failAtNode n "Expected a mapping" +resolveVarsInOpt :: (PandocMonad m, MonadIO m) => Opt -> m Opt +resolveVarsInOpt + opt@Opt + { optTemplate = oTemplate + , optMetadataFiles = oMetadataFiles + , optOutputFile = oOutputFile + , optInputFiles = oInputFiles + , optSyntaxDefinitions = oSyntaxDefinitions + , optAbbreviations = oAbbreviations + , optReferenceDoc = oReferenceDoc + , optEpubMetadata = oEpubMetadata + , optEpubFonts = oEpubFonts + , optEpubCoverImage = oEpubCoverImage + , optLogFile = oLogFile + , optFilters = oFilters + , optDataDir = oDataDir + , optExtractMedia = oExtractMedia + , optCss = oCss + , optIncludeBeforeBody = oIncludeBeforeBody + , optIncludeAfterBody = oIncludeAfterBody + , optIncludeInHeader = oIncludeInHeader + , optResourcePath = oResourcePath + , optCSL = oCSL + , optBibliography = oBibliography + , optCitationAbbreviations = oCitationAbbreviations + } + = do + oTemplate' <- mapM resolveVars oTemplate + oMetadataFiles' <- mapM resolveVars oMetadataFiles + oOutputFile' <- mapM resolveVars oOutputFile + oInputFiles' <- mapM (mapM resolveVars) oInputFiles + oSyntaxDefinitions' <- mapM resolveVars oSyntaxDefinitions + oAbbreviations' <- mapM resolveVars oAbbreviations + oReferenceDoc' <- mapM resolveVars oReferenceDoc + oEpubMetadata' <- mapM resolveVars oEpubMetadata + oEpubFonts' <- mapM resolveVars oEpubFonts + oEpubCoverImage' <- mapM resolveVars oEpubCoverImage + oLogFile' <- mapM resolveVars oLogFile + oFilters' <- mapM resolveVarsInFilter oFilters + oDataDir' <- mapM resolveVars oDataDir + oExtractMedia' <- mapM resolveVars oExtractMedia + oCss' <- mapM resolveVars oCss + oIncludeBeforeBody' <- mapM resolveVars oIncludeBeforeBody + oIncludeAfterBody' <- mapM resolveVars oIncludeAfterBody + oIncludeInHeader' <- mapM resolveVars oIncludeInHeader + oResourcePath' <- mapM resolveVars oResourcePath + oCSL' <- mapM resolveVars oCSL + oBibliography' <- mapM resolveVars oBibliography + oCitationAbbreviations' <- mapM resolveVars oCitationAbbreviations + return opt{ optTemplate = oTemplate' + , optMetadataFiles = oMetadataFiles' + , optOutputFile = oOutputFile' + , optInputFiles = oInputFiles' + , optSyntaxDefinitions = oSyntaxDefinitions' + , optAbbreviations = oAbbreviations' + , optReferenceDoc = oReferenceDoc' + , optEpubMetadata = oEpubMetadata' + , optEpubFonts = oEpubFonts' + , optEpubCoverImage = oEpubCoverImage' + , optLogFile = oLogFile' + , optFilters = oFilters' + , optDataDir = oDataDir' + , optExtractMedia = oExtractMedia' + , optCss = oCss' + , optIncludeBeforeBody = oIncludeBeforeBody' + , optIncludeAfterBody = oIncludeAfterBody' + , optIncludeInHeader = oIncludeInHeader' + , optResourcePath = oResourcePath' + , optCSL = oCSL' + , optBibliography = oBibliography' + , optCitationAbbreviations = oCitationAbbreviations' + } + + where + resolveVars [] = return [] + resolveVars ('$':'{':xs) = + let (ys, zs) = break (=='}') xs + in if null zs + then return $ '$':'{':xs + else do + val <- lookupEnv' ys + (val ++) <$> resolveVars (drop 1 zs) + resolveVars (c:cs) = (c:) <$> resolveVars cs + lookupEnv' "USERDATA" = do + mbodatadir <- mapM resolveVars oDataDir + mbdatadir <- getUserDataDir + defdatadir <- liftIO defaultUserDataDir + return $ fromMaybe defdatadir (mbodatadir <|> mbdatadir) + lookupEnv' v = do + mbval <- fmap T.unpack <$> lookupEnv (T.pack v) + case mbval of + Nothing -> do + report $ EnvironmentVariableUndefined (T.pack v) + return mempty + Just x -> return x + resolveVarsInFilter (JSONFilter fp) = + JSONFilter <$> resolveVars fp + resolveVarsInFilter (LuaFilter fp) = + LuaFilter <$> resolveVars fp + resolveVarsInFilter CiteprocFilter = return CiteprocFilter + + + parseDefaults :: (PandocMonad m, MonadIO m) => Node Pos -> Maybe FilePath -- cgit v1.2.3 From df00cf05cbf817b7d8e7de0a4a220dd70f8a9608 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 5 Mar 2021 11:56:41 -0800 Subject: Allow `${.}` in defaults files paths... to refer to the directory where the default file is. This will make it possible to create moveable "packages" of resources in a directory. Closes #5871. --- MANUAL.txt | 13 +++++++++++++ src/Text/Pandoc/App/Opt.hs | 13 +++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index f06293dd3..8e98ee1eb 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1684,6 +1684,19 @@ user data directory that is current when the defaults file is parsed, regardless of the setting of the environment variable `USERDATA`. +`${.}` will resolve to the directory containing the default +file itself. This allows you to refer to resources contained +in that directory: + +``` yaml +epub-cover-image: ${.}/cover.jpg +epub-metadata: ${.}/meta.xml +resource-path: +- . # the working directory from which pandoc is run +- ${.}/images # the images subdirectory of the directory + # containing this defaults file +``` + This environment variable interpolation syntax *only* works in fields that expect file paths. diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index b56b2c377..d09a6afc0 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -27,7 +27,8 @@ module Text.Pandoc.App.Opt ( ) where import Control.Monad.Except (MonadIO, liftIO, throwError, (>=>), foldM) import Control.Monad.State.Strict (StateT, modify, gets) -import System.FilePath ( addExtension, (</>), takeExtension ) +import System.FilePath ( addExtension, (</>), takeExtension, takeDirectory ) +import System.Directory ( canonicalizePath ) import Data.Char (isLower, toLower) import Data.Maybe (fromMaybe) import GHC.Generics hiding (Meta) @@ -189,7 +190,8 @@ instance (PandocMonad m, MonadIO m) toText _ = "" parseYAML n = failAtNode n "Expected a mapping" -resolveVarsInOpt :: (PandocMonad m, MonadIO m) => Opt -> m Opt +resolveVarsInOpt :: forall m. (PandocMonad m, MonadIO m) + => Opt -> StateT DefaultsState m Opt resolveVarsInOpt opt@Opt { optTemplate = oTemplate @@ -263,6 +265,7 @@ resolveVarsInOpt } where + resolveVars :: FilePath -> StateT DefaultsState m FilePath resolveVars [] = return [] resolveVars ('$':'{':xs) = let (ys, zs) = break (=='}') xs @@ -272,6 +275,12 @@ resolveVarsInOpt val <- lookupEnv' ys (val ++) <$> resolveVars (drop 1 zs) resolveVars (c:cs) = (c:) <$> resolveVars cs + lookupEnv' :: String -> StateT DefaultsState m String + lookupEnv' "." = do + mbCurDefaults <- gets curDefaults + maybe (return "") + (fmap takeDirectory . liftIO . canonicalizePath) + mbCurDefaults lookupEnv' "USERDATA" = do mbodatadir <- mapM resolveVars oDataDir mbdatadir <- getUserDataDir -- cgit v1.2.3 From 735a69de6b94e32ee9aceb44a893de4067f24354 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 6 Mar 2021 10:32:51 -0800 Subject: Allow `--resource-path` to accumulate. Previously, if `--resource-path` were used multiple times, the last resource path would replace the others. With this change, each time `--resource-path` is used, it prepends the specified path components to the existing resource path. Similarly, when `resource-path` is specified in a defaults file, the paths provided will be prepended to the existing resource path. This change also allows one to avoid using the OS-specific path separator; instead, one can simply use `--resource-path` a number of times with single paths. This form of command will not have an OS-dependent behavior. This change facilitates the use of multiple, small defaults files: each can specify a directory containing its own resources without clobbering the resource paths set by the others. Closes #6152. --- MANUAL.txt | 5 +++++ src/Text/Pandoc/App/CommandLineOptions.hs | 3 ++- src/Text/Pandoc/App/Opt.hs | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 8e98ee1eb..a5036b361 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -891,6 +891,11 @@ header when requesting a document from a URL: will not be searched. For example: `--resource-path=.:test` will search the working directory and the `test` subdirectory, in that order. + This option can be used repeatedly. Search path components + that come later on the command line will be searched before + those that come earlier, so + `--resource-path foo:bar --resource-path baz:bim` is + equivalent to `--resource-path baz:bim:foo:bar`. `--request-header=`*NAME*`:`*VAL* diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index ac92db0ae..b4483f756 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -288,7 +288,8 @@ options = , Option "" ["resource-path"] (ReqArg (\arg opt -> return opt { optResourcePath = - splitSearchPath arg }) + splitSearchPath arg ++ + optResourcePath opt }) "SEARCHPATH") "" -- "Paths to search for images and other resources" diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index d09a6afc0..e5aaec9c5 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -583,7 +583,8 @@ doOpt (k',v) = do optIncludeInHeader o <> [unpack x] })) "resource-path" -> parseYAML v >>= \x -> - return (\o -> o{ optResourcePath = map unpack x }) + return (\o -> o{ optResourcePath = map unpack x <> + optResourcePath o }) "request-headers" -> parseYAML v >>= \x -> return (\o -> o{ optRequestHeaders = x }) -- cgit v1.2.3 From c652dcc16b208954ab30a27b1c72a889b20ef2e9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 7 Mar 2021 13:21:45 -0800 Subject: LaTeX reader: support hyperref command. Closes #7127. --- src/Text/Pandoc/Readers/LaTeX.hs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 552411db8..ceac261d2 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -398,17 +398,20 @@ inlineCommands = M.unions , ("footnote", skipopts >> note <$> grouped block) , ("passthrough", tok) -- \passthrough macro used by latex writer -- for listings + , ("includegraphics", do options <- option [] keyvals + src <- braced + mkImage options . + unescapeURL . + removeDoubleQuotes $ untokenize src) + -- hyperref , ("url", (\url -> link url "" (str url)) . unescapeURL . untokenize <$> bracedUrl) , ("nolinkurl", code . unescapeURL . untokenize <$> bracedUrl) , ("href", do url <- bracedUrl sp link (unescapeURL $ untokenize url) "" <$> tok) - , ("includegraphics", do options <- option [] keyvals - src <- braced - mkImage options . unescapeURL . removeDoubleQuotes $ - untokenize src) , ("hyperlink", hyperlink) + , ("hyperref", hyperref) , ("hypertarget", hypertargetInline) -- hyphenat , ("nohyphens", tok) @@ -463,6 +466,12 @@ hyperlink = try $ do lab <- tok return $ link ("#" <> src) "" lab +hyperref :: PandocMonad m => LP m Inlines +hyperref = try $ do + url <- (("#" <>) . untokenize <$> try (sp *> bracketedToks <* sp)) + <|> untokenize <$> (bracedUrl <* bracedUrl <* bracedUrl) + link url "" <$> tok + hypertargetBlock :: PandocMonad m => LP m Blocks hypertargetBlock = try $ do ref <- untokenize <$> braced -- cgit v1.2.3 From 5aa73bd0a2820a0c89b5990dbe53abfdd5ade32d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 7 Mar 2021 15:49:02 -0800 Subject: LaTeX reader: handle table cells containing `&` in `\verb`. Closes #7129. --- src/Text/Pandoc/Readers/LaTeX/Table.hs | 7 ++++++- test/command/7129.md | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 test/command/7129.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs index 2ea9caf58..7833da081 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -134,6 +134,11 @@ parseTableRow :: PandocMonad m -> LP m Row parseTableRow blocks inline envname prefsufs = do notFollowedBy (spaces *> end_ envname) + -- contexts that can contain & that is not colsep: + let canContainAmp (Tok _ (CtrlSeq "begin") _) = True + canContainAmp (Tok _ (CtrlSeq "verb") _) = True + canContainAmp (Tok _ (CtrlSeq "Verb") _) = True + canContainAmp _ = False -- add prefixes and suffixes in token stream: let celltoks (pref, suff) = do prefpos <- getPosition @@ -142,7 +147,7 @@ parseTableRow blocks inline envname prefsufs = do ((lookAhead (controlSeq "parbox") >> void blocks) -- #5711 <|> - (lookAhead (controlSeq "begin") >> void inline) + (lookAhead (satisfyTok canContainAmp) >> void inline) <|> (lookAhead (symbol '$') >> void inline)) <|> diff --git a/test/command/7129.md b/test/command/7129.md new file mode 100644 index 000000000..fef4ca2c3 --- /dev/null +++ b/test/command/7129.md @@ -0,0 +1,27 @@ +``` +% pandoc -f latex -t native +\begin{tabular}{ll} \hline + FOO & BAR \\ \hline + foo & \verb|b&r| \\ \hline +\end{tabular} +^D +[Table ("",[],[]) (Caption Nothing + []) + [(AlignLeft,ColWidthDefault) + ,(AlignLeft,ColWidthDefault)] + (TableHead ("",[],[]) + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "FOO"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "BAR"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "foo"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Code ("",[],[]) "b&r"]]]])] + (TableFoot ("",[],[]) + [])] +``` -- cgit v1.2.3 From eb184d9148e8a3e8c896a71550b1f0bee8da9a21 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 8 Mar 2021 12:40:25 +0100 Subject: Jira writer: use noformat instead of code for unknown languages. Code blocks that are not marked as a language supported by Jira are rendered as preformatted text with `{noformat}` blocks. Fixes: tarleb/jira-wiki-markup#4 --- src/Text/Pandoc/Writers/Jira.hs | 10 ++++---- test/Tests/Writers/Jira.hs | 10 ++++++++ test/writer.jira | 55 +++++++++++++++++------------------------ 3 files changed, 37 insertions(+), 38 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index 131896201..a714dac2e 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -126,13 +126,13 @@ toJiraCode :: PandocMonad m -> Text -> JiraConverter m [Jira.Block] toJiraCode (ident, classes, _attribs) code = do - let lang = case find (\c -> T.toLower c `elem` knownLanguages) classes of - Nothing -> Jira.Language "java" - Just l -> Jira.Language l let addAnchor b = if T.null ident then b else [Jira.Para (singleton (Jira.Anchor ident))] <> b - return . addAnchor . singleton $ Jira.Code lang mempty code + return . addAnchor . singleton $ + case find (\c -> T.toLower c `elem` knownLanguages) classes of + Nothing -> Jira.NoFormat mempty code + Just l -> Jira.Code (Jira.Language l) mempty code -- | Creates a Jira definition list toJiraDefinitionList :: PandocMonad m @@ -310,7 +310,7 @@ registerNotes contents = do knownLanguages :: [Text] knownLanguages = [ "actionscript", "ada", "applescript", "bash", "c", "c#", "c++" - , "css", "erlang", "go", "groovy", "haskell", "html", "javascript" + , "css", "erlang", "go", "groovy", "haskell", "html", "java", "javascript" , "json", "lua", "nyan", "objc", "perl", "php", "python", "r", "ruby" , "scala", "sql", "swift", "visualbasic", "xml", "yaml" ] diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index aff8348d4..b618c3970 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -68,5 +68,15 @@ tests = spanWith ("unicorn", [], []) (str "Unicorn") =?> "{anchor:unicorn}Unicorn" ] + + , testGroup "code" + [ "code block with known language" =: + codeBlockWith ("", ["java"], []) "Book book = new Book(\"Algebra\")" =?> + "{code:java}\nBook book = new Book(\"Algebra\")\n{code}" + + , "code block without language" =: + codeBlockWith ("", [], []) "preformatted\n text.\n" =?> + "{noformat}\npreformatted\n text.\n{noformat}" + ] ] ] diff --git a/test/writer.jira b/test/writer.jira index aff0dc320..71b720d65 100644 --- a/test/writer.jira +++ b/test/writer.jira @@ -33,11 +33,10 @@ bq. This is a block quote. It is pretty short. {quote} Code in a block quote: -{code:java} +{noformat} sub status { print "working"; -} -{code} +}{noformat} A list: # item one @@ -56,22 +55,20 @@ And a following paragraph. h1. {anchor:code-blocks}Code Blocks Code: -{code:java} +{noformat} ---- (should be four hyphens) sub status { print "working"; } -this code block is indented by one tab -{code} +this code block is indented by one tab{noformat} And: -{code:java} +{noformat} this code block is indented by two tabs -These should not be escaped: \$ \\ \> \[ \{ -{code} +These should not be escaped: \$ \\ \> \[ \{{noformat} ---- h1. {anchor:lists}Lists h2. {anchor:unordered}Unordered @@ -236,9 +233,8 @@ red fruit contains seeds, crisp, pleasant to taste * *_orange_* orange fruit -{code:java} -{ orange code block } -{code} +{noformat} +{ orange code block }{noformat} bq. orange block quote Multiple definitions, tight: @@ -292,16 +288,14 @@ foo This should be a code block, though: -{code:java} +{noformat} <div> foo -</div> -{code} +</div>{noformat} As should this: -{code:java} -<div>foo</div> -{code} +{noformat} +<div>foo</div>{noformat} Now, nested: foo @@ -312,16 +306,14 @@ Multiline: Code block: -{code:java} -<!-- Comment --> -{code} +{noformat} +<!-- Comment -->{noformat} Just plain comment, with trailing spaces on the line: Code: -{code:java} -<hr /> -{code} +{noformat} +<hr />{noformat} Hr’s: ---- @@ -478,9 +470,8 @@ Indented [thrice|/url]. This should \[not\]\[\] be a link. -{code:java} -[not]: /url -{code} +{noformat} +[not]: /url{noformat} Foo [bar|/url/]. Foo [biz|/url/]. @@ -506,9 +497,8 @@ An e-mail address: [mailto:nobody@nowhere.net] bq. Blockquoted: [http://example.com/] Auto-links should not occur here: {{<http://example.com/>}} -{code:java} -or here: <http://example.com/> -{code} +{noformat} +or here: <http://example.com/>{noformat} ---- h1. {anchor:images}Images From "Voyage dans la Lune" by Georges Melies \(1902): @@ -534,9 +524,8 @@ This paragraph should not be part of the note, as it is not indented. Subsequent blocks are indented to show that they belong to the footnote \(as with list items). -{code:java} - { <code> } -{code} +{noformat} + { <code> }{noformat} If you want, you can indent every line, but you can also be lazy and just indent the first line of each block. -- cgit v1.2.3 From b9b2586ed3e9aac9c5ba86127fbf984fb3149844 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 9 Mar 2021 18:01:08 +0100 Subject: Org writer: prevent unintended creation of ordered list items Adjust line wrapping if default wrapping would cause a line to be read as an ordered list item. Fixes #7132 --- src/Text/Pandoc/Writers/Org.hs | 11 +++++++---- test/command/7132.md | 10 ++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 test/command/7132.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 8dfc2749c..1b525831e 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -17,7 +17,7 @@ Org-Mode: <http://orgmode.org> -} module Text.Pandoc.Writers.Org (writeOrg) where import Control.Monad.State.Strict -import Data.Char (isAlphaNum) +import Data.Char (isAlphaNum, isDigit) import Data.List (intersect, intersperse, partition, transpose) import Data.Text (Text) import qualified Data.Text as T @@ -347,16 +347,19 @@ inlineListToOrg :: PandocMonad m => [Inline] -> Org m (Doc Text) inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) - where fixMarkers [] = [] -- prevent note refs and list markers from wrapping, see #4171 + where -- Prevent note refs and list markers from wrapping, see #4171 + -- and #7132. + fixMarkers [] = [] fixMarkers (Space : x : rest) | shouldFix x = Str " " : x : fixMarkers rest fixMarkers (SoftBreak : x : rest) | shouldFix x = Str " " : x : fixMarkers rest fixMarkers (x : rest) = x : fixMarkers rest - shouldFix Note{} = True -- Prevent footnotes + shouldFix Note{} = True -- Prevent footnotes shouldFix (Str "-") = True -- Prevent bullet list items - -- TODO: prevent ordered list items + shouldFix (Str x) -- Prevent ordered list items + | Just (cs, c) <- T.unsnoc x = T.all isDigit cs && c == '.' || c == ')' shouldFix _ = False -- | Convert Pandoc inline element to Org. diff --git a/test/command/7132.md b/test/command/7132.md new file mode 100644 index 000000000..30d4c0b3b --- /dev/null +++ b/test/command/7132.md @@ -0,0 +1,10 @@ +``` +% pandoc -f markdown -t org --columns=72 +- This line has exactly the wrong number of characters before the number 5. +- Long line ending with a number (this time it is in parentheses and a 23) +^D +- This line has exactly the wrong number of characters before the + number 5. +- Long line ending with a number (this time it is in parentheses and + a 23) +``` -- cgit v1.2.3 From d7f8fbf04b3d58b6b10d1a8db32c3c7e50b614c1 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 9 Mar 2021 21:15:16 +0100 Subject: Org writer: fix operator precedence mistake in previous commit --- src/Text/Pandoc/Writers/Org.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 1b525831e..29d58a161 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -359,7 +359,8 @@ inlineListToOrg lst = hcat <$> mapM inlineToOrg (fixMarkers lst) shouldFix Note{} = True -- Prevent footnotes shouldFix (Str "-") = True -- Prevent bullet list items shouldFix (Str x) -- Prevent ordered list items - | Just (cs, c) <- T.unsnoc x = T.all isDigit cs && c == '.' || c == ')' + | Just (cs, c) <- T.unsnoc x = T.all isDigit cs && + (c == '.' || c == ')') shouldFix _ = False -- | Convert Pandoc inline element to Org. -- cgit v1.2.3 From 1c23e3a824bc4d850a908193443ad1915a3ebc61 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 9 Mar 2021 13:03:27 -0800 Subject: RST reader: fix logic for ending comments. Previously comments sometimes got extended too far. Closes #7134. --- src/Text/Pandoc/Readers/RST.hs | 3 ++- test/command/7134.md | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 test/command/7134.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 29f81b046..514e3b88d 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -613,8 +613,9 @@ comment = try $ do string ".." skipMany1 spaceChar <|> (() <$ lookAhead newline) -- notFollowedBy' directiveLabel -- comment comes after directive so unnec. - manyTill anyChar blanklines + _ <- anyLine optional indentedBlock + optional blanklines return mempty directiveLabel :: Monad m => RSTParser m Text diff --git a/test/command/7134.md b/test/command/7134.md new file mode 100644 index 000000000..c69ae4bd0 --- /dev/null +++ b/test/command/7134.md @@ -0,0 +1,16 @@ +``` +% pandoc -f rst -t native +This is a paragraph. + + This is a block quote. + +.. + + This should be a second block quote. +^D +[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "paragraph."] +,BlockQuote + [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "block",Space,Str "quote."]] +,BlockQuote + [Para [Str "This",Space,Str "should",Space,Str "be",Space,Str "a",Space,Str "second",Space,Str "block",Space,Str "quote."]]] +``` -- cgit v1.2.3 From 5608dc01e5342d367fba3377042dec3944f4d86f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 10 Mar 2021 10:19:40 -0800 Subject: HTML writer: Add warnings on duplicate attribute values. This prevents emitting invalid HTML. Ultimately it would be good to prevent this in the types themselves, but this is better for now. T.P.Logging: Add DuplicateAttribute constructor to LogMessage. [API change] --- src/Text/Pandoc/Logging.hs | 8 +++++++ src/Text/Pandoc/Writers/HTML.hs | 41 ++++++++++++++++++++---------------- test/command/duplicate_attributes.md | 7 ++++++ 3 files changed, 38 insertions(+), 18 deletions(-) create mode 100644 test/command/duplicate_attributes.md (limited to 'src') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 1d7bc9d66..efd2188f1 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -36,6 +36,7 @@ import Data.Typeable (Typeable) import GHC.Generics (Generic) import Text.Pandoc.Definition import Text.Parsec.Pos +import Text.Pandoc.Shared (tshow) -- | Verbosity level. data Verbosity = ERROR | WARNING | INFO @@ -101,6 +102,7 @@ data LogMessage = | CiteprocWarning Text | ATXHeadingInLHS Int Text | EnvironmentVariableUndefined Text + | DuplicateAttribute Text Text deriving (Show, Eq, Data, Ord, Typeable, Generic) instance ToJSON LogMessage where @@ -232,6 +234,9 @@ instance ToJSON LogMessage where ,"contents" .= contents] EnvironmentVariableUndefined var -> ["variable" .= var ] + DuplicateAttribute attr val -> + ["attribute" .= attr + ,"value" .= val] showPos :: SourcePos -> Text showPos pos = Text.pack $ sn ++ "line " ++ @@ -350,6 +355,8 @@ showLogMessage msg = else "" EnvironmentVariableUndefined var -> "Undefined environment variable " <> var <> " in defaults file." + DuplicateAttribute attr val -> + "Ignoring duplicate attribute " <> attr <> "=" <> tshow val <> "." messageVerbosity :: LogMessage -> Verbosity messageVerbosity msg = @@ -397,3 +404,4 @@ messageVerbosity msg = CiteprocWarning{} -> WARNING ATXHeadingInLHS{} -> WARNING EnvironmentVariableUndefined{}-> WARNING + DuplicateAttribute{} -> WARNING diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 26df0325e..2f33cd467 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -32,7 +32,7 @@ import Control.Monad.State.Strict import Data.Char (ord) import Data.List (intercalate, intersperse, partition, delete, (\\)) import Data.List.NonEmpty (NonEmpty((:|))) -import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe) +import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T @@ -551,23 +551,28 @@ toAttrs :: PandocMonad m toAttrs kvs = do html5 <- gets stHtml5 mbEpubVersion <- gets stEPUBVersion - return $ mapMaybe (\(x,y) -> - if html5 - then - if x `Set.member` (html5Attributes <> rdfaAttributes) - || T.any (== ':') x -- e.g. epub: namespace - || "data-" `T.isPrefixOf` x - || "aria-" `T.isPrefixOf` x - then Just $ customAttribute (textTag x) (toValue y) - else Just $ customAttribute (textTag ("data-" <> x)) - (toValue y) - else - if mbEpubVersion == Just EPUB2 && - not (x `Set.member` (html4Attributes <> rdfaAttributes) || - "xml:" `T.isPrefixOf` x) - then Nothing - else Just $ customAttribute (textTag x) (toValue y)) - kvs + reverse . snd <$> foldM (go html5 mbEpubVersion) (Set.empty, []) kvs + where + go html5 mbEpubVersion (keys, attrs) (k,v) = do + if k `Set.member` keys + then do + report $ DuplicateAttribute k v + return (keys, attrs) + else return (Set.insert k keys, addAttr html5 mbEpubVersion k v attrs) + addAttr html5 mbEpubVersion x y + | html5 + = if x `Set.member` (html5Attributes <> rdfaAttributes) + || T.any (== ':') x -- e.g. epub: namespace + || "data-" `T.isPrefixOf` x + || "aria-" `T.isPrefixOf` x + then (customAttribute (textTag x) (toValue y) :) + else (customAttribute (textTag ("data-" <> x)) (toValue y) :) + | mbEpubVersion == Just EPUB2 + , not (x `Set.member` (html4Attributes <> rdfaAttributes) || + "xml:" `T.isPrefixOf` x) + = id + | otherwise + = (customAttribute (textTag x) (toValue y) :) attrsToHtml :: PandocMonad m => WriterOptions -> Attr -> StateT WriterState m [Attribute] diff --git a/test/command/duplicate_attributes.md b/test/command/duplicate_attributes.md new file mode 100644 index 000000000..b6e8a4c21 --- /dev/null +++ b/test/command/duplicate_attributes.md @@ -0,0 +1,7 @@ +``` +% pandoc +[span]{.foobar style="color:blue" class="zip" style="color:red"} +^D +[WARNING] Ignoring duplicate attribute style="color:red". +<p><span class="foobar zip" style="color:blue">span</span></p> +``` -- cgit v1.2.3 From 92ffd374754e28939a855fe84fb5455cb91383fa Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 11 Mar 2021 15:58:37 -0800 Subject: Simplify compactDL. --- src/Text/Pandoc/Shared.hs | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 2aba9b2e1..d11ad13f5 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -464,22 +464,20 @@ plainToPara :: Block -> Block plainToPara (Plain ils) = Para ils plainToPara x = x + -- | Like @compactify@, but acts on items of definition lists. compactifyDL :: [(Inlines, [Blocks])] -> [(Inlines, [Blocks])] compactifyDL items = - let defs = concatMap snd items - in case reverse (concatMap B.toList defs) of - (Para x:xs) - | not (any isPara xs) -> - let (t,ds) = last items - lastDef = B.toList $ last ds - ds' = init ds ++ - if null lastDef - then [B.fromList lastDef] - else [B.fromList $ init lastDef ++ [Plain x]] - in init items ++ [(t, ds')] - | otherwise -> items - _ -> items + case reverse items of + ((t,ds):ys) -> + case reverse (map (reverse . B.toList) ds) of + ((Para x:xs) : zs) | not (any isPara xs) -> + reverse ys ++ + [(t, reverse (map B.fromList zs) ++ + [B.fromList (reverse (Plain x:xs))])] + _ -> items + _ -> items + -- | Combine a list of lines by adding hard linebreaks. combineLines :: [[Inline]] -> [Inline] -- cgit v1.2.3 From 894ed8ebb01ce11cbcc6afc13d00d6ac1dd05ed0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 12 Mar 2021 11:57:31 -0800 Subject: Citeproc: apply fixLinks correctly. This is code that incorporates a prefix like `https://doi.org/` into a following link when appropriate. But it didn't work because we were walking with a `[Inline] -> [Inline]` function on an `Inlines`. Changed the point of application of `fixLink` to resolve the issue. Closes #7130. --- src/Text/Pandoc/Citeproc.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 38e992ba1..af302f782 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -49,7 +49,6 @@ import Data.Text (Text) import qualified Data.Text as T import System.FilePath (takeExtension) import Safe (lastMay, initSafe) --- import Debug.Trace as Trace (trace, traceShowId) processCitations :: PandocMonad m => Pandoc -> m Pandoc @@ -91,11 +90,12 @@ processCitations (Pandoc meta bs) = do _ -> id) $ [] let bibs = mconcat $ map (\(ident, out) -> B.divWith ("ref-" <> ident,["csl-entry"],[]) . B.para . - walk (convertQuotes locale) . insertSpace $ out) + walk (convertQuotes locale) . + insertSpace $ out) (resultBibliography result) let moveNotes = maybe True truish $ lookupMeta "notes-after-punctuation" meta - let cits = map (walk fixLinks . walk (convertQuotes locale)) $ + let cits = map (walk (convertQuotes locale)) $ resultCitations result let fixQuotes = case localePunctuationInQuote locale of @@ -294,7 +294,7 @@ insertResolvedCitations (Cite cs ils) = do [] -> return (Cite cs ils) (x:xs) -> do put xs - return $ Cite cs (B.toList x) + return $ Cite cs (walk fixLinks $ B.toList x) insertResolvedCitations x = return x getCitations :: Locale @@ -431,7 +431,7 @@ mvPunct _ _ [] = [] -- move https://doi.org etc. prefix inside link text (#6723): fixLinks :: [Inline] -> [Inline] fixLinks (Str t : Link attr [Str u1] (u2,tit) : xs) - | t <> u1 == u2 + | u2 == t <> u1 = Link attr [Str (t <> u1)] (u2,tit) : fixLinks xs fixLinks (x:xs) = x : fixLinks xs fixLinks [] = [] -- cgit v1.2.3 From a8aa301428752d96cdd58d7f4ecaa7d054f3505d Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 13 Mar 2021 12:10:02 +0100 Subject: Jira writer: improve div/panel handling Include div attributes in panels, always render divs with class `panel` as panels, and avoid nesting of panels. --- src/Text/Pandoc/Writers/Jira.hs | 39 ++++++++++++++++++++++++++++----------- test/Tests/Writers/Jira.hs | 30 ++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index a714dac2e..aa78d9419 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -39,11 +39,17 @@ writeJira :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeJira opts = runDefaultConverter (writerWrapText opts) (pandocToJira opts) -- | State to keep track of footnotes. -newtype ConverterState = ConverterState { stNotes :: [Text] } +data ConverterState = ConverterState + { stNotes :: [Text] -- ^ Footnotes to be appended to the end of the text + , stInPanel :: Bool -- ^ whether we are in a @{panel}@ block + } -- | Initial converter state. startState :: ConverterState -startState = ConverterState { stNotes = [] } +startState = ConverterState + { stNotes = [] + , stInPanel = False + } -- | Converter monad type JiraConverter m = ReaderT WrapOption (StateT ConverterState m) @@ -126,14 +132,20 @@ toJiraCode :: PandocMonad m -> Text -> JiraConverter m [Jira.Block] toJiraCode (ident, classes, _attribs) code = do - let addAnchor b = if T.null ident - then b - else [Jira.Para (singleton (Jira.Anchor ident))] <> b - return . addAnchor . singleton $ + return . addAnchor ident . singleton $ case find (\c -> T.toLower c `elem` knownLanguages) classes of Nothing -> Jira.NoFormat mempty code Just l -> Jira.Code (Jira.Language l) mempty code +-- | Prepends an anchor with the given identifier. +addAnchor :: Text -> [Jira.Block] -> [Jira.Block] +addAnchor ident = + if T.null ident + then id + else \case + Jira.Para xs : bs -> (Jira.Para (Jira.Anchor ident : xs) : bs) + bs -> (Jira.Para (singleton (Jira.Anchor ident)) : bs) + -- | Creates a Jira definition list toJiraDefinitionList :: PandocMonad m => [([Inline], [[Block]])] @@ -149,11 +161,16 @@ toJiraDefinitionList defItems = do toJiraPanel :: PandocMonad m => Attr -> [Block] -> JiraConverter m [Jira.Block] -toJiraPanel attr blocks = do - jiraBlocks <- toJiraBlocks blocks - return $ if attr == nullAttr - then jiraBlocks - else singleton (Jira.Panel [] jiraBlocks) +toJiraPanel (ident, classes, attribs) blocks = do + inPanel <- gets stInPanel + if inPanel || ("panel" `notElem` classes && null attribs) + then addAnchor ident <$> toJiraBlocks blocks + else do + modify $ \st -> st{ stInPanel = True } + jiraBlocks <- toJiraBlocks blocks + modify $ \st -> st{ stInPanel = inPanel } + let params = map (uncurry Jira.Parameter) attribs + return $ singleton (Jira.Panel params $ addAnchor ident jiraBlocks) -- | Creates a Jira header toJiraHeader :: PandocMonad m diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index b618c3970..0c6f48853 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -79,4 +79,34 @@ tests = "{noformat}\npreformatted\n text.\n{noformat}" ] ] + + , testGroup "blocks" + [ testGroup "div" + [ "empty attributes" =: + divWith nullAttr (para "interesting text") =?> + "interesting text" + + , "just identifier" =: + divWith ("a", [], []) (para "interesting text") =?> + "{anchor:a}interesting text" + + , "with class 'panel'" =: + divWith ("", ["panel"], []) (para "Contents!") =?> + "{panel}\nContents\\!\n{panel}\n" + + , "panel with id" =: + divWith ("b", ["panel"], []) (para "text") =?> + "{panel}\n{anchor:b}text\n{panel}\n" + + , "title attribute" =: + divWith ("", [], [("title", "Gimme!")]) (para "Contents!") =?> + "{panel:title=Gimme!}\nContents\\!\n{panel}\n" + + , "nested panels" =: + let panelAttr = ("", ["panel"], []) + in divWith panelAttr (para "hi" <> + divWith panelAttr (para "wassup?")) =?> + "{panel}\nhi\n\nwassup?\n{panel}\n" + ] + ] ] -- cgit v1.2.3 From 00e8d0678edac34af7a412642fbf7d85442b15aa Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 13 Mar 2021 14:28:30 +0100 Subject: Jira reader: mark divs created from panels with class "panel". Closes: tarleb/jira-wiki-markup#2 --- src/Text/Pandoc/Readers/Jira.hs | 4 ++-- test/Tests/Readers/Jira.hs | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index 5a5d0ee1e..89aecbf56 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -71,10 +71,10 @@ toPandocCodeBlocks langMay params txt = Nothing -> [] in codeBlockWith ("", classes, map paramToPair params) txt --- | Create a pandoc @'Div'@ +-- | Create a pandoc @'Div'@ from a panel. toPandocDiv :: [Jira.Parameter] -> [Jira.Block] -> Blocks toPandocDiv params = - divWith ("", [], map paramToPair params) . foldMap jiraToPandocBlocks + divWith ("", ["panel"], map paramToPair params) . foldMap jiraToPandocBlocks paramToPair :: Jira.Parameter -> (Text, Text) paramToPair (Jira.Parameter key value) = (key, value) diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs index 189b504f7..cb7dde4ea 100644 --- a/test/Tests/Readers/Jira.hs +++ b/test/Tests/Readers/Jira.hs @@ -96,6 +96,12 @@ tests = simpleTable [para "Name"] [[para "Test"]] ] + , testGroup "panel" + [ "simple panel" =: + "{panel}\nInterviewer: Jane Doe{panel}\n" =?> + divWith ("", ["panel"], []) (para "Interviewer: Jane Doe") + ] + , testGroup "inlines" [ "emphasis" =: "*quid pro quo*" =?> -- cgit v1.2.3 From eed18d231cc706e27a1495d46e8c05dd18a0938f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 13 Mar 2021 12:05:52 -0800 Subject: Use integral values for w:tblW in docx. Cloess #7141. --- src/Text/Pandoc/Writers/Docx.hs | 2 +- test/docx/golden/table_one_row.docx | Bin 9908 -> 9906 bytes test/docx/golden/table_with_list_cell.docx | Bin 10227 -> 10225 bytes test/docx/golden/tables.docx | Bin 10244 -> 10241 bytes 4 files changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 89c71d773..c47bfb2ea 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1031,7 +1031,7 @@ blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do map mkcell cells let textwidth = 7920 -- 5.5 in in twips, 1/20 pt let fullrow = 5000 -- 100% specified in pct - let rowwidth = fullrow * sum widths + let (rowwidth :: Int) = round $ fullrow * sum widths let mkgridcol w = mknode "w:gridCol" [("w:w", tshow (floor (textwidth * w) :: Integer))] () let hasHeader = not $ all null headers diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index a1d2323c2..1178f7c6e 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 2f3a831a7..84be2720f 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index af066107c..140366d8b 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From 35b66a76718205c303f416bf0afc01c098e8a171 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 13 Mar 2021 12:50:44 -0800 Subject: MediaWiki reader: Allow block-level content in notes (ref). Closes #7145. --- src/Text/Pandoc/Readers/MediaWiki.hs | 10 +++++++++- test/command/7145.md | 12 ++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 test/command/7145.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index cdb746c67..9f4d5e170 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -112,12 +112,14 @@ newBlockTags = ["haskell","syntaxhighlight","source","gallery","references"] isBlockTag' :: Tag Text -> Bool isBlockTag' tag@(TagOpen t _) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline +isBlockTag' (TagClose "ref") = True -- needed so 'special' doesn't parse it isBlockTag' tag@(TagClose t) = (isBlockTag tag || t `elem` newBlockTags) && t `notElem` eitherBlockOrInline isBlockTag' tag = isBlockTag tag isInlineTag' :: Tag Text -> Bool isInlineTag' (TagComment _) = True +isInlineTag' (TagClose "ref") = False -- see below inlineTag isInlineTag' t = not (isBlockTag' t) eitherBlockOrInline :: [Text] @@ -554,11 +556,17 @@ variable = try $ do contents <- manyTillChar anyChar (try $ string "}}}") return $ "{{{" <> contents <> "}}}" +singleParaToPlain :: Blocks -> Blocks +singleParaToPlain bs = + case B.toList bs of + [Para ils] -> B.fromList [Plain ils] + _ -> bs + inlineTag :: PandocMonad m => MWParser m Inlines inlineTag = do (tag, _) <- lookAhead $ htmlTag isInlineTag' case tag of - TagOpen "ref" _ -> B.note . B.plain <$> inlinesInTags "ref" + TagOpen "ref" _ -> B.note . singleParaToPlain <$> blocksInTags "ref" TagOpen "nowiki" _ -> try $ do (_,raw) <- htmlTag (~== tag) if T.any (== '/') raw diff --git a/test/command/7145.md b/test/command/7145.md new file mode 100644 index 000000000..a04345890 --- /dev/null +++ b/test/command/7145.md @@ -0,0 +1,12 @@ +``` +% pandoc -f mediawiki -t native +Maecenas at sapien tempor, pretium turpis ut, imperdiet augue.<ref>This is a multiline + +reference +<i>with</i> +empty + +linebreaks</ref> Nulla ut massa eget ex venenatis lobortis id in eros. +^D +[Para [Str "Maecenas",Space,Str "at",Space,Str "sapien",Space,Str "tempor,",Space,Str "pretium",Space,Str "turpis",Space,Str "ut,",Space,Str "imperdiet",Space,Str "augue.",Note [Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "multiline"],Para [Str "reference",SoftBreak,RawInline (Format "html") "<i>",Str "with",RawInline (Format "html") "</i>",SoftBreak,Str "empty"],Para [Str "linebreaks"]],Space,Str "Nulla",Space,Str "ut",Space,Str "massa",Space,Str "eget",Space,Str "ex",Space,Str "venenatis",Space,Str "lobortis",Space,Str "id",Space,Str "in",Space,Str "eros."]] +``` -- cgit v1.2.3 From 35688c42627dce4641f4f61253e3d3786452b61a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 13 Mar 2021 22:03:36 +0100 Subject: T.P.App.FormatHeuristics: shorten code, improve docs. --- src/Text/Pandoc/App/FormatHeuristics.hs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/App/FormatHeuristics.hs b/src/Text/Pandoc/App/FormatHeuristics.hs index 65a1a7b82..bdf8c6667 100644 --- a/src/Text/Pandoc/App/FormatHeuristics.hs +++ b/src/Text/Pandoc/App/FormatHeuristics.hs @@ -15,18 +15,24 @@ module Text.Pandoc.App.FormatHeuristics ) where import Data.Char (toLower) +import Data.Foldable (asum) import Data.Text (Text) import System.FilePath (takeExtension) --- Determine default format based on file extensions. +-- | Determines default format based on file extensions; uses the format +-- of the first extension that's associated with a format. +-- +-- Examples: +-- +-- > formatFromFilePaths ["text.unknown", "no-extension"] +-- Nothing +-- +-- > formatFromFilePaths ["my.md", "other.rst"] +-- Just "markdown" formatFromFilePaths :: [FilePath] -> Maybe Text -formatFromFilePaths [] = Nothing -formatFromFilePaths (x:xs) = - case formatFromFilePath x of - Just f -> Just f - Nothing -> formatFromFilePaths xs +formatFromFilePaths = asum . map formatFromFilePath --- Determine format based on file extension +-- | Determines format based on file extension. formatFromFilePath :: FilePath -> Maybe Text formatFromFilePath x = case takeExtension (map toLower x) of -- cgit v1.2.3 From 3622097da360ab83112eb26b4f6231488d747a95 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 14 Mar 2021 00:09:36 -0800 Subject: Handle 'nocite' better with --biblatex and --natbib. Previously the nocite metadata field was ignored with these formats. Now it populates a `nocite-ids` template variable and causes a `\nocite` command to be issued. Closes #4585. --- data/templates/default.latex | 3 +++ src/Text/Pandoc/Writers/LaTeX.hs | 12 ++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/data/templates/default.latex b/data/templates/default.latex index 50fad2a49..830bf8012 100644 --- a/data/templates/default.latex +++ b/data/templates/default.latex @@ -372,6 +372,9 @@ $for(bibliography)$ \addbibresource{$bibliography$} $endfor$ $endif$ +$if(nocite-ids)$ +\nocite{$for(nocite-ids)$$it$$sep$, $endfor$} +$endif$ $if(csl-refs)$ \newlength{\cslhangindent} \setlength{\cslhangindent}{1.5em} diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 180aaa44d..6a205a798 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -145,6 +146,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do let dirs = query (extract "dir") blocks + let nociteIds = query (\case + Cite cs _ -> map citationId cs + _ -> []) + $ lookupMetaInlines "nocite" meta + let context = defField "toc" (writerTableOfContents options) $ defField "toc-depth" (tshow (writerTOCDepth options - @@ -177,9 +183,11 @@ pandocToLaTeX options (Pandoc meta blocks) = do else id) $ (case writerCiteMethod options of Natbib -> defField "biblio-title" biblioTitle . - defField "natbib" True + defField "natbib" True . + defField "nocite-ids" nociteIds Biblatex -> defField "biblio-title" biblioTitle . - defField "biblatex" True + defField "biblatex" True . + defField "nocite-ids" nociteIds _ -> id) $ defField "colorlinks" (any hasStringValue ["citecolor", "urlcolor", "linkcolor", "toccolor", -- cgit v1.2.3 From 24191a2a278c0dec30bacd66b78cbb8cc8d91324 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 15 Mar 2021 10:37:35 -0700 Subject: Use foldl' instead of foldl everywhere. --- src/Text/Pandoc/App/CommandLineOptions.hs | 4 ++-- src/Text/Pandoc/Citeproc/Locator.hs | 3 ++- src/Text/Pandoc/Class/PandocMonad.hs | 3 ++- src/Text/Pandoc/Extensions.hs | 3 ++- src/Text/Pandoc/Lua/Filter.hs | 3 ++- src/Text/Pandoc/Readers/Docx/Combine.hs | 4 ++-- src/Text/Pandoc/Readers/HTML/TagCategories.hs | 1 + src/Text/Pandoc/Readers/Markdown.hs | 6 +++--- src/Text/Pandoc/Readers/Odt/Arrows/State.hs | 4 ++-- src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs | 4 ++-- src/Text/Pandoc/Readers/Odt/StyleReader.hs | 4 ++-- src/Text/Pandoc/Readers/Textile.hs | 4 ++-- src/Text/Pandoc/Shared.hs | 4 ++-- src/Text/Pandoc/Writers/HTML.hs | 8 ++++---- src/Text/Pandoc/Writers/RST.hs | 4 ++-- src/Text/Pandoc/Writers/Texinfo.hs | 4 ++-- 16 files changed, 34 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/App/CommandLineOptions.hs b/src/Text/Pandoc/App/CommandLineOptions.hs index b4483f756..a6df12715 100644 --- a/src/Text/Pandoc/App/CommandLineOptions.hs +++ b/src/Text/Pandoc/App/CommandLineOptions.hs @@ -31,7 +31,7 @@ import Data.Aeson.Encode.Pretty (encodePretty', Config(..), keyOrder, defConfig, Indent(..), NumberFormat(..)) import Data.Bifunctor (second) import Data.Char (toLower) -import Data.List (intercalate, sort) +import Data.List (intercalate, sort, foldl') #ifdef _WINDOWS #if MIN_VERSION_base(4,12,0) import Data.List (isPrefixOf) @@ -93,7 +93,7 @@ parseOptionsFromArgs options' defaults prg rawArgs = do ("Try " ++ prg ++ " --help for more information.") -- thread option data structure through all supplied option actions - opts <- foldl (>>=) (return defaults) actions + opts <- foldl' (>>=) (return defaults) actions let mbArgs = case args of [] -> Nothing xs -> Just xs diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index dba762c02..44416ca12 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -7,6 +7,7 @@ where import Citeproc.Types import Data.Text (Text) import qualified Data.Text as T +import Data.List (foldl') import Text.Parsec import Text.Pandoc.Definition import Text.Pandoc.Parsing (romanNumeral) @@ -139,7 +140,7 @@ pBalancedBraces braces p = try $ do where except = notFollowedBy pBraces >> p -- outer and inner - surround = foldl (\a (open, close) -> sur open close except <|> a) + surround = foldl' (\a (open, close) -> sur open close except <|> a) except braces diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 86c8de79e..293a822a0 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -59,6 +59,7 @@ import Control.Monad.Except (MonadError (catchError, throwError), MonadTrans, lift, when) import Data.Digest.Pure.SHA (sha1, showDigest) import Data.Maybe (fromMaybe) +import Data.List (foldl') import Data.Time (UTCTime) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, posixSecondsToUTCTime) @@ -612,7 +613,7 @@ checkExistence fn = do -- | Canonicalizes a file path by removing redundant @.@ and @..@. makeCanonical :: FilePath -> FilePath makeCanonical = Posix.joinPath . transformPathParts . splitDirectories - where transformPathParts = reverse . foldl go [] + where transformPathParts = reverse . foldl' go [] go as "." = as go (_:as) ".." = as go as x = x : as diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 3b96f9e04..266a09e3c 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -34,6 +34,7 @@ module Text.Pandoc.Extensions ( Extension(..) where import Data.Bits (clearBit, setBit, testBit, (.|.)) import Data.Data (Data) +import Data.List (foldl') import qualified Data.Text as T import Data.Typeable (Typeable) import GHC.Generics (Generic) @@ -593,7 +594,7 @@ parseFormatSpec :: T.Text parseFormatSpec = parse formatSpec "" where formatSpec = do name <- formatName - (extsToEnable, extsToDisable) <- foldl (flip ($)) ([],[]) <$> + (extsToEnable, extsToDisable) <- foldl' (flip ($)) ([],[]) <$> many extMod return (T.pack name, reverse extsToEnable, reverse extsToDisable) formatName = many1 $ noneOf "-+" diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index bffe01a34..90967f295 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -22,6 +22,7 @@ import Control.Monad.Catch (finally, try) import Data.Data (Data, DataType, dataTypeConstrs, dataTypeName, dataTypeOf, showConstr, toConstr, tyconUQname) import Data.Foldable (foldrM) +import Data.List (foldl') import Data.Map (Map) import Data.Maybe (fromMaybe) import Foreign.Lua (Lua, Peekable, Pushable, StackIndex) @@ -204,7 +205,7 @@ walkMeta lf (Pandoc m bs) = do walkPandoc :: LuaFilter -> Pandoc -> Lua Pandoc walkPandoc (LuaFilter fnMap) = - case foldl mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of + case foldl' mplus Nothing (map (`Map.lookup` fnMap) pandocFilterNames) of Just fn -> \x -> runFilterFunction fn x *> singleElement x Nothing -> return diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index bcf26c4a3..7c6d01769 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -182,7 +182,7 @@ isAttrModifier _ = False smushInlines :: [Inlines] -> Inlines smushInlines xs = combineInlines xs' mempty - where xs' = foldl combineInlines mempty xs + where xs' = foldl' combineInlines mempty xs smushBlocks :: [Blocks] -> Blocks -smushBlocks xs = foldl combineBlocks mempty xs +smushBlocks xs = foldl' combineBlocks mempty xs diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs index b7bd40fee..67aba1cb1 100644 --- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs +++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs @@ -23,6 +23,7 @@ where import Data.Set (Set, fromList, unions) import Data.Text (Text) +import Data.List (foldl') eitherBlockOrInline :: Set Text eitherBlockOrInline = fromList diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 34edbcc17..dc94fc2d6 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -21,7 +21,7 @@ module Text.Pandoc.Readers.Markdown ( import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) -import Data.List (transpose, elemIndex, sortOn) +import Data.List (transpose, elemIndex, sortOn, foldl') import qualified Data.Map as M import Data.Maybe import qualified Data.Set as Set @@ -357,7 +357,7 @@ referenceKey = try $ do addKvs <- option [] $ guardEnabled Ext_mmd_link_attributes >> many (try $ spnl >> keyValAttr) blanklines - let attr' = extractIdClass $ foldl (\x f -> f x) attr addKvs + let attr' = extractIdClass $ foldl' (\x f -> f x) attr addKvs target = (escapeURI $ trimr src, tit) st <- getState let oldkeys = stateKeys st @@ -613,7 +613,7 @@ attributes = try $ do spnl attrs <- many (attribute <* spnl) char '}' - return $ foldl (\x f -> f x) nullAttr attrs + return $ foldl' (\x f -> f x) nullAttr attrs attribute :: PandocMonad m => MarkdownParser m (Attr -> Attr) attribute = identifierAttr <|> classAttr <|> keyValAttr <|> specialAttr diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs index 93c6b5e79..96515bf56 100644 --- a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -22,7 +22,7 @@ module Text.Pandoc.Readers.Odt.Arrows.State where import Control.Arrow import qualified Control.Category as Cat import Control.Monad - +import Data.List (foldl') import Text.Pandoc.Readers.Odt.Arrows.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible @@ -122,7 +122,7 @@ iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f iterateSL :: (Foldable f, MonadPlus m) => ArrowState s x y -> ArrowState s (f x) (m y) -iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f +iterateSL a = ArrowState $ \(s,f) -> foldl' a' (s,mzero) f where a' (s',m) x = second (mplus m.return) $ runArrowState a (s',x) diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs index 0d921e23b..341903046 100644 --- a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -64,12 +64,12 @@ import qualified Data.Map as M import Data.Text (Text) import Data.Default import Data.Maybe +import Data.List (foldl') import qualified Text.Pandoc.XML.Light as XML import Text.Pandoc.Readers.Odt.Arrows.State import Text.Pandoc.Readers.Odt.Arrows.Utils - import Text.Pandoc.Readers.Odt.Generic.Namespaces import Text.Pandoc.Readers.Odt.Generic.Utils import Text.Pandoc.Readers.Odt.Generic.Fallible @@ -293,7 +293,7 @@ readNSattributes = fromState $ \state -> maybe (state, failEmpty ) => XMLConverterState nsID extraState -> Maybe (XMLConverterState nsID extraState) extractNSAttrs startState - = foldl (\state d -> state >>= addNS d) + = foldl' (\state d -> state >>= addNS d) (Just startState) nsAttribs where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index 5e10f896c..b722aa07d 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -44,7 +44,7 @@ import Control.Arrow import Data.Default import qualified Data.Foldable as F -import Data.List (unfoldr) +import Data.List (unfoldr, foldl') import qualified Data.Map as M import Data.Maybe import Data.Text (Text) @@ -120,7 +120,7 @@ fontPitchReader = executeInSub NsOffice "font-face-decls" ( &&& lookupDefaultingAttr NsStyle "font-pitch" )) - >>?^ ( M.fromList . foldl accumLegalPitches [] ) + >>?^ ( M.fromList . foldl' accumLegalPitches [] ) ) `ifFailedDo` returnV (Right M.empty) where accumLegalPitches ls (Nothing,_) = ls accumLegalPitches ls (Just n,p) = (n,p):ls diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 860da2dc3..99238c7f0 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -38,7 +38,7 @@ module Text.Pandoc.Readers.Textile ( readTextile) where import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import Data.Char (digitToInt, isUpper) -import Data.List (intersperse, transpose) +import Data.List (intersperse, transpose, foldl') import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), fromAttrib) @@ -627,7 +627,7 @@ code2 = do -- | Html / CSS attributes attributes :: PandocMonad m => ParserT Text ParserState m Attr -attributes = foldl (flip ($)) ("",[],[]) <$> +attributes = foldl' (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index d11ad13f5..0ce9396b3 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -109,7 +109,7 @@ import qualified Data.Bifunctor as Bifunctor import Data.Char (isAlpha, isLower, isSpace, isUpper, toLower, isAlphaNum, generalCategory, GeneralCategory(NonSpacingMark, SpacingCombiningMark, EnclosingMark, ConnectorPunctuation)) -import Data.List (find, intercalate, intersperse, sortOn) +import Data.List (find, intercalate, intersperse, sortOn, foldl') import qualified Data.Map as M import Data.Maybe (mapMaybe, fromMaybe) import Data.Monoid (Any (..)) @@ -840,7 +840,7 @@ mapLeft = Bifunctor.first -- > collapseFilePath "parent/foo/.." == "parent" -- > collapseFilePath "/parent/foo/../../bar" == "/bar" collapseFilePath :: FilePath -> FilePath -collapseFilePath = Posix.joinPath . reverse . foldl go [] . splitDirectories +collapseFilePath = Posix.joinPath . reverse . foldl' go [] . splitDirectories where go rs "." = rs go r@(p:rs) ".." = case p of diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 2f33cd467..332de1545 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -30,7 +30,7 @@ module Text.Pandoc.Writers.HTML ( ) where import Control.Monad.State.Strict import Data.Char (ord) -import Data.List (intercalate, intersperse, partition, delete, (\\)) +import Data.List (intercalate, intersperse, partition, delete, (\\), foldl') import Data.List.NonEmpty (NonEmpty((:|))) import Data.Maybe (fromMaybe, isJust, isNothing) import qualified Data.Set as Set @@ -544,7 +544,7 @@ tagWithAttributes opts html5 selfClosing tagname attr = addAttrs :: PandocMonad m => WriterOptions -> Attr -> Html -> StateT WriterState m Html -addAttrs opts attr h = foldl (!) h <$> attrsToHtml opts attr +addAttrs opts attr h = foldl' (!) h <$> attrsToHtml opts attr toAttrs :: PandocMonad m => [(Text, Text)] -> StateT WriterState m [Attribute] @@ -926,7 +926,7 @@ blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do numstyle'] else []) l <- ordList opts contents - return $ foldl (!) l attribs + return $ foldl' (!) l attribs blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, defs) -> do term' <- liftM H.dt $ inlineListToHtml opts term @@ -1407,7 +1407,7 @@ inlineToHtml opts inline = do Just "audio" -> mediaTag H5.audio "Audio" Just _ -> (H5.embed, []) _ -> imageTag - return $ foldl (!) tag $ attributes ++ specAttrs + return $ foldl' (!) tag $ attributes ++ specAttrs -- note: null title included, as in Markdown.pl (Note contents) -> do notes <- gets stNotes diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index d01e13db4..54d042332 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -16,7 +16,7 @@ reStructuredText: <http://docutils.sourceforge.net/rst.html> module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Control.Monad.State.Strict import Data.Char (isSpace) -import Data.List (transpose, intersperse) +import Data.List (transpose, intersperse, foldl') import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text (Text) @@ -509,7 +509,7 @@ flatten outer | null contents = [outer] | otherwise = combineAll contents where contents = dropInlineParent outer - combineAll = foldl combine [] + combineAll = foldl' combine [] combine :: [Inline] -> Inline -> [Inline] combine f i = diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 53da70f84..9d695563f 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -15,7 +15,7 @@ module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict import Data.Char (chr, ord) -import Data.List (maximumBy, transpose) +import Data.List (maximumBy, transpose, foldl') import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) @@ -271,7 +271,7 @@ tableAnyRowToTexinfo :: PandocMonad m -> [[Block]] -> TI m (Doc Text) tableAnyRowToTexinfo itemtype aligns cols = - (literal itemtype $$) . foldl (\row item -> row $$ + (literal itemtype $$) . foldl' (\row item -> row $$ (if isEmpty row then empty else text " @tab ") <> item) empty <$> zipWithM alignedBlock aligns cols alignedBlock :: PandocMonad m -- cgit v1.2.3 From 805d12ac9cc91913cbb6275e8ef94f49985df5e1 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 15 Mar 2021 14:21:52 -0700 Subject: Remove an unneeded import --- src/Text/Pandoc/Readers/HTML/TagCategories.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML/TagCategories.hs b/src/Text/Pandoc/Readers/HTML/TagCategories.hs index 67aba1cb1..b7bd40fee 100644 --- a/src/Text/Pandoc/Readers/HTML/TagCategories.hs +++ b/src/Text/Pandoc/Readers/HTML/TagCategories.hs @@ -23,7 +23,6 @@ where import Data.Set (Set, fromList, unions) import Data.Text (Text) -import Data.List (foldl') eitherBlockOrInline :: Set Text eitherBlockOrInline = fromList -- cgit v1.2.3 From 87538966a029358d42bc71ab0cd4c1ed9e125520 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 16 Mar 2021 13:05:29 -0700 Subject: Removed unused LANGUAGE pragmas. --- src/Text/Pandoc/Writers/LaTeX/Caption.hs | 1 - src/Text/Pandoc/XML/Light/Proc.hs | 1 - 2 files changed, 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX/Caption.hs b/src/Text/Pandoc/Writers/LaTeX/Caption.hs index 7b9ce186f..ab4d365cc 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Caption.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Caption.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.LaTeX.Caption Copyright : Copyright (C) 2006-2021 John MacFarlane diff --git a/src/Text/Pandoc/XML/Light/Proc.hs b/src/Text/Pandoc/XML/Light/Proc.hs index b53c4b545..a1fb200ff 100644 --- a/src/Text/Pandoc/XML/Light/Proc.hs +++ b/src/Text/Pandoc/XML/Light/Proc.hs @@ -1,5 +1,4 @@ {-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.XML.Light.Proc Copyright : Copyright (C) 2007 Galois, Inc., 2021 John MacFarlane -- cgit v1.2.3 From 7bf4be04b0c04ccfd02a2a907071b8747380904f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 17 Mar 2021 09:10:44 -0700 Subject: Fix regression with `tex_math_backslash` in Markdown reader. Added regression test. Closes #7155. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index dc94fc2d6..c836a896b 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1503,7 +1503,7 @@ inline = do '$' -> math '~' -> strikeout <|> subscript '<' -> autoLink <|> spanHtml <|> rawHtmlInline <|> ltSign - '\\' -> escapedNewline <|> escapedChar <|> rawLaTeXInline' + '\\' -> math <|> escapedNewline <|> escapedChar <|> rawLaTeXInline' '@' -> cite <|> exampleRef '"' -> smart '\'' -> smart -- cgit v1.2.3 From c3f9e8c12256d19ed6c89d15470945855ee16a94 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 17 Mar 2021 22:31:20 -0700 Subject: Docx writer: make nsid in abstractNum deterministic. Previously we assigned a random number (though in a deterministic way). But changes in the random package mean we get different results now on different architectures, even with the same random seed. We don't need random values; so now we just assign a value based on the list number id, which is guaranteed to be unique to the list marker. --- src/Text/Pandoc/Writers/Docx.hs | 10 ++++------ test/docx/golden/block_quotes.docx | Bin 10071 -> 10067 bytes test/docx/golden/codeblock.docx | Bin 9920 -> 9916 bytes test/docx/golden/comments.docx | Bin 10258 -> 10254 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10021 -> 10017 bytes test/docx/golden/custom_style_preserve.docx | Bin 10650 -> 10646 bytes test/docx/golden/custom_style_reference.docx | Bin 12403 -> 12400 bytes test/docx/golden/definition_list.docx | Bin 9920 -> 9916 bytes .../docx/golden/document-properties-short-desc.docx | Bin 9925 -> 9922 bytes test/docx/golden/document-properties.docx | Bin 10404 -> 10399 bytes test/docx/golden/headers.docx | Bin 10059 -> 10055 bytes test/docx/golden/image.docx | Bin 26736 -> 26733 bytes test/docx/golden/inline_code.docx | Bin 9859 -> 9855 bytes test/docx/golden/inline_formatting.docx | Bin 10038 -> 10035 bytes test/docx/golden/inline_images.docx | Bin 26793 -> 26789 bytes test/docx/golden/link_in_notes.docx | Bin 10081 -> 10077 bytes test/docx/golden/links.docx | Bin 10251 -> 10248 bytes test/docx/golden/lists.docx | Bin 10332 -> 10314 bytes test/docx/golden/lists_continuing.docx | Bin 10123 -> 10110 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10210 -> 10192 bytes test/docx/golden/lists_restarting.docx | Bin 10122 -> 10108 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10216 -> 10212 bytes test/docx/golden/notes.docx | Bin 10028 -> 10024 bytes test/docx/golden/raw-blocks.docx | Bin 9960 -> 9956 bytes test/docx/golden/raw-bookmarks.docx | Bin 10094 -> 10090 bytes test/docx/golden/table_one_row.docx | Bin 9906 -> 9903 bytes test/docx/golden/table_with_list_cell.docx | Bin 10225 -> 10212 bytes test/docx/golden/tables.docx | Bin 10241 -> 10238 bytes test/docx/golden/track_changes_deletion.docx | Bin 9903 -> 9899 bytes test/docx/golden/track_changes_insertion.docx | Bin 9886 -> 9882 bytes test/docx/golden/track_changes_move.docx | Bin 9920 -> 9916 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10032 -> 10028 bytes test/docx/golden/unicode.docx | Bin 9845 -> 9841 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9892 -> 9888 bytes 34 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index c47bfb2ea..20bcd0324 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,6 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import System.Random (randomRs, mkStdGen) import Text.Pandoc.BCP47 (getLang, renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P @@ -695,8 +694,7 @@ baseListId = 1000 mkNumbering :: [ListMarker] -> [Element] mkNumbering lists = elts ++ zipWith mkNum lists [baseListId..(baseListId + length lists - 1)] - where elts = zipWith mkAbstractNum (ordNub lists) $ - randomRs (0x10000000, 0xFFFFFFFF) $ mkStdGen 1848 + where elts = map mkAbstractNum (ordNub lists) maxListLevel :: Int maxListLevel = 8 @@ -713,10 +711,10 @@ mkNum marker numid = $ mknode "w:startOverride" [("w:val",tshow start)] ()) [0..maxListLevel] -mkAbstractNum :: ListMarker -> Integer -> Element -mkAbstractNum marker nsid = +mkAbstractNum :: ListMarker -> Element +mkAbstractNum marker = mknode "w:abstractNum" [("w:abstractNumId",listMarkerToId marker)] - $ mknode "w:nsid" [("w:val", T.pack $ printf "%8x" nsid)] () + $ mknode "w:nsid" [("w:val", "A" <> listMarkerToId marker)] () : mknode "w:multiLevelType" [("w:val","multilevel")] () : map (mkLvl marker) [0..maxListLevel] diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index ed7d1165c..af1b33ca2 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 07ae75676..f748f1f01 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index e5f034378..ac9d56680 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index 174942135..f27727edd 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index b5c31a851..1da499d6a 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index c42ca1b05..4d2fe245d 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 1cb4c1fd7..f386fcea3 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 7122456ea..debe9a3f6 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 616ba0f81..cd17400bf 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index c30dcdee9..d3af8a3dd 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 8a704b41e..1c4e738c0 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index b1906c8c4..35f43f19f 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 8adf1cf75..8de3f70f6 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 584117503..e76558be9 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 8859fe55c..88bae8142 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index b80f3b3ba..455adcfc7 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 35beed68a..081d9ddba 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 2c29fd674..fc9213fc5 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 10a948886..b636fd3f8 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 5b90e74a0..252623215 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index cc81b46d1..a8c3f5478 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 1394dc442..43e650ebd 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 0d1688694..fe4f7845b 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index be1caef2d..45e90608f 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 1178f7c6e..6eaea2ac2 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 84be2720f..45a97ccaa 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 140366d8b..115a16a48 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 9cc7a075f..247725aaa 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index f8b8dcfde..3863afef2 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 1c3baf0bf..5c848b63a 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 28686970d..e0c843713 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index 7051cefbd..78a773bdd 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index 9df631640..c66a45b74 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From fd76e605cde2639d6bb1b6891dd4b097a6eded01 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Mar 2021 21:02:05 -0700 Subject: T.P.Readers.Odt.StyleReader: rewrite foldr1 use as foldr. This avoids a partial function. --- src/Text/Pandoc/Readers/Odt/StyleReader.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs index b722aa07d..ca791ad1e 100644 --- a/src/Text/Pandoc/Readers/Odt/StyleReader.hs +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -563,12 +563,13 @@ readListLevelStyle levelType = readAttr NsText "level" -- chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle -chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing - | otherwise = Just ( F.foldr1 select ls ) +chooseMostSpecificListLevelStyle ls = F.foldr select Nothing ls where - select ( ListLevelStyle t1 p1 s1 f1 b1 ) - ( ListLevelStyle t2 p2 s2 f2 _ ) - = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) b1 + select l Nothing = Just l + select ( ListLevelStyle t1 p1 s1 f1 b1 ) + ( Just ( ListLevelStyle t2 p2 s2 f2 _ )) + = Just $ ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) + (selectLinf f1 f2) b1 select' LltNumbered _ = LltNumbered select' _ LltNumbered = LltNumbered select' _ _ = LltBullet -- cgit v1.2.3 From 67e173bda116ba9793dd625f782a12ab8e35839b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Mar 2021 21:10:22 -0700 Subject: Remove another foldr1 partial function use. --- src/Text/Pandoc/Readers/Txt2Tags.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 08083b177..0ce8e286f 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -261,7 +261,7 @@ table = try $ do rows <- many1 (many commentLine *> tableRow) let columns = transpose rows let ncolumns = length columns - let aligns = map (foldr1 findAlign . map fst) columns + let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns let rows' = map (map snd) rows let size = maximum (map length rows') let rowsPadded = map (pad size) rows' @@ -278,10 +278,11 @@ pad :: (Monoid a) => Int -> [a] -> [a] pad n xs = xs ++ replicate (n - length xs) mempty -findAlign :: Alignment -> Alignment -> Alignment -findAlign x y - | x == y = x - | otherwise = AlignDefault +findAlign :: (Alignment, a) -> Maybe Alignment -> Maybe Alignment +findAlign (x,_) (Just y) + | x == y = Just x + | otherwise = Just AlignDefault +findAlign (x,_) Nothing = Just x headerRow :: T2T [(Alignment, Blocks)] headerRow = genericRow (string "||") -- cgit v1.2.3 From 1da62083156cf493771274164be67a2bead196dc Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Mar 2021 21:30:59 -0700 Subject: Rewrite a foldl1 as a foldl'. --- src/Text/Pandoc/Readers/HTML.hs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b73c138ab..c3e68afd8 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -32,6 +32,7 @@ import Data.Char (isAlphaNum, isLetter) import Data.Default (Default (..), def) import Data.Foldable (for_) import Data.List.Split (splitWhen) +import Data.List (foldl') import qualified Data.Map as M import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Monoid (First (..)) @@ -360,7 +361,10 @@ pDefListItem = try $ do terms <- many1 (try $ skipMany nonItem >> pInTags "dt" inline) defs <- many1 (try $ skipMany nonItem >> pInTags "dd" block) skipMany nonItem - let term = foldl1 (\x y -> x <> B.linebreak <> y) $ map trimInlines terms + let term = foldl' (\x y -> if null x + then trimInlines y + else x <> B.linebreak <> trimInlines y) + mempty terms return (term, map (fixPlains True) defs) fixPlains :: Bool -> Blocks -> Blocks -- cgit v1.2.3 From f0e4b9cc3cde01d64e25d331c5b4f3d62d2129b5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Mar 2021 21:37:56 -0700 Subject: Require safe >= 0.3.18 and remove cpp. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/LaTeX.hs | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 5936d113f..e46518e96 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -477,7 +477,7 @@ library parsec >= 3.1 && < 3.2, process >= 1.2.3 && < 1.7, random >= 1 && < 1.3, - safe >= 0.3 && < 0.4, + safe >= 0.3.18 && < 0.4, scientific >= 0.3 && < 0.4, skylighting >= 0.10.4.1 && < 0.11, skylighting-core >= 0.10.4.1 && < 0.11, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index ceac261d2..21a563bed 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ScopedTypeVariables #-} @@ -97,11 +96,7 @@ parseLaTeX = do let doc' = doc bs let headerLevel (Header n _ _) = [n] headerLevel _ = [] -#if MIN_VERSION_safe(0,3,18) let bottomLevel = minimumBound 1 $ query headerLevel doc' -#else - let bottomLevel = minimumDef 1 $ query headerLevel doc' -#endif let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils adjustHeaders _ x = x let (Pandoc _ bs') = -- cgit v1.2.3 From 3428248debb14066e7fb1ef216927d3d62e3a43a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 18 Mar 2021 23:01:12 -0700 Subject: Use minimumDef instead of minimum (partial function). --- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/Org/Blocks.hs | 4 ++-- src/Text/Pandoc/Readers/Textile.hs | 4 +++- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 7 ++----- 4 files changed, 8 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 21a563bed..32f7fc5af 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -96,7 +96,7 @@ parseLaTeX = do let doc' = doc bs let headerLevel (Header n _ _) = [n] headerLevel _ = [] - let bottomLevel = minimumBound 1 $ query headerLevel doc' + let bottomLevel = minimumDef 1 $ query headerLevel doc' let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils adjustHeaders _ x = x let (Pandoc _ bs') = diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index d1aff701e..c6f27118b 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -39,6 +39,7 @@ import Data.Functor (($>)) import Data.List (foldl', intersperse) import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Text (Text) +import Safe (minimumDef) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B @@ -542,8 +543,7 @@ include = try $ do in case (minlvl >>= safeRead :: Maybe Int) of Nothing -> blks Just lvl -> let levels = Walk.query headerLevel blks - -- CAVE: partial function in else - curMin = if null levels then 0 else minimum levels + curMin = minimumDef 0 levels in Walk.walk (shiftHeader (curMin - lvl)) blks headerLevel :: Block -> [Int] diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 99238c7f0..50ffd36ec 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -53,6 +53,7 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared (crFilter, trim, tshow) +import Safe (minimumDef) -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m @@ -376,7 +377,8 @@ table = try $ do (toprow, rest) _ -> (mempty, rawrows) let nbOfCols = maximum $ map length (headers:rows) - let aligns = map minimum $ transpose $ map (map (snd . fst)) (headers:rows) + let aligns = map (minimumDef AlignDefault) $ + transpose $ map (map (snd . fst)) (headers:rows) let toRow = Row nullAttr . map B.simpleCell toHeaderRow l = [toRow l | not (null l)] return $ B.table (B.simpleCaption $ B.plain caption) diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 5caeb0753..b26a7ff3e 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -50,6 +50,7 @@ import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation import Text.Pandoc.Shared (tshow) import Skylighting (fromColor) +import Safe (minimumDef) -- |The 'EMU' type is used to specify sizes in English Metric Units. type EMU = Integer @@ -1427,11 +1428,7 @@ presentationToRels pres@(Presentation _ slides) = do -- all relWithoutSlide rels (unless they're 1) -- 3. If we have a notesmaster slide, we make space for that as well. - let minRelNotOne = case filter (1<) $ map relId relsWeKeep of - [] -> 0 -- doesn't matter in this case, since - -- there will be nothing to map the - -- function over - l -> minimum l + let minRelNotOne = minimumDef 0 $ filter (1 <) $ map relId relsWeKeep modifyRelNum :: Int -> Int modifyRelNum 1 = 1 -- cgit v1.2.3 From a31731b8e2825c5bc8d7fcc9a61ce92b9d28d040 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 19 Mar 2021 10:11:08 -0700 Subject: Docx reader: Don't reimplement NonEmpty. --- src/Text/Pandoc/Readers/Docx.hs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 00de6a0cd..22dd54193 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -85,6 +85,7 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Error import Text.Pandoc.Logging +import Data.List.NonEmpty (nonEmpty) readDocx :: PandocMonad m => ReaderOptions @@ -648,11 +649,6 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do cells <- mapM rowToBlocksList rows let width = maybe 0 maximum $ nonEmpty $ map rowLength parts - -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out - -- our own, see - -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155 - nonEmpty [] = Nothing - nonEmpty l = Just l rowLength :: Docx.Row -> Int rowLength (Docx.Row c) = length c -- cgit v1.2.3 From 8d5116381b20442bb3fa58dac1ef7d44db618823 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 19 Mar 2021 10:30:32 -0700 Subject: Use NonEmpty instead of minimumDef. --- src/Text/Pandoc/Readers/LaTeX.hs | 4 ++-- src/Text/Pandoc/Readers/Org/Blocks.hs | 4 ++-- src/Text/Pandoc/Readers/Textile.hs | 4 ++-- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 5 +++-- 4 files changed, 9 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 32f7fc5af..851756065 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -68,7 +68,7 @@ import Text.Pandoc.Readers.LaTeX.Inline (acronymCommands, refCommands, listingsLanguage) import Text.Pandoc.Shared import Text.Pandoc.Walk -import Safe +import Data.List.NonEmpty (nonEmpty) -- for debugging: -- import Text.Pandoc.Extensions (getDefaultExtensions) @@ -96,7 +96,7 @@ parseLaTeX = do let doc' = doc bs let headerLevel (Header n _ _) = [n] headerLevel _ = [] - let bottomLevel = minimumDef 1 $ query headerLevel doc' + let bottomLevel = maybe 1 minimum $ nonEmpty $ query headerLevel doc' let adjustHeaders m (Header n attr ils) = Header (n+m) attr ils adjustHeaders _ x = x let (Pandoc _ bs') = diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index c6f27118b..883434cdc 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -39,7 +39,7 @@ import Data.Functor (($>)) import Data.List (foldl', intersperse) import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Text (Text) -import Safe (minimumDef) +import Data.List.NonEmpty (nonEmpty) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B @@ -543,7 +543,7 @@ include = try $ do in case (minlvl >>= safeRead :: Maybe Int) of Nothing -> blks Just lvl -> let levels = Walk.query headerLevel blks - curMin = minimumDef 0 levels + curMin = maybe 0 minimum $ nonEmpty levels in Walk.walk (shiftHeader (curMin - lvl)) blks headerLevel :: Block -> [Int] diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 50ffd36ec..e26b902f1 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -53,7 +53,7 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared (crFilter, trim, tshow) -import Safe (minimumDef) +import Data.List.NonEmpty (nonEmpty) -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m @@ -377,7 +377,7 @@ table = try $ do (toprow, rest) _ -> (mempty, rawrows) let nbOfCols = maximum $ map length (headers:rows) - let aligns = map (minimumDef AlignDefault) $ + let aligns = map (maybe AlignDefault minimum . nonEmpty) $ transpose $ map (map (snd . fst)) (headers:rows) let toRow = Row nullAttr . map B.simpleCell toHeaderRow l = [toRow l | not (null l)] diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index b26a7ff3e..4dbf32c4e 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -50,7 +50,7 @@ import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation import Text.Pandoc.Shared (tshow) import Skylighting (fromColor) -import Safe (minimumDef) +import Data.List.NonEmpty (nonEmpty) -- |The 'EMU' type is used to specify sizes in English Metric Units. type EMU = Integer @@ -1428,7 +1428,8 @@ presentationToRels pres@(Presentation _ slides) = do -- all relWithoutSlide rels (unless they're 1) -- 3. If we have a notesmaster slide, we make space for that as well. - let minRelNotOne = minimumDef 0 $ filter (1 <) $ map relId relsWeKeep + let minRelNotOne = maybe 0 minimum $ nonEmpty + $ filter (1 <) $ map relId relsWeKeep modifyRelNum :: Int -> Int modifyRelNum 1 = 1 -- cgit v1.2.3 From 4002c35a9184ecc1c9a6553e9ee28e283cb1fd0a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 19 Mar 2021 11:55:59 -0700 Subject: Protect partial uses of maximum with NonEmpty. --- src/Text/Pandoc/Readers/DocBook.hs | 6 ++-- src/Text/Pandoc/Readers/HTML/Table.hs | 3 +- src/Text/Pandoc/Readers/Haddock.hs | 3 +- src/Text/Pandoc/Readers/Markdown.hs | 3 +- src/Text/Pandoc/Readers/Textile.hs | 4 +-- src/Text/Pandoc/Readers/Txt2Tags.hs | 22 +++++++----- src/Text/Pandoc/Writers/AsciiDoc.hs | 3 +- src/Text/Pandoc/Writers/ConTeXt.hs | 6 ++-- src/Text/Pandoc/Writers/DokuWiki.hs | 4 ++- src/Text/Pandoc/Writers/Man.hs | 4 +-- src/Text/Pandoc/Writers/Markdown.hs | 37 ++++++++++--------- src/Text/Pandoc/Writers/Markdown/Inline.hs | 9 ++--- src/Text/Pandoc/Writers/Ms.hs | 4 +-- src/Text/Pandoc/Writers/Muse.hs | 9 +++-- src/Text/Pandoc/Writers/Org.hs | 5 +-- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 42 +++++++++++----------- src/Text/Pandoc/Writers/Powerpoint/Presentation.hs | 5 ++- src/Text/Pandoc/Writers/RST.hs | 6 ++-- src/Text/Pandoc/Writers/Shared.hs | 6 ++-- src/Text/Pandoc/Writers/Texinfo.hs | 9 +++-- src/Text/Pandoc/Writers/ZimWiki.hs | 4 ++- 21 files changed, 108 insertions(+), 86 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index d38b07864..6f5bb0ad4 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -18,6 +18,7 @@ import Data.Either (rights) import Data.Foldable (asum) import Data.Generics import Data.List (intersperse,elemIndex) +import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe,mapMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -949,9 +950,8 @@ parseBlock (Elem e) = (x >= '0' && x <= '9') || x == '.') w if n > 0 then Just n else Nothing - let numrows = case bodyrows of - [] -> 0 - xs -> maximum $ map length xs + let numrows = maybe 0 maximum $ nonEmpty + $ map length bodyrows let aligns = case colspecs of [] -> replicate numrows AlignDefault cs -> map toAlignment cs diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 6179ea8e7..ad0b51253 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where import Control.Applicative ((<|>)) import Data.Maybe (fromMaybe) +import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks) @@ -216,7 +217,7 @@ normalize widths head' bodies foot = do let rows = headRows head' <> concatMap bodyRows bodies <> footRows foot let cellWidth (Cell _ _ _ (ColSpan cs) _) = cs let rowLength = foldr (\cell acc -> cellWidth cell + acc) 0 . rowCells - let ncols = maximum (map rowLength rows) + let ncols = maybe 0 maximum $ nonEmpty $ map rowLength rows let tblType = tableType (map rowCells rows) -- fail on empty table if null rows diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 25d69f040..48454e353 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -17,6 +17,7 @@ module Text.Pandoc.Readers.Haddock import Control.Monad.Except (throwError) import Data.List (intersperse) +import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) import Data.Text (Text, unpack) import qualified Data.Text as T @@ -92,7 +93,7 @@ docHToBlocks d' = then ([], map toCells bodyRows) else (toCells (head headerRows), map toCells (tail headerRows ++ bodyRows)) - colspecs = replicate (maximum (map length body)) + colspecs = replicate (maybe 0 maximum (nonEmpty (map length body))) (AlignDefault, ColWidthDefault) in B.table B.emptyCaption colspecs diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index c836a896b..a86286b3a 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -22,6 +22,7 @@ import Control.Monad import Control.Monad.Except (throwError) import Data.Char (isAlphaNum, isPunctuation, isSpace) import Data.List (transpose, elemIndex, sortOn, foldl') +import Data.List.NonEmpty (NonEmpty(..)) import qualified Data.Map as M import Data.Maybe import qualified Data.Set as Set @@ -1364,7 +1365,7 @@ pipeTable = try $ do lines' <- many pipeTableRow let lines'' = map (take (length aligns) <$>) lines' let maxlength = maximum $ - map (\x -> T.length . stringify $ runF x def) (heads' : lines'') + fmap (\x -> T.length . stringify $ runF x def) (heads' :| lines'') numColumns <- getOption readerColumns let widths = if maxlength > numColumns then map (\len -> diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index e26b902f1..8d7900de4 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -39,6 +39,7 @@ import Control.Monad (guard, liftM) import Control.Monad.Except (throwError) import Data.Char (digitToInt, isUpper) import Data.List (intersperse, transpose, foldl') +import Data.List.NonEmpty (NonEmpty(..), nonEmpty) import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup (Tag (..), fromAttrib) @@ -53,7 +54,6 @@ import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared (crFilter, trim, tshow) -import Data.List.NonEmpty (nonEmpty) -- | Parse a Textile text and return a Pandoc document. readTextile :: PandocMonad m @@ -376,7 +376,7 @@ table = try $ do (toprow:rest) | any (fst . fst) toprow -> (toprow, rest) _ -> (mempty, rawrows) - let nbOfCols = maximum $ map length (headers:rows) + let nbOfCols = maximum $ fmap length (headers :| rows) let aligns = map (maybe AlignDefault minimum . nonEmpty) $ transpose $ map (map (snd . fst)) (headers:rows) let toRow = Row nullAttr . map B.simpleCell diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 0ce8e286f..f27a3fc2c 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -1,4 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE LambdaCase #-} {- | Module : Text.Pandoc.Readers.Txt2Tags Copyright : Copyright (C) 2014 Matthew Pickering @@ -19,6 +20,7 @@ import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader (Reader, asks, runReader) import Data.Default import Data.List (intercalate, transpose) +import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -53,14 +55,16 @@ getT2TMeta = do inps <- P.getInputFiles outp <- fromMaybe "" <$> P.getOutputFile curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime - let getModTime = fmap (formatTime defaultTimeLocale "%T") . - P.getModificationTime - curMtime <- case inps of - [] -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime - _ -> catchError - (maximum <$> mapM getModTime inps) - (const (return "")) - return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp + curMtime <- catchError + ((nonEmpty <$> mapM P.getModificationTime inps) >>= + \case + Nothing -> + formatTime defaultTimeLocale "%T" <$> P.getZonedTime + Just ts -> return $ + formatTime defaultTimeLocale "%T" $ maximum ts) + (const (return "")) + return $ T2TMeta (T.pack curDate) (T.pack curMtime) + (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document readTxt2Tags :: PandocMonad m @@ -263,7 +267,7 @@ table = try $ do let ncolumns = length columns let aligns = map (fromMaybe AlignDefault . foldr findAlign Nothing) columns let rows' = map (map snd) rows - let size = maximum (map length rows') + let size = maybe 0 maximum $ nonEmpty $ map length rows' let rowsPadded = map (pad size) rows' let headerPadded = if null tableHeader then mempty else pad size tableHeader let toRow = Row nullAttr . map B.simpleCell diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index b4ef7c8b9..69e608ef9 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -22,6 +22,7 @@ module Text.Pandoc.Writers.AsciiDoc (writeAsciiDoc, writeAsciiDoctor) where import Control.Monad.State.Strict import Data.Char (isPunctuation, isSpace) import Data.List (intercalate, intersperse) +import Data.List.NonEmpty (NonEmpty(..)) import Data.Maybe (fromMaybe, isJust) import qualified Data.Set as Set import qualified Data.Text as T @@ -274,7 +275,7 @@ blockToAsciiDoc opts block@(Table _ blkCapt specs thead tbody tfoot) = do let colwidth = if writerWrapText opts == WrapAuto then writerColumns opts else 100000 - let maxwidth = maximum $ map offset (head':rows') + let maxwidth = maximum $ fmap offset (head' :| rows') let body = if maxwidth > colwidth then vsep rows' else vcat rows' let border = separator <> text "===" return $ diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 4d44842e2..1c56388ed 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Control.Monad.State.Strict import Data.Char (ord, isDigit) import Data.List (intersperse) +import Data.List.NonEmpty (nonEmpty) import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -228,8 +229,9 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do Period -> "stopper=." OneParen -> "stopper=)" TwoParens -> "left=(,stopper=)" - let width = maximum $ map T.length $ take (length contents) - (orderedListMarkers (start, style', delim)) + let width = maybe 0 maximum $ nonEmpty $ map T.length $ + take (length contents) + (orderedListMarkers (start, style', delim)) let width' = (toEnum width + 1) / 2 let width'' = if width' > (1.5 :: Double) then "width=" <> tshow width' <> "em" diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 7df47c912..602c70ebe 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -27,6 +27,7 @@ import Control.Monad.Reader (ReaderT, asks, local, runReaderT) import Control.Monad.State.Strict (StateT, evalStateT) import Data.Default (Default (..)) import Data.List (transpose) +import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -172,7 +173,8 @@ blockToDokuWiki opts (Table _ blkCapt specs thead tbody tfoot) = do then return [] else zipWithM (tableItemToDokuWiki opts) aligns headers rows' <- mapM (zipWithM (tableItemToDokuWiki opts) aligns) rows - let widths = map (maximum . map T.length) $ transpose (headers':rows') + let widths = map (maybe 0 maximum . nonEmpty . map T.length) + $ transpose (headers':rows') let padTo (width, al) s = case width - T.length s of x | x > 0 -> diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index edb70f53e..87b2d8d21 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -16,6 +16,7 @@ Conversion of 'Pandoc' documents to roff man page format. module Text.Pandoc.Writers.Man ( writeMan ) where import Control.Monad.State.Strict import Data.List (intersperse) +import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -175,8 +176,7 @@ blockToMan opts (BulletList items) = do return (vcat contents) blockToMan opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs - let indent = 1 + - maximum (map T.length markers) + let indent = 1 + maybe 0 maximum (nonEmpty (map T.length markers)) contents <- mapM (\(num, item) -> orderedListItemToMan opts num indent item) $ zip markers items return (vcat contents) diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 533bcc071..4d9f3d5b0 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -24,6 +24,7 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Default import Data.List (intersperse, sortOn, transpose) +import Data.List.NonEmpty (nonEmpty, NonEmpty(..)) import qualified Data.Map as M import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.Set as Set @@ -492,19 +493,20 @@ blockToMarkdown' opts (CodeBlock attribs str) = do | isEnabled Ext_fenced_code_blocks opts -> tildes <> attrs <> cr <> literal str <> cr <> tildes <> blankline _ -> nest (writerTabStop opts) (literal str) <> blankline - where endline c = literal $ case [T.length ln - | ln <- map trim (T.lines str) - , T.pack [c,c,c] `T.isPrefixOf` ln - , T.all (== c) ln] of - [] -> T.replicate 3 $ T.singleton c - xs -> T.replicate (maximum xs + 1) $ T.singleton c - backticks = endline '`' - tildes = endline '~' - attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ " " <> attrsToMarkdown attribs - else case attribs of - (_,cls:_,_) -> " " <> literal cls - _ -> empty + where + endlineLen c = maybe 3 ((+1) . maximum) $ nonEmpty $ + [T.length ln + | ln <- map trim (T.lines str) + , T.pack [c,c,c] `T.isPrefixOf` ln + , T.all (== c) ln] + endline c = literal $ T.replicate (endlineLen c) $ T.singleton c + backticks = endline '`' + tildes = endline '~' + attrs = if isEnabled Ext_fenced_code_attributes opts + then nowrap $ " " <> attrsToMarkdown attribs + else case attribs of + (_,cls:_,_) -> " " <> literal cls + _ -> empty blockToMarkdown' opts (BlockQuote blocks) = do variant <- asks envVariant -- if we're writing literate haskell, put a space before the bird tracks @@ -517,7 +519,7 @@ blockToMarkdown' opts (BlockQuote blocks) = do return $ prefixed leader contents <> blankline blockToMarkdown' opts t@(Table _ blkCapt specs thead tbody tfoot) = do let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot - let numcols = maximum (length aligns : length widths : + let numcols = maximum (length aligns :| length widths : map length (headers:rows)) caption' <- inlineListToMarkdown opts caption let caption'' @@ -619,7 +621,8 @@ pipeTable headless aligns rawHeaders rawRows = do blockFor AlignCenter x y = cblock (x + 2) (sp <> y <> sp) <> lblock 0 empty blockFor AlignRight x y = rblock (x + 2) (y <> sp) <> lblock 0 empty blockFor _ x y = lblock (x + 2) (sp <> y) <> lblock 0 empty - let widths = map (max 3 . maximum . map offset) $ transpose (rawHeaders : rawRows) + let widths = map (max 3 . maybe 3 maximum . nonEmpty . map offset) $ + transpose (rawHeaders : rawRows) let torow cs = nowrap $ literal "|" <> hcat (intersperse (literal "|") $ zipWith3 blockFor aligns widths (map chomp cs)) @@ -653,11 +656,11 @@ pandocTable opts multiline headless aligns widths rawHeaders rawRows = do -- Number of characters per column necessary to output every cell -- without requiring a line break. -- The @+2@ is needed for specifying the alignment. - let numChars = (+ 2) . maximum . map offset + let numChars = (+ 2) . maybe 0 maximum . nonEmpty . map offset -- Number of characters per column necessary to output every cell -- without requiring a line break *inside a word*. -- The @+2@ is needed for specifying the alignment. - let minNumChars = (+ 2) . maximum . map minOffset + let minNumChars = (+ 2) . maybe 0 maximum . nonEmpty . map minOffset let columns = transpose (rawHeaders : rawRows) -- minimal column width without wrapping a single word let relWidth w col = diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index 19157701e..e35e1a0b9 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -17,6 +17,7 @@ import Control.Monad.Reader import Control.Monad.State.Strict import Data.Char (isAlphaNum, isDigit) import Data.List (find, intersperse) +import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Text (Text) @@ -383,9 +384,7 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do let tickGroups = filter (T.any (== '`')) $ T.group str - let longest = if null tickGroups - then 0 - else maximum $ map T.length tickGroups + let longest = maybe 0 maximum $ nonEmpty $ map T.length tickGroups let marker = T.replicate (longest + 1) "`" let spacer = if longest == 0 then "" else " " let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr @@ -438,9 +437,7 @@ inlineToMarkdown opts (Math DisplayMath str) = (texMathToInlines DisplayMath str >>= inlineListToMarkdown opts) inlineToMarkdown opts il@(RawInline f str) = do let tickGroups = filter (T.any (== '`')) $ T.group str - let numticks = if null tickGroups - then 1 - else 1 + maximum (map T.length tickGroups) + let numticks = 1 + maybe 0 maximum (nonEmpty (map T.length tickGroups)) variant <- asks envVariant let Format fmt = f let rawAttribInline = return $ diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 48395c420..0ed7a8a64 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -23,6 +23,7 @@ module Text.Pandoc.Writers.Ms ( writeMs ) where import Control.Monad.State.Strict import Data.Char (isLower, isUpper, ord) import Data.List (intercalate, intersperse) +import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as Map import Data.Maybe (catMaybes) import Data.Text (Text) @@ -274,8 +275,7 @@ blockToMs opts (BulletList items) = do return (vcat contents) blockToMs opts (OrderedList attribs items) = do let markers = take (length items) $ orderedListMarkers attribs - let indent = 2 + - maximum (map T.length markers) + let indent = 2 + maybe 0 maximum (nonEmpty (map T.length markers)) contents <- mapM (\(num, item) -> orderedListItemToMs opts num indent item) $ zip markers items setFirstPara diff --git a/src/Text/Pandoc/Writers/Muse.hs b/src/Text/Pandoc/Writers/Muse.hs index bf3265107..d5100f43f 100644 --- a/src/Text/Pandoc/Writers/Muse.hs +++ b/src/Text/Pandoc/Writers/Muse.hs @@ -31,6 +31,7 @@ import Control.Monad.State.Strict import Data.Char (isAlphaNum, isAsciiLower, isAsciiUpper, isDigit, isSpace) import Data.Default import Data.List (intersperse, transpose) +import Data.List.NonEmpty (nonEmpty, NonEmpty(..)) import qualified Data.Set as Set import qualified Data.Text as T import Data.Text (Text) @@ -158,7 +159,8 @@ simpleTable caption headers rows = do caption' <- inlineListToMuse caption headers' <- mapM blockListToMuse headers rows' <- mapM (mapM blockListToMuse) rows - let widthsInChars = maximum . map offset <$> transpose (headers' : rows') + let widthsInChars = maybe 0 maximum . nonEmpty . map offset <$> + transpose (headers' : rows') let hpipeBlocks sep blocks = hcat $ intersperse sep' blocks where sep' = lblock (T.length sep) $ literal sep let makeRow sep = hpipeBlocks sep . zipWith lblock widthsInChars @@ -238,7 +240,7 @@ blockToMuse (DefinitionList items) = do label' <- local (\env -> env { envOneLine = True, envAfterSpace = True }) $ inlineListToMuse' label let ind = offset' label' -- using Text.DocLayout.offset results in round trip failures hang ind (nowrap label') . vcat <$> mapM descriptionToMuse defs - where offset' d = maximum (0: map T.length + where offset' d = maximum (0 :| map T.length (T.lines $ render Nothing d)) descriptionToMuse :: PandocMonad m => [Block] @@ -269,7 +271,8 @@ blockToMuse (Table _ blkCapt specs thead tbody tfoot) = (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot blocksToDoc opts blocks = local (\env -> env { envOptions = opts }) $ blockListToMuse blocks - numcols = maximum (length aligns : length widths : map length (headers:rows)) + numcols = maximum + (length aligns :| length widths : map length (headers:rows)) isSimple = onlySimpleTableCells (headers : rows) && all (== 0) widths blockToMuse (Div _ bs) = flatBlockListToMuse bs blockToMuse Null = return empty diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 29d58a161..bb645eaf9 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -19,6 +19,7 @@ module Text.Pandoc.Writers.Org (writeOrg) where import Control.Monad.State.Strict import Data.Char (isAlphaNum, isDigit) import Data.List (intersect, intersperse, partition, transpose) +import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Class.PandocMonad (PandocMonad, report) @@ -163,7 +164,7 @@ blockToOrg (Table _ blkCapt specs thead tbody tfoot) = do else "#+caption: " <> caption'' headers' <- mapM blockListToOrg headers rawRows <- mapM (mapM blockListToOrg) rows - let numChars = maximum . map offset + let numChars = maybe 0 maximum . nonEmpty . map offset -- FIXME: width is not being used. let widthsInChars = map numChars $ transpose (headers' : rawRows) @@ -198,7 +199,7 @@ blockToOrg (OrderedList (start, _, delim) items) = do x -> x let markers = take (length items) $ orderedListMarkers (start, Decimal, delim') - let maxMarkerLength = maximum $ map T.length markers + let maxMarkerLength = maybe 0 maximum . nonEmpty $ map T.length markers let markers' = map (\m -> let s = maxMarkerLength - T.length m in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToOrg markers' items diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 4dbf32c4e..0e515b3c2 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -475,15 +475,16 @@ registerLink link = do linkReg <- gets stLinkIds mediaReg <- gets stMediaIds hasSpeakerNotes <- curSlideHasSpeakerNotes - let maxLinkId = case M.lookup curSlideId linkReg of - Just mp -> case M.keys mp of - [] -> if hasSpeakerNotes then 2 else 1 - ks -> maximum ks - Nothing -> if hasSpeakerNotes then 2 else 1 - maxMediaId = case M.lookup curSlideId mediaReg of - Just [] -> if hasSpeakerNotes then 2 else 1 - Just mInfos -> maximum $ map mInfoLocalId mInfos - Nothing -> if hasSpeakerNotes then 2 else 1 + let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of + Just xs -> maximum xs + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 + maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of + Just mInfos -> maximum $ fmap mInfoLocalId mInfos + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 maxId = max maxLinkId maxMediaId slideLinks = case M.lookup curSlideId linkReg of Just mp -> M.insert (maxId + 1) link mp @@ -498,20 +499,19 @@ registerMedia fp caption = do mediaReg <- gets stMediaIds globalIds <- gets stMediaGlobalIds hasSpeakerNotes <- curSlideHasSpeakerNotes - let maxLinkId = case M.lookup curSlideId linkReg of - Just mp -> case M.keys mp of - [] -> if hasSpeakerNotes then 2 else 1 - ks -> maximum ks - Nothing -> if hasSpeakerNotes then 2 else 1 - maxMediaId = case M.lookup curSlideId mediaReg of - Just [] -> if hasSpeakerNotes then 2 else 1 - Just mInfos -> maximum $ map mInfoLocalId mInfos - Nothing -> if hasSpeakerNotes then 2 else 1 + let maxLinkId = case M.lookup curSlideId linkReg >>= nonEmpty . M.keys of + Just ks -> maximum ks + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 + maxMediaId = case M.lookup curSlideId mediaReg >>= nonEmpty of + Just mInfos -> maximum $ fmap mInfoLocalId mInfos + Nothing + | hasSpeakerNotes -> 2 + | otherwise -> 1 maxLocalId = max maxLinkId maxMediaId - maxGlobalId = case M.elems globalIds of - [] -> 0 - ids -> maximum ids + maxGlobalId = maybe 0 maximum $ nonEmpty $ M.elems globalIds (imgBytes, mbMt) <- P.fetchItem $ T.pack fp let imgExt = (mbMt >>= extensionFromMimeType >>= (\x -> return $ "." <> x)) diff --git a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs index affec38aa..9246a93e9 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Presentation.hs @@ -45,6 +45,7 @@ module Text.Pandoc.Writers.Powerpoint.Presentation ( documentToPresentation import Control.Monad.Reader import Control.Monad.State import Data.List (intercalate) +import Data.List.NonEmpty (nonEmpty) import Data.Default import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -363,9 +364,7 @@ inlineToParElems (Note blks) = do then return [] else do notes <- gets stNoteIds - let maxNoteId = case M.keys notes of - [] -> 0 - lst -> maximum lst + let maxNoteId = maybe 0 maximum $ nonEmpty $ M.keys notes curNoteId = maxNoteId + 1 modify $ \st -> st { stNoteIds = M.insert curNoteId blks notes } local (\env -> env{envRunProps = (envRunProps env){rLink = Just $ InternalTarget endNotesSlideId}}) $ diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 54d042332..0b9fc8331 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -17,6 +17,7 @@ module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Control.Monad.State.Strict import Data.Char (isSpace) import Data.List (transpose, intersperse, foldl') +import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text (Text) @@ -335,7 +336,7 @@ blockToRST (OrderedList (start, style', delim) items) = do then replicate (length items) "#." else take (length items) $ orderedListMarkers (start, style', delim) - let maxMarkerLength = maximum $ map T.length markers + let maxMarkerLength = maybe 0 maximum $ nonEmpty $ map T.length markers let markers' = map (\m -> let s = maxMarkerLength - T.length m in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToRST markers' items @@ -761,8 +762,7 @@ simpleTable opts blocksToDoc headers rows = do then return [] else fixEmpties <$> mapM (blocksToDoc opts) headers rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows - let numChars [] = 0 - numChars xs = maximum . map offset $ xs + let numChars = maybe 0 maximum . nonEmpty . map offset let colWidths = map numChars $ transpose (headerDocs : rowDocs) let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths) diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index fc3f8ff3a..91ecb310b 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -44,6 +44,7 @@ import Control.Monad (zipWithM) import Data.Aeson (ToJSON (..), encode) import Data.Char (chr, ord, isSpace) import Data.List (groupBy, intersperse, transpose, foldl') +import Data.List.NonEmpty (NonEmpty(..), nonEmpty) import Data.Text.Conversions (FromText(..)) import qualified Data.Map as M import qualified Data.Text as T @@ -224,7 +225,7 @@ gridTable :: (Monad m, HasChars a) -> m (Doc a) gridTable opts blocksToDoc headless aligns widths headers rows = do -- the number of columns will be used in case of even widths - let numcols = maximum (length aligns : length widths : + let numcols = maximum (length aligns :| length widths : map length (headers:rows)) let officialWidthsInChars widths' = map ( (\x -> if x < 1 then 1 else x) . @@ -253,8 +254,7 @@ gridTable opts blocksToDoc headless aligns widths headers rows = do let handleFullWidths widths' = do rawHeaders' <- mapM (blocksToDoc opts) headers rawRows' <- mapM (mapM (blocksToDoc opts)) rows - let numChars [] = 0 - numChars xs = maximum . map offset $ xs + let numChars = maybe 0 maximum . nonEmpty . map offset let minWidthsInChars = map numChars $ transpose (rawHeaders' : rawRows') let widthsInChars' = zipWith max diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 9d695563f..0146fdfd8 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -16,6 +16,7 @@ import Control.Monad.Except (throwError) import Control.Monad.State.Strict import Data.Char (chr, ord) import Data.List (maximumBy, transpose, foldl') +import Data.List.NonEmpty (nonEmpty) import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) @@ -238,9 +239,13 @@ blockToTexinfo (Table _ blkCapt specs thead tbody tfoot) = do colDescriptors <- if all (== 0) widths then do -- use longest entry instead of column widths - cols <- mapM (mapM (liftM (T.unpack . render Nothing . hcat) . mapM blockToTexinfo)) $ + cols <- mapM (mapM (fmap (T.unpack . render Nothing . hcat) . + mapM blockToTexinfo)) $ transpose $ heads : rows - return $ concatMap ((\x -> "{"++x++"} ") . maximumBy (comparing length)) cols + return $ concatMap + ((\x -> "{"++x++"} ") . + maybe "" (maximumBy (comparing length)) . nonEmpty) + cols else return $ "@columnfractions " ++ concatMap (printf "%.2f ") widths let tableBody = text ("@multitable " ++ colDescriptors) $$ headers $$ diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index 9e45f0417..fcf9e000d 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -20,6 +20,7 @@ import Control.Monad (zipWithM) import Control.Monad.State.Strict (StateT, evalStateT, gets, modify) import Data.Default (Default (..)) import Data.List (transpose) +import Data.List.NonEmpty (nonEmpty) import qualified Data.Map as Map import Text.DocLayout (render, literal) import Data.Maybe (fromMaybe) @@ -143,7 +144,8 @@ blockToZimWiki opts (Table _ blkCapt specs thead tbody tfoot) = do then zipWithM (tableItemToZimWiki opts) aligns (head rows) else mapM (inlineListToZimWiki opts . removeFormatting)headers -- emphasis, links etc. are not allowed in table headers rows' <- mapM (zipWithM (tableItemToZimWiki opts) aligns) rows - let widths = map (maximum . map T.length) $ transpose (headers':rows') + let widths = map (maybe 0 maximum . nonEmpty . map T.length) $ + transpose (headers':rows') let padTo (width, al) s = case width - T.length s of x | x > 0 -> -- cgit v1.2.3 From 005f0fbcd558636f0d5db1203427a4d7b341f36e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 19 Mar 2021 12:34:20 -0700 Subject: T.P.Shared: Remove ToString, ToText typeclasses [API change]. T.P.Parsing: revise type of readWithM so that it takes a Text rather than a polymorphic ToText value. These typeclasses were there to ease the transition from String to Text. They are no longer needed, and they may clash with more useful versions under the same name. This will require a bump to 2.13. --- src/Text/Pandoc/Parsing.hs | 8 ++++---- src/Text/Pandoc/Shared.hs | 20 -------------------- 2 files changed, 4 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 8d3799c3e..10a08d410 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1128,13 +1128,13 @@ gridTableFooter = optional blanklines --- -- | Removes the ParsecT layer from the monad transformer stack -readWithM :: (Stream s m Char, ToText s) - => ParserT s st m a -- ^ parser +readWithM :: Monad m + => ParserT Text st m a -- ^ parser -> st -- ^ initial state - -> s -- ^ input + -> Text -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input + mapLeft (PandocParsecError input) <$> runParserT parser state "source" input -- | Parse a string with a given parser and state readWith :: Parser Text st a diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 0ce9396b3..46aea9c03 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -25,8 +25,6 @@ module Text.Pandoc.Shared ( ordNub, findM, -- * Text processing - ToString (..), - ToText (..), tshow, backslashEscapes, escapeStringUsing, @@ -183,24 +181,6 @@ findM p = foldr go (pure Nothing) -- Text processing -- -class ToString a where - toString :: a -> String - -instance ToString String where - toString = id - -instance ToString T.Text where - toString = T.unpack - -class ToText a where - toText :: a -> T.Text - -instance ToText String where - toText = T.pack - -instance ToText T.Text where - toText = id - tshow :: Show a => a -> T.Text tshow = T.pack . show -- cgit v1.2.3 From 7678c48122bb9bb7041b75ddc33061b93cbcdbb2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 19 Mar 2021 14:43:42 -0700 Subject: Hlint suggestion. --- src/Text/Pandoc/Readers/Txt2Tags.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index f27a3fc2c..d355a4b55 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -56,12 +56,13 @@ getT2TMeta = do outp <- fromMaybe "" <$> P.getOutputFile curDate <- formatTime defaultTimeLocale "%F" <$> P.getZonedTime curMtime <- catchError - ((nonEmpty <$> mapM P.getModificationTime inps) >>= - \case + (mapM P.getModificationTime inps >>= + (\case Nothing -> formatTime defaultTimeLocale "%T" <$> P.getZonedTime Just ts -> return $ formatTime defaultTimeLocale "%T" $ maximum ts) + . nonEmpty) (const (return "")) return $ T2TMeta (T.pack curDate) (T.pack curMtime) (intercalate ", " inps) outp -- cgit v1.2.3 From eacead3eb316dd8e2d75737589dc017de36326fb Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 19 Mar 2021 22:57:48 -0700 Subject: Fix fallback to default partials on templates. If the directory containing a template does not contain the partial, it should be sought in the default data files. Closes #7164. --- src/Text/Pandoc/Templates.hs | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Templates.hs b/src/Text/Pandoc/Templates.hs index e83f26329..7fd896641 100644 --- a/src/Text/Pandoc/Templates.hs +++ b/src/Text/Pandoc/Templates.hs @@ -34,6 +34,7 @@ import Control.Monad.Except (catchError, throwError) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Error +import System.IO.Error (isDoesNotExistError) -- | Wrap a Monad in this if you want partials to -- be taken only from the default data files. @@ -70,6 +71,9 @@ getTemplate tp = UTF8.toText <$> PandocResourceNotFound _ -> -- see #5987 on reason for takeFileName readDataFile ("templates" </> takeFileName tp) + PandocIOError _ ioe | isDoesNotExistError ioe -> + -- see #5987 on reason for takeFileName + readDataFile ("templates" </> takeFileName tp) _ -> throwError e)) -- | Get default template for the specified writer. -- cgit v1.2.3 From a1a57bce4e32cc26b968bcc2847a8e8da30f725b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Mar 2021 00:02:24 -0700 Subject: T.P.Shared: remove `backslashEscapes`, `escapeStringUsing`. [API change] These are inefficient association list lookups. Replace with more efficient functions in the writers that used them (with 10-25% performance improvements in haddock, org, rtf, texinfo writers). --- src/Text/Pandoc/Shared.hs | 13 ------------- src/Text/Pandoc/Writers/AsciiDoc.hs | 11 ++++++++--- src/Text/Pandoc/Writers/ConTeXt.hs | 6 +++++- src/Text/Pandoc/Writers/Haddock.hs | 15 +++++++++++++-- src/Text/Pandoc/Writers/LaTeX.hs | 14 +++++++++++++- src/Text/Pandoc/Writers/Org.hs | 15 +++++++++------ src/Text/Pandoc/Writers/RTF.hs | 26 ++++++++++++++++---------- src/Text/Pandoc/Writers/Texinfo.hs | 24 +++++++++++++----------- 8 files changed, 77 insertions(+), 47 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 46aea9c03..23adff909 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -26,8 +26,6 @@ module Text.Pandoc.Shared ( findM, -- * Text processing tshow, - backslashEscapes, - escapeStringUsing, elemText, notElemText, stripTrailingNewlines, @@ -184,17 +182,6 @@ findM p = foldr go (pure Nothing) tshow :: Show a => a -> T.Text tshow = T.pack . show --- | Returns an association list of backslash escapes for the --- designated characters. -backslashEscapes :: [Char] -- ^ list of special characters to escape - -> [(Char, T.Text)] -backslashEscapes = map (\ch -> (ch, T.pack ['\\',ch])) - --- | Escape a string of characters, using an association list of --- characters and strings. -escapeStringUsing :: [(Char, T.Text)] -> T.Text -> T.Text -escapeStringUsing tbl = T.concatMap $ \c -> fromMaybe (T.singleton c) $ lookup c tbl - -- | @True@ exactly when the @Char@ appears in the @Text@. elemText :: Char -> T.Text -> Bool elemText c = T.any (== c) diff --git a/src/Text/Pandoc/Writers/AsciiDoc.hs b/src/Text/Pandoc/Writers/AsciiDoc.hs index 69e608ef9..ab7e5f1a9 100644 --- a/src/Text/Pandoc/Writers/AsciiDoc.hs +++ b/src/Text/Pandoc/Writers/AsciiDoc.hs @@ -105,8 +105,11 @@ pandocToAsciiDoc opts (Pandoc meta blocks) = do -- | Escape special characters for AsciiDoc. escapeString :: Text -> Text -escapeString = escapeStringUsing escs - where escs = backslashEscapes "{" +escapeString t + | T.any (== '{') t = T.concatMap escChar t + | otherwise = t + where escChar '{' = "\\{" + escChar c = T.singleton c -- | Ordered list start parser for use in Para below. olMarker :: Parser Text ParserState Char @@ -496,7 +499,9 @@ inlineToAsciiDoc opts (Quoted qt lst) = do | otherwise -> [Str "``"] ++ lst ++ [Str "''"] inlineToAsciiDoc _ (Code _ str) = do isAsciidoctor <- gets asciidoctorVariant - let contents = literal (escapeStringUsing (backslashEscapes "`") str) + let escChar '`' = "\\'" + escChar c = T.singleton c + let contents = literal (T.concatMap escChar str) return $ if isAsciidoctor then text "`+" <> contents <> "+`" diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 1c56388ed..3c9975be8 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -434,9 +434,13 @@ inlineToConTeXt (Link _ txt (src, _)) = do put $ st {stNextRef = next + 1} let ref = "url" <> tshow next contents <- inlineListToConTeXt txt + let escChar '#' = "\\#" + escChar '%' = "\\%" + escChar c = T.singleton c + let escContextURL = T.concatMap escChar return $ "\\useURL" <> brackets (literal ref) - <> brackets (literal $ escapeStringUsing [('#',"\\#"),('%',"\\%")] src) + <> brackets (literal $ escContextURL src) <> (if isAutolink then empty else brackets empty <> brackets contents) diff --git a/src/Text/Pandoc/Writers/Haddock.hs b/src/Text/Pandoc/Writers/Haddock.hs index aaa19ed07..75e14714b 100644 --- a/src/Text/Pandoc/Writers/Haddock.hs +++ b/src/Text/Pandoc/Writers/Haddock.hs @@ -15,6 +15,7 @@ Haddock: <http://www.haskell.org/haddock/doc/html/> -} module Text.Pandoc.Writers.Haddock (writeHaddock) where import Control.Monad.State.Strict +import Data.Char (isAlphaNum) import Data.Default import Data.Text (Text) import qualified Data.Text as T @@ -71,8 +72,18 @@ notesToHaddock opts notes = -- | Escape special characters for Haddock. escapeString :: Text -> Text -escapeString = escapeStringUsing haddockEscapes - where haddockEscapes = backslashEscapes "\\/'`\"@<" +escapeString t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where + escChar '\\' = "\\\\" + escChar '/' = "\\/" + escChar '\'' = "\\'" + escChar '`' = "\\`" + escChar '"' = "\\\"" + escChar '@' = "\\@" + escChar '<' = "\\<" + escChar c = T.singleton c -- | Convert Pandoc block element to haddock. blockToHaddock :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 6a205a798..1c970e6ad 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -825,7 +825,19 @@ inlineToLaTeX (Code (_,classes,kvs) str) = do let chr = case "!\"'()*,-./:;?@" \\ T.unpack str of (c:_) -> c [] -> '!' - let str' = escapeStringUsing (backslashEscapes "\\{}%~_&#^") str + let isEscapable '\\' = True + isEscapable '{' = True + isEscapable '}' = True + isEscapable '%' = True + isEscapable '~' = True + isEscapable '_' = True + isEscapable '&' = True + isEscapable '#' = True + isEscapable '^' = True + isEscapable _ = False + let escChar c | isEscapable c = T.pack ['\\',c] + | otherwise = T.singleton c + let str' = T.concatMap escChar str -- we always put lstinline in a dummy 'passthrough' command -- (defined in the default template) so that we don't have -- to change the way we escape characters depending on whether diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index bb645eaf9..88a2b8314 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -84,12 +84,15 @@ noteToOrg num note = do -- | Escape special characters for Org. escapeString :: Text -> Text -escapeString = escapeStringUsing - [ ('\x2014',"---") - , ('\x2013',"--") - , ('\x2019',"'") - , ('\x2026',"...") - ] +escapeString t + | T.all (\c -> c < '\x2013' || c > '\x2026') t = t + | otherwise = T.concatMap escChar t + where + escChar '\x2013' = "--" + escChar '\x2014' = "---" + escChar '\x2019' = "'" + escChar '\x2026' = "..." + escChar c = T.singleton c isRawFormat :: Format -> Bool isRawFormat f = diff --git a/src/Text/Pandoc/Writers/RTF.hs b/src/Text/Pandoc/Writers/RTF.hs index cf27011c2..3527949b4 100644 --- a/src/Text/Pandoc/Writers/RTF.hs +++ b/src/Text/Pandoc/Writers/RTF.hs @@ -16,7 +16,7 @@ module Text.Pandoc.Writers.RTF ( writeRTF import Control.Monad.Except (catchError, throwError) import Control.Monad import qualified Data.ByteString as B -import Data.Char (chr, isDigit, ord) +import Data.Char (chr, isDigit, ord, isAlphaNum) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -137,15 +137,21 @@ handleUnicode = T.concatMap $ \c -> -- | Escape special characters. escapeSpecial :: Text -> Text -escapeSpecial = escapeStringUsing $ - [ ('\t',"\\tab ") - , ('\8216',"\\u8216'") - , ('\8217',"\\u8217'") - , ('\8220',"\\u8220\"") - , ('\8221',"\\u8221\"") - , ('\8211',"\\u8211-") - , ('\8212',"\\u8212-") - ] <> backslashEscapes "{\\}" +escapeSpecial t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where + escChar '\t' = "\\tab " + escChar '\8216' = "\\u8216'" + escChar '\8217' = "\\u8217'" + escChar '\8220' = "\\u8220\"" + escChar '\8221' = "\\u8221\"" + escChar '\8211' = "\\u8211-" + escChar '\8212' = "\\u8212-" + escChar '{' = "\\{" + escChar '}' = "\\}" + escChar '\\' = "\\\\" + escChar c = T.singleton c -- | Escape strings as needed for rich text format. stringToRTF :: Text -> Text diff --git a/src/Text/Pandoc/Writers/Texinfo.hs b/src/Text/Pandoc/Writers/Texinfo.hs index 0146fdfd8..6a33b4283 100644 --- a/src/Text/Pandoc/Writers/Texinfo.hs +++ b/src/Text/Pandoc/Writers/Texinfo.hs @@ -14,7 +14,7 @@ Conversion of 'Pandoc' format into Texinfo. module Text.Pandoc.Writers.Texinfo ( writeTexinfo ) where import Control.Monad.Except (throwError) import Control.Monad.State.Strict -import Data.Char (chr, ord) +import Data.Char (chr, ord, isAlphaNum) import Data.List (maximumBy, transpose, foldl') import Data.List.NonEmpty (nonEmpty) import Data.Ord (comparing) @@ -85,16 +85,18 @@ pandocToTexinfo options (Pandoc meta blocks) = do -- | Escape things as needed for Texinfo. stringToTexinfo :: Text -> Text -stringToTexinfo = escapeStringUsing texinfoEscapes - where texinfoEscapes = [ ('{', "@{") - , ('}', "@}") - , ('@', "@@") - , ('\160', "@ ") - , ('\x2014', "---") - , ('\x2013', "--") - , ('\x2026', "@dots{}") - , ('\x2019', "'") - ] +stringToTexinfo t + | T.all isAlphaNum t = t + | otherwise = T.concatMap escChar t + where escChar '{' = "@{" + escChar '}' = "@}" + escChar '@' = "@@" + escChar '\160' = "@ " + escChar '\x2014' = "---" + escChar '\x2013' = "--" + escChar '\x2026' = "@dots{}" + escChar '\x2019' = "'" + escChar c = T.singleton c escapeCommas :: PandocMonad m => TI m (Doc Text) -> TI m (Doc Text) escapeCommas parser = do -- cgit v1.2.3 From 82e8c29cb0a89d7129f459bef6696254ec56e0c6 Mon Sep 17 00:00:00 2001 From: Erik Rask <erik.rask@paligo.net> Date: Fri, 19 Feb 2021 13:05:35 +0100 Subject: Include Header.Attr.attributes as XML attributes on section Add key-value pairs found in the attributes list of Header.Attr as XML attributes on the corresponding section element. Any key name not allowed as an XML attribute name is dropped, as are keys with invalid values where they are defined as enums in DocBook, and xml:id (for DocBook 5)/id (for DocBook 4) to not intervene with computed identifiers. --- src/Text/Pandoc/Writers/Docbook.hs | 47 ++++++++++++++++++++++++++++++++++++-- test/Tests/Writers/Docbook.hs | 37 ++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index a6776608d..1f10c9d04 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -168,7 +168,7 @@ blockToDocbook :: PandocMonad m => WriterOptions -> Block -> DB m (Doc Text) blockToDocbook _ Null = return empty -- Add ids to paragraphs in divs with ids - this is needed for -- pandoc-citeproc to get link anchors in bibliographies: -blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do +blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) = do version <- ask -- Docbook doesn't allow sections with no content, so insert some if needed let bs = if null xs @@ -188,7 +188,10 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl _ ils : xs)) = do -- standalone documents will include them in the template. then [("xmlns", "http://docbook.org/ns/docbook"),("xmlns:xlink", "http://www.w3.org/1999/xlink")] else [] - attribs = nsAttr <> idAttr + + -- Populate miscAttr with Header.Attr.attributes, filtering out non-valid DocBook section attributes, id, and xml:id + miscAttr = filter (isSectionAttr version) attrs + attribs = nsAttr <> idAttr <> miscAttr title' <- inlinesToDocbook opts ils contents <- blocksToDocbook opts bs return $ inTags True tag attribs $ inTagsSimple "title" title' $$ contents @@ -451,3 +454,43 @@ idAndRole (id',cls,_) = ident <> role where ident = [("id", id') | not (T.null id')] role = [("role", T.unwords cls) | not (null cls)] + +isSectionAttr :: DocBookVersion -> (Text, Text) -> Bool +isSectionAttr _ ("label",_) = True +isSectionAttr _ ("status",_) = True +isSectionAttr DocBook5 ("annotations",_) = True +isSectionAttr DocBook5 ("dir","ltr") = True +isSectionAttr DocBook5 ("dir","rtl") = True +isSectionAttr DocBook5 ("dir","lro") = True +isSectionAttr DocBook5 ("dir","rlo") = True +isSectionAttr _ ("remap",_) = True +isSectionAttr _ ("revisionflag","changed") = True +isSectionAttr _ ("revisionflag","added") = True +isSectionAttr _ ("revisionflag","deleted") = True +isSectionAttr _ ("revisionflag","off") = True +isSectionAttr _ ("role",_) = True +isSectionAttr DocBook5 ("version",_) = True +isSectionAttr DocBook5 ("xml:base",_) = True +isSectionAttr DocBook5 ("xml:lang",_) = True +isSectionAttr _ ("xreflabel",_) = True +isSectionAttr DocBook5 ("linkend",_) = True +isSectionAttr DocBook5 ("linkends",_) = True +isSectionAttr DocBook5 ("xlink:actuate",_) = True +isSectionAttr DocBook5 ("xlink:arcrole",_) = True +isSectionAttr DocBook5 ("xlink:from",_) = True +isSectionAttr DocBook5 ("xlink:href",_) = True +isSectionAttr DocBook5 ("xlink:label",_) = True +isSectionAttr DocBook5 ("xlink:role",_) = True +isSectionAttr DocBook5 ("xlink:show",_) = True +isSectionAttr DocBook5 ("xlink:title",_) = True +isSectionAttr DocBook5 ("xlink:to",_) = True +isSectionAttr DocBook5 ("xlink:type",_) = True +isSectionAttr DocBook4 ("arch",_) = True +isSectionAttr DocBook4 ("condition",_) = True +isSectionAttr DocBook4 ("conformance",_) = True +isSectionAttr DocBook4 ("lang",_) = True +isSectionAttr DocBook4 ("os",_) = True +isSectionAttr DocBook4 ("revision",_) = True +isSectionAttr DocBook4 ("security",_) = True +isSectionAttr DocBook4 ("vendor",_) = True +isSectionAttr _ (_,_) = False \ No newline at end of file diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 842aed7ae..46203eeae 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -11,9 +11,14 @@ import Text.Pandoc.Builder docbook :: (ToPandoc a) => a -> String docbook = docbookWithOpts def{ writerWrapText = WrapNone } +docbook5 :: (ToPandoc a) => a -> String +docbook5 = docbook5WithOpts def{ writerWrapText = WrapNone } + docbookWithOpts :: ToPandoc a => WriterOptions -> a -> String docbookWithOpts opts = unpack . purely (writeDocbook4 opts) . toPandoc +docbook5WithOpts :: ToPandoc a => WriterOptions -> a -> String +docbook5WithOpts opts = unpack . purely (writeDocbook5 opts) . toPandoc {- "my test" =: X =?> Y @@ -366,4 +371,36 @@ tests = [ testGroup "line blocks" ] ] ] + , testGroup "section attributes" $ + let + headers = headerWith ("myid1",[],[("role","internal"),("xml:id","anotherid"),("dir","rtl")]) 1 "header1" + <> headerWith ("myid2",[],[("invalidname","value"),("arch","linux"),("dir","invaliddir")]) 1 "header2" + in + [ test docbook5 "sections with attributes (db5)" $ + headers =?> + unlines [ "<section xmlns=\"http://docbook.org/ns/docbook\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:id=\"myid1\" role=\"internal\" dir=\"rtl\">" + , " <title>header1</title>" + , " <para>" + , " </para>" + , "</section>" + , "<section xmlns=\"http://docbook.org/ns/docbook\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:id=\"myid2\">" + , " <title>header2</title>" + , " <para>" + , " </para>" + , "</section>" + ] + , test docbook "sections with attributes (db4)" $ + headers =?> + unlines [ "<sect1 id=\"myid1\" role=\"internal\">" + , " <title>header1</title>" + , " <para>" + , " </para>" + , "</sect1>" + , "<sect1 id=\"myid2\" arch=\"linux\">" + , " <title>header2</title>" + , " <para>" + , " </para>" + , "</sect1>" + ] + ] ] -- cgit v1.2.3 From 84d8f3efd8a45194752209b7d55b18a2291ed1ae Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Mar 2021 10:07:51 -0700 Subject: RST writer: use NonEmpty for init, last. --- src/Text/Pandoc/Writers/RST.hs | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/RST.hs b/src/Text/Pandoc/Writers/RST.hs index 0b9fc8331..983ef412a 100644 --- a/src/Text/Pandoc/Writers/RST.hs +++ b/src/Text/Pandoc/Writers/RST.hs @@ -17,7 +17,7 @@ module Text.Pandoc.Writers.RST ( writeRST, flatten ) where import Control.Monad.State.Strict import Data.Char (isSpace) import Data.List (transpose, intersperse, foldl') -import Data.List.NonEmpty (nonEmpty) +import qualified Data.List.NonEmpty as NE import Data.Maybe (fromMaybe) import qualified Data.Text as T import Data.Text (Text) @@ -336,7 +336,7 @@ blockToRST (OrderedList (start, style', delim) items) = do then replicate (length items) "#." else take (length items) $ orderedListMarkers (start, style', delim) - let maxMarkerLength = maybe 0 maximum $ nonEmpty $ map T.length markers + let maxMarkerLength = maybe 0 maximum $ NE.nonEmpty $ map T.length markers let markers' = map (\m -> let s = maxMarkerLength - T.length m in m <> T.replicate s " ") markers contents <- zipWithM orderedListItemToRST markers' items @@ -538,11 +538,15 @@ flatten outer collapse f i = appendToLast f $ dropInlineParent i appendToLast :: [Inline] -> [Inline] -> [Inline] - appendToLast [] toAppend = [setInlineChildren outer toAppend] - appendToLast flattened toAppend - | isOuter lastFlat = init flattened <> [appendTo lastFlat toAppend] - | otherwise = flattened <> [setInlineChildren outer toAppend] - where lastFlat = last flattened + appendToLast flattened toAppend = + case NE.nonEmpty flattened of + Nothing -> [setInlineChildren outer toAppend] + Just xs -> + if isOuter lastFlat + then NE.init xs <> [appendTo lastFlat toAppend] + else flattened <> [setInlineChildren outer toAppend] + where + lastFlat = NE.last xs appendTo o i = mapNested (<> i) o isOuter i = emptyParent i == emptyParent outer emptyParent i = setInlineChildren i [] @@ -762,7 +766,7 @@ simpleTable opts blocksToDoc headers rows = do then return [] else fixEmpties <$> mapM (blocksToDoc opts) headers rowDocs <- mapM (fmap fixEmpties . mapM (blocksToDoc opts)) rows - let numChars = maybe 0 maximum . nonEmpty . map offset + let numChars = maybe 0 maximum . NE.nonEmpty . map offset let colWidths = map numChars $ transpose (headerDocs : rowDocs) let toRow = mconcat . intersperse (lblock 1 " ") . zipWith lblock colWidths let hline = nowrap $ hsep (map (\n -> literal (T.replicate n "=")) colWidths) -- cgit v1.2.3 From 4d041953f56b85d4db241cea11c764856ccbeebe Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Mar 2021 10:41:12 -0700 Subject: T.P.Readers.Metadata: made `yamlBsToMeta`, `yamlBsToRefs` polymorphic... on the parser state, instead of requiring ParserState. [API change] --- src/Text/Pandoc/Readers/Metadata.hs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index 927291776..f4a27496f 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -31,10 +31,10 @@ import Text.Pandoc.Error import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Shared -yamlBsToMeta :: PandocMonad m - => ParserT Text ParserState m (F MetaValue) +yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st) + => ParserT Text st m (Future st MetaValue) -> BL.ByteString - -> ParserT Text ParserState m (F Meta) + -> ParserT Text st m (Future st Meta) yamlBsToMeta pMetaValue bstr = do case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc (YAML.Mapping _ _ o):_) @@ -63,11 +63,11 @@ lookupYAML t (YAML.Mapping _ _ m) = lookupYAML _ _ = Nothing -- Returns filtered list of references. -yamlBsToRefs :: PandocMonad m - => ParserT Text ParserState m (F MetaValue) +yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st) + => ParserT Text st m (Future st MetaValue) -> (Text -> Bool) -- ^ Filter for id -> BL.ByteString - -> ParserT Text ParserState m (F [MetaValue]) + -> ParserT Text st m (Future st [MetaValue]) yamlBsToRefs pMetaValue idpred bstr = case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc o@YAML.Mapping{}:_) @@ -104,10 +104,10 @@ nodeToKey (YAML.Scalar _ (YAML.SStr t)) = Just t nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t nodeToKey _ = Nothing -normalizeMetaValue :: PandocMonad m - => ParserT Text ParserState m (F MetaValue) +normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st) + => ParserT Text st m (Future st MetaValue) -> Text - -> ParserT Text ParserState m (F MetaValue) + -> ParserT Text st m (Future st MetaValue) normalizeMetaValue pMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with @@ -129,10 +129,10 @@ checkBoolean t | t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" = Just False | otherwise = Nothing -yamlToMetaValue :: PandocMonad m - => ParserT Text ParserState m (F MetaValue) +yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st) + => ParserT Text st m (Future st MetaValue) -> YAML.Node YE.Pos - -> ParserT Text ParserState m (F MetaValue) + -> ParserT Text st m (Future st MetaValue) yamlToMetaValue pMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> normalizeMetaValue pMetaValue t @@ -152,10 +152,10 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) = fmap MetaMap <$> yamlMap pMetaValue o yamlToMetaValue _ _ = return $ return $ MetaString "" -yamlMap :: PandocMonad m - => ParserT Text ParserState m (F MetaValue) +yamlMap :: (PandocMonad m, HasLastStrPosition st) + => ParserT Text st m (Future st MetaValue) -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> ParserT Text ParserState m (F (M.Map Text MetaValue)) + -> ParserT Text st m (Future st (M.Map Text MetaValue)) yamlMap pMetaValue o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- maybe (throwError $ PandocParseError -- cgit v1.2.3 From ce418667ae8a3e6e5bbf2523eef43edf4f803bcf Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Mar 2021 10:41:55 -0700 Subject: Text.Pandoc.Parsing: remove F type synonym. Muse and Org were defining their own F anyway, with their own state. We therefore move this definition to the Markdown reader. --- src/Text/Pandoc/App/Opt.hs | 3 ++- src/Text/Pandoc/Parsing.hs | 7 ++----- src/Text/Pandoc/Readers/Markdown.hs | 2 ++ src/Text/Pandoc/Readers/Muse.hs | 2 +- src/Text/Pandoc/Readers/Org/Parsing.hs | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index e5aaec9c5..c72f63464 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -686,7 +686,8 @@ yamlToMeta (Mapping _ _ m) = either (fail . show) return $ runEverything (yamlMap pMetaString m) where pMetaString = pure . MetaString <$> P.manyChar P.anyChar - runEverything p = runPure (P.readWithM p def "") + runEverything p = + runPure (P.readWithM p (def :: P.ParserState) "") >>= fmap (Meta . flip P.runF def) yamlToMeta _ = return mempty diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 10a08d410..44e6af59e 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -112,7 +112,6 @@ module Text.Pandoc.Parsing ( take1WhileP, citeKey, Parser, ParserT, - F, Future(..), runF, askF, @@ -229,8 +228,6 @@ type ParserT = ParsecT newtype Future s a = Future { runDelayed :: Reader s a } deriving (Monad, Applicative, Functor) -type F = Future ParserState - runF :: Future s a -> s -> a runF = runReader . runDelayed @@ -1169,7 +1166,7 @@ data ParserState = ParserState stateInNote :: Bool, -- ^ True if parsing note contents stateNoteNumber :: Int, -- ^ Last note number for citations stateMeta :: Meta, -- ^ Document metadata - stateMeta' :: F Meta, -- ^ Document metadata + stateMeta' :: Future ParserState Meta, -- ^ Document metadata stateCitations :: M.Map Text Text, -- ^ RST-style citations stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateIdentifiers :: Set.Set Text, -- ^ Header identifiers used @@ -1348,7 +1345,7 @@ data QuoteContext type NoteTable = [(Text, Text)] -type NoteTable' = M.Map Text (SourcePos, F Blocks) +type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks) -- used in markdown reader newtype Key = Key Text deriving (Show, Read, Eq, Ord) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index a86286b3a..7c557b5a7 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -52,6 +52,8 @@ import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs) type MarkdownParser m = ParserT Text ParserState m +type F = Future ParserState + -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: PandocMonad m => ReaderOptions -- ^ Reader options diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index b4eea9d3a..698bfd3d7 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -36,7 +36,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Parsing hiding (F) +import Text.Pandoc.Parsing import Text.Pandoc.Shared (crFilter, trimr, tshow) -- | Read Muse from an input string and return a Pandoc document. diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index d33920d47..6ed24a602 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -106,8 +106,8 @@ module Text.Pandoc.Readers.Org.Parsing import Data.Text (Text) import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline, - parseFromString) +import Text.Pandoc.Parsing hiding (anyLine, blanklines, newline, + parseFromString) import qualified Text.Pandoc.Parsing as P import Control.Monad (guard) -- cgit v1.2.3 From bea86f394e6bc77d7441bb98f10a8b8ccfee04c9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Mar 2021 11:23:41 -0700 Subject: Markdown reader: export `yamlMetaBlock`. [API change] This will allow us to parse YAML metadata blocks in other readers, potentially. --- src/Text/Pandoc/Readers/Markdown.hs | 40 +++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 7c557b5a7..8d68510c5 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -15,6 +15,7 @@ Conversion of markdown-formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Markdown ( readMarkdown, + yamlMetaBlock, yamlToMeta, yamlToRefs ) where @@ -274,24 +275,29 @@ pandocTitleBlock = do $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } -yamlMetaBlock :: PandocMonad m => MarkdownParser m (F Blocks) -yamlMetaBlock = do +yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m) + => ParserT Text st m (Future st Blocks) + -> ParserT Text st m (Future st Meta) +yamlMetaBlock parser = try $ do + string "---" + blankline + notFollowedBy blankline -- if --- is followed by a blank it's an HRULE + rawYamlLines <- manyTill anyLine stopLine + -- by including --- and ..., we allow yaml blocks with just comments: + let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) + optional blanklines + yamlBsToMeta (fmap B.toMetaValue <$> parser) + $ UTF8.fromTextLazy $ TL.fromStrict rawYaml + +yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks) +yamlMetaBlock' = do guardEnabled Ext_yaml_metadata_block - try $ do - string "---" - blankline - notFollowedBy blankline -- if --- is followed by a blank it's an HRULE - rawYamlLines <- manyTill anyLine stopLine - -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) - optional blanklines - newMetaF <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) - $ UTF8.fromTextLazy $ TL.fromStrict rawYaml - -- Since `<>` is left-biased, existing values are not touched: - updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF } - return mempty + newMetaF <- yamlMetaBlock parseBlocks + -- Since `<>` is left-biased, existing values are not touched: + updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF } + return mempty -stopLine :: PandocMonad m => MarkdownParser m () +stopLine :: PandocMonad m => ParserT Text st m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () mmdTitleBlock :: PandocMonad m => MarkdownParser m () @@ -456,7 +462,7 @@ block :: PandocMonad m => MarkdownParser m (F Blocks) block = do res <- choice [ mempty <$ blanklines , codeBlockFenced - , yamlMetaBlock + , yamlMetaBlock' -- note: bulletList needs to be before header because of -- the possibility of empty list items: - , bulletList -- cgit v1.2.3 From 2274eb88a4dddf622d86bee94bb6f20db6e148b2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Mar 2021 11:37:09 -0700 Subject: Move yamlMetaBlock from Markdown reader to T.P.Readers.Metadata. --- src/Text/Pandoc/Readers/Markdown.hs | 24 ++---------------------- src/Text/Pandoc/Readers/Metadata.hs | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 8d68510c5..6c3947a81 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -15,7 +15,6 @@ Conversion of markdown-formatted plain text to 'Pandoc' document. -} module Text.Pandoc.Readers.Markdown ( readMarkdown, - yamlMetaBlock, yamlToMeta, yamlToRefs ) where @@ -29,7 +28,6 @@ import Data.Maybe import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import qualified Data.Text.Lazy as TL import qualified Data.ByteString.Lazy as BL import System.FilePath (addExtension, takeExtension) import Text.HTML.TagSoup hiding (Row) @@ -47,9 +45,8 @@ import Text.Pandoc.Readers.HTML (htmlInBalanced, htmlTag, isBlockTag, isCommentTag, isInlineTag, isTextTag) import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared -import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) -import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs) +import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock) type MarkdownParser m = ParserT Text ParserState m @@ -275,31 +272,14 @@ pandocTitleBlock = do $ nullMeta updateState $ \st -> st{ stateMeta' = stateMeta' st <> meta' } -yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m) - => ParserT Text st m (Future st Blocks) - -> ParserT Text st m (Future st Meta) -yamlMetaBlock parser = try $ do - string "---" - blankline - notFollowedBy blankline -- if --- is followed by a blank it's an HRULE - rawYamlLines <- manyTill anyLine stopLine - -- by including --- and ..., we allow yaml blocks with just comments: - let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) - optional blanklines - yamlBsToMeta (fmap B.toMetaValue <$> parser) - $ UTF8.fromTextLazy $ TL.fromStrict rawYaml - yamlMetaBlock' :: PandocMonad m => MarkdownParser m (F Blocks) yamlMetaBlock' = do guardEnabled Ext_yaml_metadata_block - newMetaF <- yamlMetaBlock parseBlocks + newMetaF <- yamlMetaBlock (fmap B.toMetaValue <$> parseBlocks) -- Since `<>` is left-biased, existing values are not touched: updateState $ \st -> st{ stateMeta' = stateMeta' st <> newMetaF } return mempty -stopLine :: PandocMonad m => ParserT Text st m () -stopLine = try $ (string "---" <|> string "...") >> blankline >> return () - mmdTitleBlock :: PandocMonad m => MarkdownParser m () mmdTitleBlock = do guardEnabled Ext_mmd_title_block diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index f4a27496f..cb141cba5 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -14,6 +14,7 @@ Parse YAML/JSON metadata to 'Pandoc' 'Meta'. module Text.Pandoc.Readers.Metadata ( yamlBsToMeta, yamlBsToRefs, + yamlMetaBlock, yamlMap ) where import Control.Monad @@ -30,6 +31,8 @@ import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Parsing hiding (tableWith) import Text.Pandoc.Shared +import qualified Data.Text.Lazy as TL +import qualified Text.Pandoc.UTF8 as UTF8 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st) => ParserT Text st m (Future st MetaValue) @@ -171,3 +174,20 @@ yamlMap pMetaValue o = do return $ do v' <- fv return (k, v') + +-- | Parse a YAML metadata block using the supplied 'MetaValue' parser. +yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m) + => ParserT Text st m (Future st MetaValue) + -> ParserT Text st m (Future st Meta) +yamlMetaBlock parser = try $ do + string "---" + blankline + notFollowedBy blankline -- if --- is followed by a blank it's an HRULE + rawYamlLines <- manyTill anyLine stopLine + -- by including --- and ..., we allow yaml blocks with just comments: + let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) + optional blanklines + yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml + +stopLine :: Monad m => ParserT Text st m () +stopLine = try $ (string "---" <|> string "...") >> blankline >> return () -- cgit v1.2.3 From c389211e2f678d9327cd2c008d54a1e438f07a07 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 20 Mar 2021 15:45:09 -0700 Subject: Support `yaml_metadata_block` extension form commonmark, gfm. This is a bit more limited than with markdown, as documented in the manual: - The YAML block must be the first thing in the input. - The leaf notes are parsed in isolation from the rest of the document. So, for example, you can't use reference links if the references are defined later in the document. Closes #6537. --- MANUAL.txt | 16 ++++++++++++++++ src/Text/Pandoc/Extensions.hs | 3 +++ src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/Readers/CommonMark.hs | 30 ++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index bc9e226e9..845d1dbba 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -4240,6 +4240,22 @@ will be interpreted as markdown. For example: \renewcommand{\section}[1]{\clearpage\oldsection{#1}} ``` +Note: the `yaml_metadata_block` extension works with +`commonmark` as well as `markdown` (and it is enabled by default +in `gfm` and `commonmark_x`). However, in these formats the +following restrictions apply: + +- The YAML metadata block must occur at the beginning of the + document (and there can be only one). If multiple files are + given as arguments to pandoc, only the first can be a YAML + metadata block. + +- The leaf nodes of the YAML structure are parsed in isolation from + each other and from the rest of the document. So, for + example, you can't use a reference link in these contexts + if the link definition is somewhere else in the document. + + ## Backslash escapes #### Extension: `all_symbols_escapable` #### diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 266a09e3c..6423d5f56 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -354,6 +354,7 @@ getDefaultExtensions "gfm" = extensionsFromList , Ext_strikeout , Ext_task_lists , Ext_emoji + , Ext_yaml_metadata_block ] getDefaultExtensions "commonmark" = extensionsFromList [Ext_raw_html] @@ -379,6 +380,7 @@ getDefaultExtensions "commonmark_x" = extensionsFromList , Ext_raw_attribute , Ext_implicit_header_references , Ext_attributes + , Ext_yaml_metadata_block ] getDefaultExtensions "org" = extensionsFromList [Ext_citations, @@ -511,6 +513,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_implicit_header_references , Ext_attributes , Ext_sourcepos + , Ext_yaml_metadata_block ] getAll "commonmark_x" = getAll "commonmark" getAll "org" = autoIdExtensions <> diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 44e6af59e..0c2078721 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -517,7 +517,7 @@ parseFromString :: (Stream s m Char, IsString s) -> ParserT s st m r parseFromString parser str = do oldPos <- getPosition - setPosition $ initialPos "chunk" + setPosition $ initialPos " chunk" oldInput <- getInput setInput $ fromString $ T.unpack str result <- parser diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 150a837e4..244f77940 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -26,13 +26,43 @@ import Text.Pandoc.Definition import Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Error +import Text.Pandoc.Readers.Metadata (yamlMetaBlock) import Control.Monad.Except import Data.Functor.Identity (runIdentity) import Data.Typeable +import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine, + runF, defaultParserState, take1WhileP, option) +import qualified Data.Text as T -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s + | isEnabled Ext_yaml_metadata_block opts + , "---" `T.isPrefixOf` s = do + let metaValueParser = do + inp <- option "" $ take1WhileP (const True) + case runIdentity + (commonmarkWith (specFor opts) "metadata value" inp) of + Left _ -> mzero + Right (Cm bls :: Cm () Blocks) + -> return $ return $ B.toMetaValue bls + res <- runParserT (do meta <- yamlMetaBlock metaValueParser + pos <- getPosition + return (meta, pos)) + defaultParserState "YAML metadata" s + case res of + Left _ -> readCommonMarkBody opts s + Right (meta, pos) -> do + let dropLines 0 = id + dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n') + let metaLines = sourceLine pos - 1 + let body = T.replicate metaLines "\n" <> dropLines metaLines s + Pandoc _ bs <- readCommonMarkBody opts body + return $ Pandoc (runF meta defaultParserState) bs + | otherwise = readCommonMarkBody opts s + +readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readCommonMarkBody opts s | isEnabled Ext_sourcepos opts = case runIdentity (commonmarkWith (specFor opts) "" s) of Left err -> throwError $ PandocParsecError s err -- cgit v1.2.3 From 052056289fc6f884a2a8799dacca64a16248a5c2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 21 Mar 2021 23:38:47 -0700 Subject: Simplify T.P.Asciify and export toAsciiText [API change]. Instead of encoding a giant (and incomplete) map, we now just use unicode-transforms to normalize the text to a canonical decomposition, and manipulate the result. The new `toAsciiText` is equivalent to the old `T.pack . mapMaybe toAsciiChar . T.unpack` but should be faster. --- src/Text/Pandoc/Asciify.hs | 403 ++------------------------------------------- src/Text/Pandoc/Parsing.hs | 6 +- src/Text/Pandoc/Shared.hs | 4 +- 3 files changed, 18 insertions(+), 395 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs index 01a7b624a..620546c13 100644 --- a/src/Text/Pandoc/Asciify.hs +++ b/src/Text/Pandoc/Asciify.hs @@ -10,396 +10,19 @@ Function to convert accented latin letters to their unaccented ascii equivalents (used in constructing HTML identifiers). -} -module Text.Pandoc.Asciify (toAsciiChar) +module Text.Pandoc.Asciify (toAsciiChar, toAsciiText) where -import Data.Char (isAscii) -import qualified Data.Map as M +import Data.Char (isAscii, isMark) +import qualified Data.Text.Normalize as TN +import Data.Text (Text) +import qualified Data.Text as T -toAsciiChar :: Char -> Maybe Char -toAsciiChar c | isAscii c = Just c - | otherwise = M.lookup c asciiMap +toAsciiText :: Text -> Text +toAsciiText = T.filter isAscii . TN.normalize (TN.NFD) -asciiMap :: M.Map Char Char -asciiMap = M.fromList - [('\192','A') - ,('\193','A') - ,('\194','A') - ,('\195','A') - ,('\196','A') - ,('\197','A') - ,('\199','C') - ,('\200','E') - ,('\201','E') - ,('\202','E') - ,('\203','E') - ,('\204','I') - ,('\205','I') - ,('\206','I') - ,('\207','I') - ,('\209','N') - ,('\210','O') - ,('\211','O') - ,('\212','O') - ,('\213','O') - ,('\214','O') - ,('\217','U') - ,('\218','U') - ,('\219','U') - ,('\220','U') - ,('\221','Y') - ,('\224','a') - ,('\225','a') - ,('\226','a') - ,('\227','a') - ,('\228','a') - ,('\229','a') - ,('\231','c') - ,('\232','e') - ,('\233','e') - ,('\234','e') - ,('\235','e') - ,('\236','i') - ,('\237','i') - ,('\238','i') - ,('\239','i') - ,('\241','n') - ,('\242','o') - ,('\243','o') - ,('\244','o') - ,('\245','o') - ,('\246','o') - ,('\249','u') - ,('\250','u') - ,('\251','u') - ,('\252','u') - ,('\253','y') - ,('\255','y') - ,('\256','A') - ,('\257','a') - ,('\258','A') - ,('\259','a') - ,('\260','A') - ,('\261','a') - ,('\262','C') - ,('\263','c') - ,('\264','C') - ,('\265','c') - ,('\266','C') - ,('\267','c') - ,('\268','C') - ,('\269','c') - ,('\270','D') - ,('\271','d') - ,('\274','E') - ,('\275','e') - ,('\276','E') - ,('\277','e') - ,('\278','E') - ,('\279','e') - ,('\280','E') - ,('\281','e') - ,('\282','E') - ,('\283','e') - ,('\284','G') - ,('\285','g') - ,('\286','G') - ,('\287','g') - ,('\288','G') - ,('\289','g') - ,('\290','G') - ,('\291','g') - ,('\292','H') - ,('\293','h') - ,('\296','I') - ,('\297','i') - ,('\298','I') - ,('\299','i') - ,('\300','I') - ,('\301','i') - ,('\302','I') - ,('\303','i') - ,('\304','I') - ,('\305','i') - ,('\308','J') - ,('\309','j') - ,('\310','K') - ,('\311','k') - ,('\313','L') - ,('\314','l') - ,('\315','L') - ,('\316','l') - ,('\317','L') - ,('\318','l') - ,('\323','N') - ,('\324','n') - ,('\325','N') - ,('\326','n') - ,('\327','N') - ,('\328','n') - ,('\332','O') - ,('\333','o') - ,('\334','O') - ,('\335','o') - ,('\336','O') - ,('\337','o') - ,('\340','R') - ,('\341','r') - ,('\342','R') - ,('\343','r') - ,('\344','R') - ,('\345','r') - ,('\346','S') - ,('\347','s') - ,('\348','S') - ,('\349','s') - ,('\350','S') - ,('\351','s') - ,('\352','S') - ,('\353','s') - ,('\354','T') - ,('\355','t') - ,('\356','T') - ,('\357','t') - ,('\360','U') - ,('\361','u') - ,('\362','U') - ,('\363','u') - ,('\364','U') - ,('\365','u') - ,('\366','U') - ,('\367','u') - ,('\368','U') - ,('\369','u') - ,('\370','U') - ,('\371','u') - ,('\372','W') - ,('\373','w') - ,('\374','Y') - ,('\375','y') - ,('\376','Y') - ,('\377','Z') - ,('\378','z') - ,('\379','Z') - ,('\380','z') - ,('\381','Z') - ,('\382','z') - ,('\416','O') - ,('\417','o') - ,('\431','U') - ,('\432','u') - ,('\461','A') - ,('\462','a') - ,('\463','I') - ,('\464','i') - ,('\465','O') - ,('\466','o') - ,('\467','U') - ,('\468','u') - ,('\486','G') - ,('\487','g') - ,('\488','K') - ,('\489','k') - ,('\490','O') - ,('\491','o') - ,('\496','j') - ,('\500','G') - ,('\501','g') - ,('\504','N') - ,('\505','n') - ,('\512','A') - ,('\513','a') - ,('\514','A') - ,('\515','a') - ,('\516','E') - ,('\517','e') - ,('\518','E') - ,('\519','e') - ,('\520','I') - ,('\521','i') - ,('\522','I') - ,('\523','i') - ,('\524','O') - ,('\525','o') - ,('\526','O') - ,('\527','o') - ,('\528','R') - ,('\529','r') - ,('\530','R') - ,('\531','r') - ,('\532','U') - ,('\533','u') - ,('\534','U') - ,('\535','u') - ,('\536','S') - ,('\537','s') - ,('\538','T') - ,('\539','t') - ,('\542','H') - ,('\543','h') - ,('\550','A') - ,('\551','a') - ,('\552','E') - ,('\553','e') - ,('\558','O') - ,('\559','o') - ,('\562','Y') - ,('\563','y') - ,('\894',';') - ,('\7680','A') - ,('\7681','a') - ,('\7682','B') - ,('\7683','b') - ,('\7684','B') - ,('\7685','b') - ,('\7686','B') - ,('\7687','b') - ,('\7690','D') - ,('\7691','d') - ,('\7692','D') - ,('\7693','d') - ,('\7694','D') - ,('\7695','d') - ,('\7696','D') - ,('\7697','d') - ,('\7698','D') - ,('\7699','d') - ,('\7704','E') - ,('\7705','e') - ,('\7706','E') - ,('\7707','e') - ,('\7710','F') - ,('\7711','f') - ,('\7712','G') - ,('\7713','g') - ,('\7714','H') - ,('\7715','h') - ,('\7716','H') - ,('\7717','h') - ,('\7718','H') - ,('\7719','h') - ,('\7720','H') - ,('\7721','h') - ,('\7722','H') - ,('\7723','h') - ,('\7724','I') - ,('\7725','i') - ,('\7728','K') - ,('\7729','k') - ,('\7730','K') - ,('\7731','k') - ,('\7732','K') - ,('\7733','k') - ,('\7734','L') - ,('\7735','l') - ,('\7738','L') - ,('\7739','l') - ,('\7740','L') - ,('\7741','l') - ,('\7742','M') - ,('\7743','m') - ,('\7744','M') - ,('\7745','m') - ,('\7746','M') - ,('\7747','m') - ,('\7748','N') - ,('\7749','n') - ,('\7750','N') - ,('\7751','n') - ,('\7752','N') - ,('\7753','n') - ,('\7754','N') - ,('\7755','n') - ,('\7764','P') - ,('\7765','p') - ,('\7766','P') - ,('\7767','p') - ,('\7768','R') - ,('\7769','r') - ,('\7770','R') - ,('\7771','r') - ,('\7774','R') - ,('\7775','r') - ,('\7776','S') - ,('\7777','s') - ,('\7778','S') - ,('\7779','s') - ,('\7786','T') - ,('\7787','t') - ,('\7788','T') - ,('\7789','t') - ,('\7790','T') - ,('\7791','t') - ,('\7792','T') - ,('\7793','t') - ,('\7794','U') - ,('\7795','u') - ,('\7796','U') - ,('\7797','u') - ,('\7798','U') - ,('\7799','u') - ,('\7804','V') - ,('\7805','v') - ,('\7806','V') - ,('\7807','v') - ,('\7808','W') - ,('\7809','w') - ,('\7810','W') - ,('\7811','w') - ,('\7812','W') - ,('\7813','w') - ,('\7814','W') - ,('\7815','w') - ,('\7816','W') - ,('\7817','w') - ,('\7818','X') - ,('\7819','x') - ,('\7820','X') - ,('\7821','x') - ,('\7822','Y') - ,('\7823','y') - ,('\7824','Z') - ,('\7825','z') - ,('\7826','Z') - ,('\7827','z') - ,('\7828','Z') - ,('\7829','z') - ,('\7830','h') - ,('\7831','t') - ,('\7832','w') - ,('\7833','y') - ,('\7840','A') - ,('\7841','a') - ,('\7842','A') - ,('\7843','a') - ,('\7864','E') - ,('\7865','e') - ,('\7866','E') - ,('\7867','e') - ,('\7868','E') - ,('\7869','e') - ,('\7880','I') - ,('\7881','i') - ,('\7882','I') - ,('\7883','i') - ,('\7884','O') - ,('\7885','o') - ,('\7886','O') - ,('\7887','o') - ,('\7908','U') - ,('\7909','u') - ,('\7910','U') - ,('\7911','u') - ,('\7922','Y') - ,('\7923','y') - ,('\7924','Y') - ,('\7925','y') - ,('\7926','Y') - ,('\7927','y') - ,('\7928','Y') - ,('\7929','y') - ,('\8175','`') - ,('\8490','K') - ,('\8800','=') - ,('\8814','<') - ,('\8815','>') - ] +toAsciiChar :: Char -> Maybe Char +toAsciiChar c = case T.unpack (TN.normalize TN.NFD (T.singleton c)) of + (x:xs) | isAscii x + , all isMark xs + -> Just x + _ -> Nothing diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 0c2078721..847fd2e05 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -195,13 +195,13 @@ import Data.Default import Data.Functor (($>)) import Data.List (intercalate, transpose) import qualified Data.Map as M -import Data.Maybe (mapMaybe, fromMaybe) +import Data.Maybe (fromMaybe) import qualified Data.Set as Set import Data.String import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) -import Text.Pandoc.Asciify (toAsciiChar) +import Text.Pandoc.Asciify (toAsciiText) import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report) @@ -1380,7 +1380,7 @@ registerHeader (ident,classes,kvs) header' = do then do let id' = uniqueIdent exts (B.toList header') ids let id'' = if Ext_ascii_identifiers `extensionEnabled` exts - then T.pack $ mapMaybe toAsciiChar $ T.unpack id' + then toAsciiText id' else id' updateState $ updateIdentifierList $ Set.insert id' updateState $ updateIdentifierList $ Set.insert id'' diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 23adff909..3292b32f4 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -123,7 +123,7 @@ import Text.HTML.TagSoup (RenderOptions (..), Tag (..), renderOptions, import Text.Pandoc.Builder (Blocks, Inlines, ToMetaValue (..)) import qualified Text.Pandoc.Builder as B import Data.Time -import Text.Pandoc.Asciify (toAsciiChar) +import Text.Pandoc.Asciify (toAsciiText) import Text.Pandoc.Definition import Text.Pandoc.Extensions (Extensions, Extension(..), extensionEnabled) import Text.Pandoc.Generic (bottomUp) @@ -478,7 +478,7 @@ inlineListToIdentifier exts = | otherwise = T.dropWhile (not . isAlpha) filterAscii | extensionEnabled Ext_ascii_identifiers exts - = T.pack . mapMaybe toAsciiChar . T.unpack + = toAsciiText | otherwise = id toIdent | extensionEnabled Ext_gfm_auto_identifiers exts = -- cgit v1.2.3 From 56ce1fc12651e9cf60b882a18d72a410967d6540 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 24 Mar 2021 11:57:49 -0700 Subject: Fix DocBook reader mathml regression... ...caused by the switch in XML libraries. Also fixed a similar issue in JATS. Closes #7173. --- src/Text/Pandoc/Readers/DocBook.hs | 3 +- src/Text/Pandoc/Readers/JATS.hs | 8 ++- test/command/7173.md | 137 +++++++++++++++++++++++++++++++++++++ test/docbook-reader.docbook | 4 +- 4 files changed, 147 insertions(+), 5 deletions(-) create mode 100644 test/command/7173.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 6f5bb0ad4..4f525cfb1 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1239,7 +1239,8 @@ equation e constructor = where mathMLEquations :: [Text] mathMLEquations = map writeTeX $ rights $ readMath - (\x -> qName (elName x) == "math" && qPrefix (elName x) == Just "mml") + (\x -> qName (elName x) == "math" && + qURI (elName x) == Just "http://www.w3.org/1998/Math/MathML") (readMathML . showElement) latexEquations :: [Text] diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index 602f3b4f2..c068f3774 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -491,7 +491,8 @@ parseInline (Elem e) = "disp-formula" -> formula displayMath "inline-formula" -> formula math - "math" | qPrefix (elName e) == Just "mml" -> return . math $ mathML e + "math" | qURI (elName e) == Just "http://www.w3.org/1998/Math/MathML" + -> return . math $ mathML e "tex-math" -> return . math $ textContent e "email" -> return $ link ("mailto:" <> textContent e) "" @@ -514,8 +515,9 @@ parseInline (Elem e) = filterChildren isMathML whereToLook return . mconcat . take 1 . map constructor $ texMaths ++ mathMLs - isMathML x = qName (elName x) == "math" && - qPrefix (elName x) == Just "mml" + isMathML x = qName (elName x) == "math" && + qURI (elName x) == + Just "http://www.w3.org/1998/Math/MathML" removePrefix elname = elname { qPrefix = Nothing } codeWithLang = do let classes' = case attrValue "language" e of diff --git a/test/command/7173.md b/test/command/7173.md new file mode 100644 index 000000000..2599dc19b --- /dev/null +++ b/test/command/7173.md @@ -0,0 +1,137 @@ +``` +% pandoc -f docbook -t latex +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"> +<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?> +<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?> +<article xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="5.1"> + <title>Untitled Document</title> + <para>Word</para> + <informalequation> + <alt role='tex'>1+2</alt> + <m:math xmlns:m="http://www.w3.org/1998/Math/MathML"> + <m:mrow> + <m:mrow><m:mn>1</m:mn><m:mo>+</m:mo><m:mn>2</m:mn> + </m:mrow> + </m:mrow> + </m:math> + </informalequation> +</article> +^D +Word + +\[1 + 2\] +``` + +``` +% pandoc -f docbook -t latex +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"> +<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?> +<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?> +<article xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="5.1"> + <title>Untitled Document</title> + <para>Word</para> + <informalequation> + <alt role='tex'>1+2</alt> + <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML"> + <mml:mrow> + <mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mn>2</mml:mn> + </mml:mrow> + </mml:mrow> + </mml:math> + </informalequation> +</article> +^D +Word + +\[1 + 2\] +``` + +``` +% pandoc -f docbook -t latex +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"> +<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?> +<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?> +<article xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="5.1"> + <title>Untitled Document</title> + <para>Word</para> + <informalequation> + <alt role='tex'>1+2</alt> + <math xmlns="http://www.w3.org/1998/Math/MathML"> + <mrow> + <mrow><mn>1</mn><mo>+</mo><mn>2</mn> + </mrow> + </mrow> + </math> + </informalequation> +</article> +^D +Word + +\[1 + 2\] +``` + +``` +% pandoc -f docbook -t latex +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"> +<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?> +<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?> +<article xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:m="http://www.w3.org/1998/Math/MathML" + version="5.1"> + <title>Untitled Document</title> + <para>Word</para> + <informalequation> + <alt role='tex'>1+2</alt> + <m:math> + <m:mrow> + <m:mrow><m:mn>1</m:mn><m:mo>+</m:mo><m:mn>2</m:mn> + </m:mrow> + </m:mrow> + </m:math> + </informalequation> +</article> +^D +Word + +\[1 + 2\] +``` + +``` +% pandoc -f docbook -t latex +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE article PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd"> +<?xml-model href="http://www.oxygenxml.com/docbook/xml/5.0/rng/dbmathml.rng" schematypens="http://relaxng.org/ns/structure/1.0"?> +<?xml-model href="http://docbook.org/xml/5.0/rng/docbook.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?> +<article xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:mml="http://www.w3.org/1998/Math/MathML" + version="5.1"> + <title>Untitled Document</title> + <para>Word</para> + <informalequation> + <alt role='tex'>1+2</alt> + <mml:math> + <mml:mrow> + <mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mn>2</mml:mn> + </mml:mrow> + </mml:mrow> + </mml:math> + </informalequation> +</article> +^D +Word + +\[1 + 2\] +``` + diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook index 5717d78d0..51e62942b 100644 --- a/test/docbook-reader.docbook +++ b/test/docbook-reader.docbook @@ -6,7 +6,9 @@ <!ENTITY let "LET" > <!ENTITY case "CASE" > ]> -<article> +<article xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:mml="http://www.w3.org/1998/Math/MathML"> <articleinfo> <title>Pandoc Test Suite</title> <authorgroup> -- cgit v1.2.3 From e22d1fbb144d63d595c5db9225dd912b09fd938f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 29 Mar 2021 14:56:44 -0700 Subject: Powerpoint writer: allow monofont to be specified in metadata... ...not just using `--variable` on the command line (as in other writers). Closes #7187. --- src/Text/Pandoc/Writers/Powerpoint.hs | 2 +- src/Text/Pandoc/Writers/Powerpoint/Output.hs | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Powerpoint.hs b/src/Text/Pandoc/Writers/Powerpoint.hs index ca3b74a1d..e0573beca 100644 --- a/src/Text/Pandoc/Writers/Powerpoint.hs +++ b/src/Text/Pandoc/Writers/Powerpoint.hs @@ -39,5 +39,5 @@ writePowerpoint opts (Pandoc meta blks) = do let blks' = walk fixDisplayMath blks let (pres, logMsgs) = documentToPresentation opts (Pandoc meta blks') mapM_ report logMsgs - archv <- presentationToArchive opts pres + archv <- presentationToArchive opts meta pres return $ fromArchive archv diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index 0e515b3c2..f2f54a91c 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -38,17 +38,19 @@ import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Options import Text.Pandoc.MIME import qualified Data.ByteString.Lazy as BL +import Text.Pandoc.Writers.Shared (metaToContext) import Text.Pandoc.Writers.OOXML import qualified Data.Map as M import Data.Maybe (mapMaybe, listToMaybe, fromMaybe, maybeToList, catMaybes, isJust) import Text.Pandoc.ImageSize import Control.Applicative ((<|>)) import System.FilePath.Glob -import Text.DocTemplates (FromContext(lookupContext)) +import Text.DocTemplates (FromContext(lookupContext), Context) +import Text.DocLayout (literal) import Text.TeXMath import Text.Pandoc.Writers.Math (convertMath) import Text.Pandoc.Writers.Powerpoint.Presentation -import Text.Pandoc.Shared (tshow) +import Text.Pandoc.Shared (tshow, stringify) import Skylighting (fromColor) import Data.List.NonEmpty (nonEmpty) @@ -97,6 +99,7 @@ data WriterEnv = WriterEnv { envRefArchive :: Archive , envDistArchive :: Archive , envUTCTime :: UTCTime , envOpts :: WriterOptions + , envContext :: Context Text , envPresentationSize :: (Integer, Integer) , envSlideHasHeader :: Bool , envInList :: Bool @@ -122,6 +125,7 @@ instance Default WriterEnv where , envDistArchive = emptyArchive , envUTCTime = posixSecondsToUTCTime 0 , envOpts = def + , envContext = mempty , envPresentationSize = (720, 540) , envSlideHasHeader = False , envInList = False @@ -168,7 +172,7 @@ runP env st p = evalStateT (runReaderT p env) st monospaceFont :: Monad m => P m T.Text monospaceFont = do - vars <- writerVariables <$> asks envOpts + vars <- asks envContext case lookupContext "monofont" vars of Just s -> return s Nothing -> return "Courier" @@ -304,8 +308,9 @@ makeSpeakerNotesMap (Presentation _ slides) = then Nothing else Just n -presentationToArchive :: PandocMonad m => WriterOptions -> Presentation -> m Archive -presentationToArchive opts pres = do +presentationToArchive :: PandocMonad m + => WriterOptions -> Meta -> Presentation -> m Archive +presentationToArchive opts meta pres = do distArchive <- toArchive . BL.fromStrict <$> P.readDefaultDataFile "reference.pptx" refArchive <- case writerReferenceDoc opts of @@ -321,10 +326,18 @@ presentationToArchive opts pres = do PandocSomeError "Could not determine presentation size" + -- note, we need writerTemplate to be Just _ or metaToContext does + -- nothing + context <- metaToContext opts{ writerTemplate = + writerTemplate opts <|> Just mempty } + (return . literal . stringify) + (return . literal . stringify) meta + let env = def { envRefArchive = refArchive , envDistArchive = distArchive , envUTCTime = utctime , envOpts = opts + , envContext = context , envPresentationSize = presSize , envSlideIdMap = makeSlideIdMap pres , envSpeakerNotesIdMap = makeSpeakerNotesMap pres -- cgit v1.2.3 From 40da6c402beb64e47838254c04228098129165df Mon Sep 17 00:00:00 2001 From: niszet <niszet0016@gmail.com> Date: Thu, 1 Apr 2021 08:44:34 +0900 Subject: Treat tabs as spaces in ODT Reader. (#7185) --- src/Text/Pandoc/Readers/Odt/ContentReader.hs | 8 +++++++- test/Tests/Readers/Odt.hs | 1 + test/odt/native/tab.native | 1 + test/odt/odt/tab.odt | Bin 0 -> 10170 bytes 4 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 test/odt/native/tab.native create mode 100644 test/odt/odt/tab.odt (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index df90880fa..c4220b0db 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -577,7 +577,10 @@ read_spaces = matchingElement NsText "s" ( read_line_break :: InlineMatcher read_line_break = matchingElement NsText "line-break" $ returnV linebreak - +-- +read_tab :: InlineMatcher +read_tab = matchingElement NsText "tab" + $ returnV space -- read_span :: InlineMatcher read_span = matchingElement NsText "span" @@ -585,6 +588,7 @@ read_span = matchingElement NsText "span" $ matchChildContent [ read_span , read_spaces , read_line_break + , read_tab , read_link , read_note , read_citation @@ -604,6 +608,7 @@ read_paragraph = matchingElement NsText "p" $ matchChildContent [ read_span , read_spaces , read_line_break + , read_tab , read_link , read_note , read_citation @@ -630,6 +635,7 @@ read_header = matchingElement NsText "h" children <- ( matchChildContent [ read_span , read_spaces , read_line_break + , read_tab , read_link , read_note , read_citation diff --git a/test/Tests/Readers/Odt.hs b/test/Tests/Readers/Odt.hs index 9b5ec6b9e..edff4fe2c 100644 --- a/test/Tests/Readers/Odt.hs +++ b/test/Tests/Readers/Odt.hs @@ -176,6 +176,7 @@ namesOfTestsComparingToNative = [ "blockquote" , "referenceToText" , "simpleTable" , "simpleTableWithCaption" + , "tab" -- , "table" , "textMixedStyles" , "tableWithContents" diff --git a/test/odt/native/tab.native b/test/odt/native/tab.native new file mode 100644 index 000000000..948e81cd5 --- /dev/null +++ b/test/odt/native/tab.native @@ -0,0 +1 @@ +[Para [Str "Three",Space,Str "tabs",Space,Str "between",Space,Str "A",Space,Str "and",Space,Str "B",Space,Str "will",Space,Str "be",Space,Str "converted",Space,Str "to",Space,Str "one",Space,Str "Space:",Space,Str "A",Space,Str "B."]] \ No newline at end of file diff --git a/test/odt/odt/tab.odt b/test/odt/odt/tab.odt new file mode 100644 index 000000000..a2c80f45c Binary files /dev/null and b/test/odt/odt/tab.odt differ -- cgit v1.2.3 From 4371223d132ef822cff0ddee6099a5fe295385cb Mon Sep 17 00:00:00 2001 From: tecosaur <tec@tecosaur.com> Date: Fri, 2 Apr 2021 05:36:02 +0800 Subject: Org writer: Use LaTeX style maths deliminators (#7196) Org works better with LaTeX-style delimiters. --- src/Text/Pandoc/Writers/Org.hs | 4 ++-- test/writer.org | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index 88a2b8314..d0c9813da 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -404,8 +404,8 @@ inlineToOrg (Str str) = return . literal $ escapeString str inlineToOrg (Math t str) = do modify $ \st -> st{ stHasMath = True } return $ if t == InlineMath - then "$" <> literal str <> "$" - else "$$" <> literal str <> "$$" + then "\\(" <> literal str <> "\\)" + else "\\[" <> literal str <> "\\]" inlineToOrg il@(RawInline f str) | isRawFormat f = return $ literal str | otherwise = do diff --git a/test/writer.org b/test/writer.org index 34dd51f43..df9d2b3a5 100644 --- a/test/writer.org +++ b/test/writer.org @@ -600,14 +600,14 @@ Ellipses...and...and.... :END: - \cite[22-23]{smith.1899} -- $2+2=4$ -- $x \in y$ -- $\alpha \wedge \omega$ -- $223$ -- $p$-Tree +- \(2+2=4\) +- \(x \in y\) +- \(\alpha \wedge \omega\) +- \(223\) +- \(p\)-Tree - Here's some display math: - $$\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}$$ -- Here's one that has a line break in it: $\alpha + \omega \times x^2$. + \[\frac{d}{dx}f(x)=\lim_{h\to 0}\frac{f(x+h)-f(x)}{h}\] +- Here's one that has a line break in it: \(\alpha + \omega \times x^2\). These shouldn't be math: -- cgit v1.2.3 From 935d10769df682fa08d947a873ca7665136e0b79 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 2 Apr 2021 17:04:55 -0700 Subject: Fix "phrase" in DocBook: take classes from "role" not "class". Closes #7195. Revises #6438. --- src/Text/Pandoc/Readers/DocBook.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 4f525cfb1..ac3caa2c0 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -1075,7 +1075,7 @@ parseInline (Elem e) = return $ spanWith (attrValue "id" e, [], []) mempty "phrase" -> do let ident = attrValue "id" e - let classes = T.words $ attrValue "class" e + let classes = T.words $ attrValue "role" e if ident /= "" || classes /= [] then innerInlines (spanWith (ident,classes,[])) else innerInlines id -- cgit v1.2.3 From 65a9d3a8786c23f79de9dcdf56ab7efb789726ff Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 5 Apr 2021 08:49:03 -0700 Subject: SelfContained: use application/octet-stream for unknown mime types... instead of halting with an error. Closes #7202. --- src/Text/Pandoc/SelfContained.hs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index c9e20cad0..17c1e18c9 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -244,11 +244,10 @@ getData mimetype src let raw' = if ext `elem` [".gz", ".svgz"] then B.concat $ L.toChunks $ Gzip.decompress $ L.fromChunks [raw] else raw - mime <- case (mimetype, respMime) of - ("",Nothing) -> throwError $ PandocSomeError - $ "Could not determine mime type for `" <> src <> "'" - (x, Nothing) -> return x - (_, Just x ) -> return x + let mime = case (mimetype, respMime) of + ("",Nothing) -> "application/octet-stream" + (x, Nothing) -> x + (_, Just x ) -> x result <- if "text/css" `T.isPrefixOf` mime then do oldInputs <- getInputFiles -- cgit v1.2.3 From 038261ea529bc4516d7cee501db70020938dbf2b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 5 Apr 2021 21:45:52 +0200 Subject: JATS writer: escape disallows chars in identifiers XML identifiers must start with an underscore or letter, and can contain only a limited set of punctuation characters. Any IDs not adhering to these rules are rewritten by writing the offending characters as Uxxxx, where `xxxx` is the character's hex code. --- src/Text/Pandoc/Writers/JATS.hs | 27 ++-- src/Text/Pandoc/Writers/JATS/References.hs | 5 +- src/Text/Pandoc/Writers/JATS/Table.hs | 4 +- src/Text/Pandoc/XML.hs | 30 ++++- test/Tests/Writers/JATS.hs | 205 ++++++++++++++++------------- 5 files changed, 162 insertions(+), 109 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index a9369db7a..26f94cb03 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -239,7 +239,7 @@ languageFor classes = codeAttr :: Attr -> (Text, [(Text, Text)]) codeAttr (ident,classes,kvs) = (lang, attr) where - attr = [("id",ident) | not (T.null ident)] ++ + attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("language",lang) | not (T.null lang)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["code-type", "code-version", "executable", @@ -251,7 +251,8 @@ codeAttr (ident,classes,kvs) = (lang, attr) blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text) blockToJATS _ Null = return empty blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do - let idAttr = [("id", writerIdentifierPrefix opts <> id') | not (T.null id')] + let idAttr = [ ("id", writerIdentifierPrefix opts <> escapeNCName id') + | not (T.null id')] let otherAttrs = ["sec-type", "specific-use"] let attribs = idAttr ++ [(k,v) | (k,v) <- kvs, k `elem` otherAttrs] title' <- inlinesToJATS opts ils @@ -260,7 +261,7 @@ blockToJATS opts (Div (id',"section":_,kvs) (Header _lvl _ ils : xs)) = do inTagsSimple "title" title' $$ contents -- Bibliography reference: blockToJATS opts (Div (ident,_,_) [Para lst]) | "ref-" `T.isPrefixOf` ident = - inTags True "ref" [("id", ident)] . + inTags True "ref" [("id", escapeNCName ident)] . inTagsSimple "mixed-citation" <$> inlinesToJATS opts lst blockToJATS opts (Div ("refs",_,_) xs) = do @@ -271,14 +272,14 @@ blockToJATS opts (Div ("refs",_,_) xs) = do return $ inTagsIndented "ref-list" contents blockToJATS opts (Div (ident,[cls],kvs) bs) | cls `elem` ["fig", "caption", "table-wrap"] = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] return $ inTags True cls attr contents blockToJATS opts (Div (ident,_,kvs) bs) = do contents <- blocksToJATS opts bs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs, k `elem` ["specific-use", "content-type", "orientation", "position"]] @@ -296,7 +297,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt let capt = if null txt then empty else inTagsSimple "caption" $ inTagsSimple "p" alt - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [(k,v) | (k,v) <- kvs, k `elem` ["fig-type", "orientation", "position", "specific-use"]] let graphicattr = [("mimetype",maintype), @@ -307,7 +308,7 @@ blockToJATS opts (Para [Image (ident,_,kvs) txt capt $$ selfClosingTag "graphic" graphicattr blockToJATS _ (Para [Image (ident,_,kvs) _ (src, tit)]) = do let (maintype, subtype) = imageMimeType src kvs - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ @@ -434,7 +435,7 @@ inlineToJATS opts (Note contents) = do let notenum = case notes of (n, _):_ -> n + 1 [] -> 1 - thenote <- inTags True "fn" [("id","fn" <> tshow notenum)] + thenote <- inTags True "fn" [("id", "fn" <> tshow notenum)] <$> wrappedBlocksToJATS (not . isPara) opts (walk demoteHeaderAndRefs contents) modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } @@ -447,7 +448,7 @@ inlineToJATS opts (Cite _ lst) = inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils inlineToJATS opts (Span (ident,_,kvs) ils) = do contents <- inlinesToJATS opts ils - let attr = [("id",ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs , k `elem` ["content-type", "rationale", @@ -488,9 +489,9 @@ inlineToJATS _ (Link _attr [Str t] (T.stripPrefix "mailto:" -> Just email, _)) return $ inTagsSimple "email" $ literal (escapeStringForXML email) inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do let attr = mconcat - [ [("id", ident) | not (T.null ident)] + [ [("id", escapeNCName ident) | not (T.null ident)] , [("alt", stringify txt) | not (null txt)] - , [("rid", src)] + , [("rid", escapeNCName src)] , [(k,v) | (k,v) <- kvs, k `elem` ["ref-type", "specific-use"]] , [("ref-type", "bibr") | "ref-" `T.isPrefixOf` src] ] @@ -500,7 +501,7 @@ inlineToJATS opts (Link (ident,_,kvs) txt (T.uncons -> Just ('#', src), _)) = do contents <- inlinesToJATS opts txt return $ inTags False "xref" attr contents inlineToJATS opts (Link (ident,_,kvs) txt (src, tit)) = do - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("ext-link-type", "uri"), ("xlink:href", src)] ++ [("xlink:title", tit) | not (T.null tit)] ++ @@ -518,7 +519,7 @@ inlineToJATS _ (Image (ident,_,kvs) _ (src, tit)) = do let subtype = fromMaybe "" $ lookup "mime-subtype" kvs `mplus` (T.drop 1 . T.dropWhile (/='/') <$> mbMT) - let attr = [("id", ident) | not (T.null ident)] ++ + let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("mimetype", maintype), ("mime-subtype", subtype), ("xlink:href", src)] ++ diff --git a/src/Text/Pandoc/Writers/JATS/References.hs b/src/Text/Pandoc/Writers/JATS/References.hs index 903144128..5b19fd034 100644 --- a/src/Text/Pandoc/Writers/JATS/References.hs +++ b/src/Text/Pandoc/Writers/JATS/References.hs @@ -29,7 +29,7 @@ import Text.Pandoc.Builder (Inlines) import Text.Pandoc.Options (WriterOptions) import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.JATS.Types -import Text.Pandoc.XML (escapeStringForXML, inTags) +import Text.Pandoc.XML (escapeNCName, escapeStringForXML, inTags) import qualified Data.Text as T referencesToJATS :: PandocMonad m @@ -46,7 +46,8 @@ referenceToJATS :: PandocMonad m referenceToJATS _opts ref = do let refType = referenceType ref let pubType = [("publication-type", refType) | not (T.null refType)] - let wrap = inTags True "ref" [("id", "ref-" <> unItemId (referenceId ref))] + let ident = escapeNCName $ "ref-" <> unItemId (referenceId ref) + let wrap = inTags True "ref" [("id", ident)] . inTags True "element-citation" pubType return . wrap . vcat $ [ authors diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs index 465480f59..2e34900d2 100644 --- a/src/Text/Pandoc/Writers/JATS/Table.hs +++ b/src/Text/Pandoc/Writers/JATS/Table.hs @@ -24,7 +24,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options (WriterOptions) import Text.Pandoc.Shared (tshow) import Text.Pandoc.Writers.JATS.Types -import Text.Pandoc.XML (inTags, inTagsIndented, selfClosingTag) +import Text.Pandoc.XML (escapeNCName, inTags, inTagsIndented, selfClosingTag) import qualified Data.Text as T import qualified Text.Pandoc.Writers.AnnotatedTable as Ann @@ -216,7 +216,7 @@ cellToJats opts celltype (Ann.Cell (colspec :| _) _colNum cell) = toAttribs :: Attr -> [Text] -> [(Text, Text)] toAttribs (ident, _classes, kvs) knownAttribs = - (if T.null ident then id else (("id", ident) :)) $ + (if T.null ident then id else (("id", escapeNCName ident) :)) $ filter ((`elem` knownAttribs) . fst) kvs tableCellToJats :: PandocMonad m diff --git a/src/Text/Pandoc/XML.hs b/src/Text/Pandoc/XML.hs index 6dbbce1d2..79b4768ec 100644 --- a/src/Text/Pandoc/XML.hs +++ b/src/Text/Pandoc/XML.hs @@ -13,6 +13,7 @@ Functions for escaping and formatting XML. -} module Text.Pandoc.XML ( escapeCharForXML, escapeStringForXML, + escapeNCName, inTags, selfClosingTag, inTagsSimple, @@ -24,7 +25,7 @@ module Text.Pandoc.XML ( escapeCharForXML, html5Attributes, rdfaAttributes ) where -import Data.Char (isAscii, isSpace, ord) +import Data.Char (isAscii, isSpace, ord, isLetter, isDigit) import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity, htmlEntities) @@ -119,8 +120,33 @@ html5EntityMap = foldr go mempty htmlEntities where ent' = T.takeWhile (/=';') (T.pack ent) _ -> entmap +-- | Converts a string into an NCName, i.e., an XML name without colons. +-- Disallowed characters are escaped using @ux%x@, where @%x@ is the +-- hexadecimal unicode identifier of the escaped character. +escapeNCName :: Text -> Text +escapeNCName t = case T.uncons t of + Nothing -> T.empty + Just (c, cs) -> escapeStartChar c <> T.concatMap escapeNCNameChar cs + where + escapeStartChar :: Char -> Text + escapeStartChar c = if isLetter c || c == '_' + then T.singleton c + else escapeChar c --- Unescapes XML entities + escapeNCNameChar :: Char -> Text + escapeNCNameChar c = if isNCNameChar c + then T.singleton c + else escapeChar c + + isNCNameChar :: Char -> Bool + isNCNameChar c = isLetter c || c `elem` ("_-.·" :: String) || isDigit c + || '\x0300' <= c && c <= '\x036f' + || '\x203f' <= c && c <= '\x2040' + + escapeChar :: Char -> Text + escapeChar = T.pack . printf "U%04X" . ord + +-- | Unescapes XML entities fromEntities :: Text -> Text fromEntities t = let (x, y) = T.break (== '&') t diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 2f501c890..23c1686dc 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -1,21 +1,21 @@ {-# LANGUAGE OverloadedStrings #-} module Tests.Writers.JATS (tests) where -import Data.Text (unpack) +import Data.Text (Text) import Test.Tasty import Tests.Helpers import Text.Pandoc import Text.Pandoc.Arbitrary () import Text.Pandoc.Builder +import qualified Data.Text as T -jats :: (ToPandoc a) => a -> String -jats = unpack - . purely (writeJATS def{ writerWrapText = WrapNone }) - . toPandoc +jats :: (ToPandoc a) => a -> Text +jats = purely (writeJATS def{ writerWrapText = WrapNone }) + . toPandoc -jatsArticleAuthoring :: (ToPandoc a) => a -> String -jatsArticleAuthoring = unpack - . purely (writeJatsArticleAuthoring def{ writerWrapText = WrapNone }) +jatsArticleAuthoring :: (ToPandoc a) => a -> Text +jatsArticleAuthoring = + purely (writeJatsArticleAuthoring def{ writerWrapText = WrapNone }) . toPandoc {- @@ -32,89 +32,114 @@ which is in turn shorthand for infix 4 =: (=:) :: (ToString a, ToPandoc a) - => String -> (a, String) -> TestTree + => String -> (a, Text) -> TestTree (=:) = test jats tests :: [TestTree] -tests = [ testGroup "inline code" - [ "basic" =: code "@&" =?> "<p><monospace>@&</monospace></p>" - , "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p><code language=\"c\">@&</code></p>" - ] - , testGroup "block code" - [ "basic" =: codeBlock "@&" =?> "<preformat>@&</preformat>" - , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "<code language=\"c\">@&</code>" - ] - , testGroup "images" - [ "basic" =: - image "/url" "title" mempty - =?> "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />" - ] - , testGroup "inlines" - [ "Emphasis" =: emph "emphasized" - =?> "<p><italic>emphasized</italic></p>" +tests = + [ testGroup "inline code" + [ "basic" =: code "@&" =?> "<p><monospace>@&</monospace></p>" + , "lang" =: codeWith ("", ["c"], []) "@&" =?> "<p><code language=\"c\">@&</code></p>" + ] + , testGroup "block code" + [ "basic" =: codeBlock "@&" =?> "<preformat>@&</preformat>" + , "lang" =: codeBlockWith ("", ["c"], []) "@&" =?> "<code language=\"c\">@&</code>" + ] + , testGroup "images" + [ "basic" =: + image "/url" "title" mempty + =?> "<graphic mimetype=\"image\" mime-subtype=\"\" xlink:href=\"/url\" xlink:title=\"title\" />" + ] + , testGroup "inlines" + [ "Emphasis" =: emph "emphasized" + =?> "<p><italic>emphasized</italic></p>" + + , test jatsArticleAuthoring "footnote in articleauthoring tag set" + ("test" <> note (para "footnote") =?> + unlines [ "<p>test<fn>" + , " <p>footnote</p>" + , "</fn></p>" + ]) + ] + , "bullet list" =: bulletList [ plain $ text "first" + , plain $ text "second" + , plain $ text "third" + ] + =?> "<list list-type=\"bullet\">\n\ + \ <list-item>\n\ + \ <p>first</p>\n\ + \ </list-item>\n\ + \ <list-item>\n\ + \ <p>second</p>\n\ + \ </list-item>\n\ + \ <list-item>\n\ + \ <p>third</p>\n\ + \ </list-item>\n\ + \</list>" + , testGroup "definition lists" + [ "with internal link" =: definitionList [(link "#go" "" (str "testing"), + [plain (text "hi there")])] =?> + "<def-list>\n\ + \ <def-item>\n\ + \ <term><xref alt=\"testing\" rid=\"go\">testing</xref></term>\n\ + \ <def>\n\ + \ <p>hi there</p>\n\ + \ </def>\n\ + \ </def-item>\n\ + \</def-list>" + ] + , testGroup "math" + [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> + "<p><inline-formula><alternatives>\n\ + \<tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\ + \<mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula></p>" + ] + , testGroup "headers" + [ "unnumbered header" =: + headerWith ("foo",["unnumbered"],[]) 1 + (text "Header 1" <> note (plain $ text "note")) =?> + "<sec id=\"foo\">\n\ + \ <title>Header 1<xref ref-type=\"fn\" rid=\"fn1\">1</xref></title>\n\ + \</sec>" + , "unnumbered sub header" =: + headerWith ("foo",["unnumbered"],[]) 1 + (text "Header") + <> headerWith ("foo",["unnumbered"],[]) 2 + (text "Sub-Header") =?> + "<sec id=\"foo\">\n\ + \ <title>Header</title>\n\ + \ <sec id=\"foo\">\n\ + \ <title>Sub-Header</title>\n\ + \ </sec>\n\ + \</sec>" + , "containing image" =: + header 1 (image "imgs/foo.jpg" "" (text "Alt text")) =?> + "<sec>\n\ + \ <title><inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" /></title>\n\ + \</sec>" + ] + + , testGroup "ids" + [ "non-ASCII in header ID" =: + headerWith ("smørbrød",[],[]) 1 (text "smørbrød") =?> + T.unlines [ "<sec id=\"smørbrød\">" + , " <title>smørbrød</title>" + , "</sec>" + ] + + , "disallowed symbol in header id" =: + headerWith ("i/o",[],[]) 1 (text "I/O") =?> + T.unlines [ "<sec id=\"iU002Fo\">" + , " <title>I/O</title>" + , "</sec>" + ] + + , "disallowed symbols in internal link target" =: + link "#foo:bar" "" "baz" =?> + "<p><xref alt=\"baz\" rid=\"fooU003Abar\">baz</xref></p>" - , test jatsArticleAuthoring "footnote in articleauthoring tag set" - ("test" <> note (para "footnote") =?> - unlines [ "<p>test<fn>" - , " <p>footnote</p>" - , "</fn></p>" - ]) - ] - , "bullet list" =: bulletList [ plain $ text "first" - , plain $ text "second" - , plain $ text "third" - ] - =?> "<list list-type=\"bullet\">\n\ - \ <list-item>\n\ - \ <p>first</p>\n\ - \ </list-item>\n\ - \ <list-item>\n\ - \ <p>second</p>\n\ - \ </list-item>\n\ - \ <list-item>\n\ - \ <p>third</p>\n\ - \ </list-item>\n\ - \</list>" - , testGroup "definition lists" - [ "with internal link" =: definitionList [(link "#go" "" (str "testing"), - [plain (text "hi there")])] =?> - "<def-list>\n\ - \ <def-item>\n\ - \ <term><xref alt=\"testing\" rid=\"go\">testing</xref></term>\n\ - \ <def>\n\ - \ <p>hi there</p>\n\ - \ </def>\n\ - \ </def-item>\n\ - \</def-list>" - ] - , testGroup "math" - [ "escape |" =: para (math "\\sigma|_{\\{x\\}}") =?> - "<p><inline-formula><alternatives>\n\ - \<tex-math><![CDATA[\\sigma|_{\\{x\\}}]]></tex-math>\n\ - \<mml:math display=\"inline\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"><mml:mrow><mml:mi>σ</mml:mi><mml:msub><mml:mo stretchy=\"false\" form=\"prefix\">|</mml:mo><mml:mrow><mml:mo stretchy=\"false\" form=\"prefix\">{</mml:mo><mml:mi>x</mml:mi><mml:mo stretchy=\"false\" form=\"postfix\">}</mml:mo></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula></p>" - ] - , testGroup "headers" - [ "unnumbered header" =: - headerWith ("foo",["unnumbered"],[]) 1 - (text "Header 1" <> note (plain $ text "note")) =?> - "<sec id=\"foo\">\n\ - \ <title>Header 1<xref ref-type=\"fn\" rid=\"fn1\">1</xref></title>\n\ - \</sec>" - , "unnumbered sub header" =: - headerWith ("foo",["unnumbered"],[]) 1 - (text "Header") - <> headerWith ("foo",["unnumbered"],[]) 2 - (text "Sub-Header") =?> - "<sec id=\"foo\">\n\ - \ <title>Header</title>\n\ - \ <sec id=\"foo\">\n\ - \ <title>Sub-Header</title>\n\ - \ </sec>\n\ - \</sec>" - , "containing image" =: - header 1 (image "imgs/foo.jpg" "" (text "Alt text")) =?> - "<sec>\n\ - \ <title><inline-graphic mimetype=\"image\" mime-subtype=\"jpeg\" xlink:href=\"imgs/foo.jpg\" /></title>\n\ - \</sec>" - ] - ] + , "code id starting with a number" =: + codeWith ("7y",[],[]) "print 5" =?> + "<p><monospace id=\"U0037y\">print 5</monospace></p>" + ] + ] -- cgit v1.2.3 From 21fed4a9c2c1c4bd2aedb619c3c76e0211b77a25 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 5 Apr 2021 23:26:54 -0700 Subject: SelfContained: remove unneeded imports. --- src/Text/Pandoc/SelfContained.hs | 2 -- 1 file changed, 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 17c1e18c9..3bbab4bbe 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -16,7 +16,6 @@ the HTML using data URIs. module Text.Pandoc.SelfContained ( makeDataURI, makeSelfContained ) where import Codec.Compression.GZip as Gzip import Control.Applicative ((<|>)) -import Control.Monad.Except (throwError) import Control.Monad.Trans (lift) import Data.ByteString (ByteString) import Data.ByteString.Base64 @@ -29,7 +28,6 @@ import System.FilePath (takeDirectory, takeExtension, (</>)) import Text.HTML.TagSoup import Text.Pandoc.Class.PandocMonad (PandocMonad (..), fetchItem, getInputFiles, report, setInputFiles) -import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.MIME (MimeType) import Text.Pandoc.Shared (isURI, renderTags', trim) -- cgit v1.2.3 From 60974538b25657c9aa37e72cc66ca3957912ddec Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 5 Apr 2021 23:29:22 -0700 Subject: Commonmark writer: Use backslash escapes for `<` and `|`... instead of entities. Closes #7208. --- src/Text/Pandoc/Writers/Markdown.hs | 12 +++++++++++- test/command/7208.md | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 test/command/7208.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 4d9f3d5b0..daf45ed53 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -67,7 +67,17 @@ writePlain opts document = -- | Convert Pandoc to Commonmark. writeCommonMark :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCommonMark opts document = - evalMD (pandocToMarkdown opts document) def{ envVariant = Commonmark } def + evalMD (pandocToMarkdown opts' document) def{ envVariant = Commonmark } def + where + opts' = opts{ writerExtensions = + -- These extensions can't be enabled or disabled + -- for commonmark because they're part of the core; + -- we set them here so that escapeText will behave + -- properly. + enableExtension Ext_all_symbols_escapable $ + enableExtension Ext_pipe_tables $ + enableExtension Ext_intraword_underscores $ + writerExtensions opts } pandocTitleBlock :: Doc Text -> [Doc Text] -> Doc Text -> Doc Text pandocTitleBlock tit auths dat = diff --git a/test/command/7208.md b/test/command/7208.md new file mode 100644 index 000000000..e65943ade --- /dev/null +++ b/test/command/7208.md @@ -0,0 +1,6 @@ +``` +% pandoc -t gfm +\<hi\> +^D +\<hi\> +``` -- cgit v1.2.3 From e227496d3a5d07df8183b8d986ea2aa36c90612c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Thu, 8 Apr 2021 22:14:47 +0200 Subject: Lua filter: respect Inlines/Blocks filter functions in pandoc.walk_* --- src/Text/Pandoc/Lua/Filter.hs | 2 ++ src/Text/Pandoc/Lua/Module/Pandoc.hs | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Lua/Filter.hs b/src/Text/Pandoc/Lua/Filter.hs index 90967f295..01bf90efa 100644 --- a/src/Text/Pandoc/Lua/Filter.hs +++ b/src/Text/Pandoc/Lua/Filter.hs @@ -13,7 +13,9 @@ module Text.Pandoc.Lua.Filter ( LuaFilterFunction , LuaFilter , runFilterFile , walkInlines + , walkInlineLists , walkBlocks + , walkBlockLists , module Text.Pandoc.Lua.Walk ) where import Control.Applicative ((<|>)) diff --git a/src/Text/Pandoc/Lua/Module/Pandoc.hs b/src/Text/Pandoc/Lua/Module/Pandoc.hs index 8d30f9a0c..5c14b3a30 100644 --- a/src/Text/Pandoc/Lua/Module/Pandoc.hs +++ b/src/Text/Pandoc/Lua/Module/Pandoc.hs @@ -23,8 +23,10 @@ import Foreign.Lua (Lua, NumResults, Optional, Peekable, Pushable) import System.Exit (ExitCode (..)) import Text.Pandoc.Class.PandocIO (runIO) import Text.Pandoc.Definition (Block, Inline) -import Text.Pandoc.Lua.Filter (walkInlines, walkBlocks, LuaFilter, SingletonsList (..)) +import Text.Pandoc.Lua.Filter (LuaFilter, SingletonsList (..), walkInlines, + walkInlineLists, walkBlocks, walkBlockLists) import Text.Pandoc.Lua.Marshaling () +import Text.Pandoc.Lua.Marshaling.List (List (..)) import Text.Pandoc.Lua.PandocLua (PandocLua, addFunction, liftPandocLua, loadDefaultModule) import Text.Pandoc.Walk (Walkable) @@ -51,9 +53,12 @@ pushModule = do return 1 walkElement :: (Walkable (SingletonsList Inline) a, - Walkable (SingletonsList Block) a) + Walkable (SingletonsList Block) a, + Walkable (List Inline) a, + Walkable (List Block) a) => a -> LuaFilter -> PandocLua a -walkElement x f = liftPandocLua $ walkInlines f x >>= walkBlocks f +walkElement x f = liftPandocLua $ + walkInlines f x >>= walkInlineLists f >>= walkBlocks f >>= walkBlockLists f walk_inline :: Inline -> LuaFilter -> PandocLua Inline walk_inline = walkElement -- cgit v1.2.3 From 20cd33e5a44810b68fed74da00f4f51eb2282147 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 8 Apr 2021 14:47:11 -0700 Subject: Fix regression in grid tables for wide characters. In the translation from String to Text, a char-width-sensitive splitAt' was dropped. This commit reinstates it. Closes #7214. --- src/Text/Pandoc/Shared.hs | 18 +++++++++++++----- test/command/7214.md | 28 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) create mode 100644 test/command/7214.md (limited to 'src') diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 3292b32f4..95cbdc8b8 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -153,12 +153,20 @@ splitTextBy isSep t in first : splitTextBy isSep (T.dropWhile isSep rest) splitTextByIndices :: [Int] -> T.Text -> [T.Text] -splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) +splitTextByIndices ns = splitTextByRelIndices (zipWith (-) ns (0:ns)) . T.unpack where - splitTextByRelIndices [] t = [t] - splitTextByRelIndices (x:xs) t = - let (first, rest) = T.splitAt x t - in first : splitTextByRelIndices xs rest + splitTextByRelIndices [] cs = [T.pack cs] + splitTextByRelIndices (x:xs) cs = + let (first, rest) = splitAt' x cs + in T.pack first : splitTextByRelIndices xs rest + +-- Note: don't replace this with T.splitAt, which is not sensitive +-- to character widths! +splitAt' :: Int -> [Char] -> ([Char],[Char]) +splitAt' _ [] = ([],[]) +splitAt' n xs | n <= 0 = ([],xs) +splitAt' n (x:xs) = (x:ys,zs) + where (ys,zs) = splitAt' (n - charWidth x) xs ordNub :: (Ord a) => [a] -> [a] ordNub l = go Set.empty l diff --git a/test/command/7214.md b/test/command/7214.md new file mode 100644 index 000000000..43bf9e4ca --- /dev/null +++ b/test/command/7214.md @@ -0,0 +1,28 @@ +``` +% pandoc ++------------+----------+------------------+ +|日本語 | の文字列 | words in english | ++------------+----------+------------------+ +|abc defghij | def | xyz | ++------------+----------+------------------+ +^D +<table style="width:60%;"> +<colgroup> +<col style="width: 18%" /> +<col style="width: 15%" /> +<col style="width: 26%" /> +</colgroup> +<tbody> +<tr class="odd"> +<td>日本語</td> +<td>の文字列</td> +<td>words in english</td> +</tr> +<tr class="even"> +<td>abc defghij</td> +<td>def</td> +<td>xyz</td> +</tr> +</tbody> +</table> +``` -- cgit v1.2.3 From 051b7ffeaffdaf34ed1384a239cf0179aa59b932 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 10 Apr 2021 10:47:10 +0200 Subject: JATS writer: add footnote number as label in backmatter Footnotes in the backmatter are given the footnote's number as a label. The articleauthoring output is unaffected from this change, as footnotes are placed inline there. Closes: #7210 --- src/Text/Pandoc/Writers/JATS.hs | 1 + test/writer.jats_archiving | 14 ++++++++------ test/writer.jats_publishing | 14 ++++++++------ 3 files changed, 17 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 26f94cb03..e78c6dc8f 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -436,6 +436,7 @@ inlineToJATS opts (Note contents) = do (n, _):_ -> n + 1 [] -> 1 thenote <- inTags True "fn" [("id", "fn" <> tshow notenum)] + . (inTagsSimple "label" (literal $ tshow notenum) <>) <$> wrappedBlocksToJATS (not . isPara) opts (walk demoteHeaderAndRefs contents) modify $ \st -> st{ jatsNotes = (notenum, thenote) : notes } diff --git a/test/writer.jats_archiving b/test/writer.jats_archiving index 1a9537a21..332b5d3fd 100644 --- a/test/writer.jats_archiving +++ b/test/writer.jats_archiving @@ -882,11 +882,12 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> <back> <fn-group> <fn id="fn1"> - <p>Here is the footnote. It can go anywhere after the footnote reference. - It need not be placed at the end of the document.</p> + <label>1</label><p>Here is the footnote. It can go anywhere after the + footnote reference. It need not be placed at the end of the document.</p> </fn> <fn id="fn2"> - <p>Here’s the long note. This one contains multiple blocks.</p> + <label>2</label><p>Here’s the long note. This one contains multiple + blocks.</p> <p>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p> <p specific-use="wrapper"> @@ -896,16 +897,17 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> just indent the first line of each block.</p> </fn> <fn id="fn3"> - <p>This is <italic>easier</italic> to type. Inline notes may contain + <label>3</label><p>This is <italic>easier</italic> to type. Inline notes + may contain <ext-link ext-link-type="uri" xlink:href="http://google.com">links</ext-link> and <monospace>]</monospace> verbatim characters, as well as [bracketed text].</p> </fn> <fn id="fn4"> - <p>In quote.</p> + <label>4</label><p>In quote.</p> </fn> <fn id="fn5"> - <p>In list.</p> + <label>5</label><p>In list.</p> </fn> </fn-group> </back> diff --git a/test/writer.jats_publishing b/test/writer.jats_publishing index b85816848..f53fd554d 100644 --- a/test/writer.jats_publishing +++ b/test/writer.jats_publishing @@ -882,11 +882,12 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> <back> <fn-group> <fn id="fn1"> - <p>Here is the footnote. It can go anywhere after the footnote reference. - It need not be placed at the end of the document.</p> + <label>1</label><p>Here is the footnote. It can go anywhere after the + footnote reference. It need not be placed at the end of the document.</p> </fn> <fn id="fn2"> - <p>Here’s the long note. This one contains multiple blocks.</p> + <label>2</label><p>Here’s the long note. This one contains multiple + blocks.</p> <p>Subsequent blocks are indented to show that they belong to the footnote (as with list items).</p> <p specific-use="wrapper"> @@ -896,16 +897,17 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> just indent the first line of each block.</p> </fn> <fn id="fn3"> - <p>This is <italic>easier</italic> to type. Inline notes may contain + <label>3</label><p>This is <italic>easier</italic> to type. Inline notes + may contain <ext-link ext-link-type="uri" xlink:href="http://google.com">links</ext-link> and <monospace>]</monospace> verbatim characters, as well as [bracketed text].</p> </fn> <fn id="fn4"> - <p>In quote.</p> + <label>4</label><p>In quote.</p> </fn> <fn id="fn5"> - <p>In list.</p> + <label>5</label><p>In list.</p> </fn> </fn-group> </back> -- cgit v1.2.3 From 2d60524de43d59ffb1763a33a15cc2ecce613ecf Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 10 Apr 2021 11:38:02 +0200 Subject: JATS writer: convert spans to <named-content> elements Spans with attributes are converted to `<named-content>` elements instead of being wrapped with `<milestone-start/>` and `<milestone-end>` elements. Milestone elements are not allowed in documents using the articleauthoring tag set, so this change ensures the creation of valid documents. Closes: #7211 --- src/Text/Pandoc/Writers/JATS.hs | 13 +++++++------ test/Tests/Writers/JATS.hs | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index e78c6dc8f..5b3e439d4 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -444,18 +444,19 @@ inlineToJATS opts (Note contents) = do ("rid", "fn" <> tshow notenum)] $ text (show notenum) inlineToJATS opts (Cite _ lst) = - -- TODO revisit this after examining the jats.csl pipeline inlinesToJATS opts lst -inlineToJATS opts (Span ("",_,[]) ils) = inlinesToJATS opts ils inlineToJATS opts (Span (ident,_,kvs) ils) = do contents <- inlinesToJATS opts ils let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("xml:lang",l) | ("lang",l) <- kvs] ++ [(k,v) | (k,v) <- kvs - , k `elem` ["content-type", "rationale", - "rid", "specific-use"]] - return $ selfClosingTag "milestone-start" attr <> contents <> - selfClosingTag "milestone-end" [] + , k `elem` ["alt", "content-type", "rid", "specific-use", + "vocab", "vocab-identifier", "vocab-term", + "vocab-term-identifier"]] + return $ + if null attr + then contents -- unwrap if no relevant attributes are given + else inTags False "named-content" attr contents inlineToJATS _ (Math t str) = do let addPref (Xml.Attr q v) | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index 23c1686dc..e90438176 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -142,4 +142,19 @@ tests = codeWith ("7y",[],[]) "print 5" =?> "<p><monospace id=\"U0037y\">print 5</monospace></p>" ] + + , testGroup "spans" + [ "unwrapped if no attributes given" =: + spanWith nullAttr "text in span" =?> + "<p>text in span</p>" + + , "converted to named-content element" =: + spanWith ("a", ["ignored"], [("alt", "aa")]) "text" =?> + "<p><named-content id=\"a\" alt=\"aa\">text</named-content></p>" + + , "unwrapped if named-content element would have no attributes" =: + spanWith ("", ["ignored"], [("hidden", "true")]) "text in span" =?> + "<p>text in span</p>" + + ] ] -- cgit v1.2.3 From 5f79a66ed64e9b0cc326e467dcb17239f1596fcc Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 16 Apr 2021 22:13:29 +0200 Subject: JATS writer: reduce unnecessary use of <p> elements for wrapping The `<p>` element is used for wrapping in cases were the contents would otherwise not be allowed in a certain context. Unnecessary wrapping is avoided, especially around quotes (`<disp-quote>` elements). Closes: #7227 --- src/Text/Pandoc/Writers/JATS.hs | 22 +++++---- src/Text/Pandoc/Writers/JATS/Table.hs | 26 +++++++++-- src/Text/Pandoc/Writers/JATS/Types.hs | 15 ++++-- test/command/7041.md | 23 +++++++++ test/writer.jats_archiving | 68 +++++++++++---------------- test/writer.jats_articleauthoring | 88 ++++++++++++++++------------------- test/writer.jats_publishing | 68 +++++++++++---------------- 7 files changed, 166 insertions(+), 144 deletions(-) create mode 100644 test/command/7041.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 5b3e439d4..b58ff8aef 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -1,6 +1,7 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Writers.JATS Copyright : 2017-2021 John MacFarlane @@ -80,7 +81,7 @@ writeJats tagSet opts d = do let environment = JATSEnv { jatsTagSet = tagSet , jatsInlinesWriter = inlinesToJATS - , jatsBlockWriter = blockToJATS + , jatsBlockWriter = wrappedBlocksToJATS , jatsReferences = refs } let initialState = JATSState { jatsNotes = [] } @@ -162,11 +163,9 @@ wrappedBlocksToJATS needsWrap opts = wrappedBlockToJATS b = do inner <- blockToJATS opts b return $ - if needsWrap b || isBlockQuote b -- see #7041 + if needsWrap b then inTags True "p" [("specific-use","wrapper")] inner else inner - isBlockQuote (BlockQuote _) = True - isBlockQuote _ = False -- | Auxiliary function to convert Plain block to Para. plainToPara :: Block -> Block @@ -324,10 +323,13 @@ blockToJATS opts (LineBlock lns) = blockToJATS opts $ linesToPara lns blockToJATS opts (BlockQuote blocks) = do tagSet <- asks jatsTagSet - let blocksToJats' = if tagSet == TagSetArticleAuthoring - then wrappedBlocksToJATS (not . isPara) - else blocksToJATS - inTagsIndented "disp-quote" <$> blocksToJats' opts blocks + let needsWrap = if tagSet == TagSetArticleAuthoring + then not . isPara + else \case + Header{} -> True + HorizontalRule -> True + _ -> False + inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks blockToJATS _ (CodeBlock a str) = return $ inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str))) where (lang, attr) = codeAttr a diff --git a/src/Text/Pandoc/Writers/JATS/Table.hs b/src/Text/Pandoc/Writers/JATS/Table.hs index 2e34900d2..70569bdcd 100644 --- a/src/Text/Pandoc/Writers/JATS/Table.hs +++ b/src/Text/Pandoc/Writers/JATS/Table.hs @@ -34,13 +34,19 @@ tableToJATS :: PandocMonad m -> JATS m (Doc Text) tableToJATS opts (Ann.Table attr caption colspecs thead tbodies tfoot) = do let (Caption _maybeShortCaption captionBlocks) = caption + -- Only paragraphs are allowed in captions, all other blocks must be + -- wrapped in @<p>@ elements. + let needsWrapping = \case + Plain{} -> False + Para{} -> False + _ -> True tbl <- captionlessTable opts attr colspecs thead tbodies tfoot captionDoc <- if null captionBlocks then return empty else do blockToJATS <- asks jatsBlockWriter - inTagsIndented "caption" . vcat <$> - mapM (blockToJATS opts) captionBlocks + inTagsIndented "caption" <$> + blockToJATS needsWrapping opts captionBlocks return $ inTags True "table-wrap" [] $ captionDoc $$ tbl captionlessTable :: PandocMonad m @@ -230,7 +236,7 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do inlinesToJats <- asks jatsInlinesWriter let cellContents = \case [Plain inlines] -> inlinesToJats opts inlines - blocks -> vcat <$> mapM (blockToJats opts) blocks + blocks -> blockToJats needsWrapInCell opts blocks let tag' = case ctype of BodyCell -> "td" HeaderCell -> "th" @@ -246,3 +252,17 @@ tableCellToJats opts ctype colAlign (Cell attr align rowspan colspan item) = do . maybeCons (colspanAttrib colspan) $ toAttribs attr validAttribs inTags False tag' attribs <$> cellContents item + +-- | Whether the JATS produced from this block should be wrapped in a +-- @<p>@ element when put directly below a @<td>@ element. +needsWrapInCell :: Block -> Bool +needsWrapInCell = \case + Plain{} -> False -- should be unwrapped anyway + Para{} -> False + BulletList{} -> False + OrderedList{} -> False + DefinitionList{} -> False + HorizontalRule -> False + CodeBlock{} -> False + RawBlock{} -> False -- responsibility of the user + _ -> True diff --git a/src/Text/Pandoc/Writers/JATS/Types.hs b/src/Text/Pandoc/Writers/JATS/Types.hs index 6fdddc0b5..8d8673cf6 100644 --- a/src/Text/Pandoc/Writers/JATS/Types.hs +++ b/src/Text/Pandoc/Writers/JATS/Types.hs @@ -37,11 +37,20 @@ newtype JATSState = JATSState { jatsNotes :: [(Int, Doc Text)] } +-- | Environment containing all information relevant for rendering. data JATSEnv m = JATSEnv - { jatsTagSet :: JATSTagSet + { jatsTagSet :: JATSTagSet -- ^ The tag set that's being ouput + + , jatsBlockWriter :: (Block -> Bool) + -> WriterOptions -> [Block] -> JATS m (Doc Text) + -- ^ Converts a block list to JATS, wrapping top-level blocks into a + -- @<p>@ element if the property evaluates to @True@. + -- See #7227. + , jatsInlinesWriter :: WriterOptions -> [Inline] -> JATS m (Doc Text) - , jatsBlockWriter :: WriterOptions -> Block -> JATS m (Doc Text) - , jatsReferences :: [Reference Inlines] + -- ^ Converts an inline list to JATS. + + , jatsReferences :: [Reference Inlines] -- ^ List of references } -- | JATS writer type diff --git a/test/command/7041.md b/test/command/7041.md new file mode 100644 index 000000000..1773963b8 --- /dev/null +++ b/test/command/7041.md @@ -0,0 +1,23 @@ +``` +% pandoc -f html -t jats +<table> + <tr><td><blockquote>Fly, you fools!</blockquote></td></tr> +</table> +^D +<table-wrap> + <table> + <colgroup> + <col width="100%" /> + </colgroup> + <tbody> + <tr> + <td><p specific-use="wrapper"> + <disp-quote> + <p>Fly, you fools!</p> + </disp-quote> + </p></td> + </tr> + </tbody> + </table> +</table-wrap> +``` diff --git a/test/writer.jats_archiving b/test/writer.jats_archiving index 332b5d3fd..70e15b6b8 100644 --- a/test/writer.jats_archiving +++ b/test/writer.jats_archiving @@ -78,39 +78,31 @@ Gruber’s markdown test suite.</p> <sec id="block-quotes"> <title>Block Quotes</title> <p>E-mail style:</p> - <p specific-use="wrapper"> + <disp-quote> + <p>This is a block quote. It is pretty short.</p> + </disp-quote> + <disp-quote> + <p>Code in a block quote:</p> + <preformat>sub status { + print "working"; +}</preformat> + <p>A list:</p> + <list list-type="order"> + <list-item> + <p>item one</p> + </list-item> + <list-item> + <p>item two</p> + </list-item> + </list> + <p>Nested block quotes:</p> <disp-quote> - <p>This is a block quote. It is pretty short.</p> + <p>nested</p> </disp-quote> - </p> - <p specific-use="wrapper"> <disp-quote> - <p>Code in a block quote:</p> - <preformat>sub status { - print "working"; -}</preformat> - <p>A list:</p> - <list list-type="order"> - <list-item> - <p>item one</p> - </list-item> - <list-item> - <p>item two</p> - </list-item> - </list> - <p>Nested block quotes:</p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> + <p>nested</p> </disp-quote> - </p> + </disp-quote> <p>This should not be a block quote: 2 > 1.</p> <p>And a following paragraph.</p> </sec> @@ -837,12 +829,10 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> </list-item> </list> <p>An e-mail address: <email>nobody@nowhere.net</email></p> - <p specific-use="wrapper"> - <disp-quote> - <p>Blockquoted: - <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> - </disp-quote> - </p> + <disp-quote> + <p>Blockquoted: + <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> + </disp-quote> <p>Auto-links should not occur here: <monospace><http://example.com/></monospace></p> <preformat>or here: <http://example.com/></preformat> @@ -866,11 +856,9 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> <italic>not</italic> be a footnote reference, because it contains a space.[^my note] Here is an inline note.<xref ref-type="fn" rid="fn3">3</xref></p> - <p specific-use="wrapper"> - <disp-quote> - <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> - </disp-quote> - </p> + <disp-quote> + <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> + </disp-quote> <list list-type="order"> <list-item> <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p> diff --git a/test/writer.jats_articleauthoring b/test/writer.jats_articleauthoring index 956a30faa..59485a114 100644 --- a/test/writer.jats_articleauthoring +++ b/test/writer.jats_articleauthoring @@ -67,43 +67,39 @@ Gruber’s markdown test suite.</p> <sec id="block-quotes"> <title>Block Quotes</title> <p>E-mail style:</p> - <p specific-use="wrapper"> - <disp-quote> - <p>This is a block quote. It is pretty short.</p> - </disp-quote> - </p> - <p specific-use="wrapper"> - <disp-quote> - <p>Code in a block quote:</p> - <p specific-use="wrapper"> - <preformat>sub status { + <disp-quote> + <p>This is a block quote. It is pretty short.</p> + </disp-quote> + <disp-quote> + <p>Code in a block quote:</p> + <p specific-use="wrapper"> + <preformat>sub status { print "working"; }</preformat> - </p> - <p>A list:</p> - <p specific-use="wrapper"> - <list list-type="order"> - <list-item> - <p>item one</p> - </list-item> - <list-item> - <p>item two</p> - </list-item> - </list> - </p> - <p>Nested block quotes:</p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> - </disp-quote> - </p> + </p> + <p>A list:</p> + <p specific-use="wrapper"> + <list list-type="order"> + <list-item> + <p>item one</p> + </list-item> + <list-item> + <p>item two</p> + </list-item> + </list> + </p> + <p>Nested block quotes:</p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> + <p specific-use="wrapper"> + <disp-quote> + <p>nested</p> + </disp-quote> + </p> + </disp-quote> <p>This should not be a block quote: 2 > 1.</p> <p>And a following paragraph.</p> </sec> @@ -817,12 +813,10 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> </list-item> </list> <p>An e-mail address: <email>nobody@nowhere.net</email></p> - <p specific-use="wrapper"> - <disp-quote> - <p>Blockquoted: - <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> - </disp-quote> - </p> + <disp-quote> + <p>Blockquoted: + <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> + </disp-quote> <p>Auto-links should not occur here: <monospace><http://example.com/></monospace></p> <preformat>or here: <http://example.com/></preformat> @@ -860,13 +854,11 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> and <monospace>]</monospace> verbatim characters, as well as [bracketed text].</p> </fn></p> - <p specific-use="wrapper"> - <disp-quote> - <p>Notes can go in quotes.<fn> - <p>In quote.</p> - </fn></p> - </disp-quote> - </p> + <disp-quote> + <p>Notes can go in quotes.<fn> + <p>In quote.</p> + </fn></p> + </disp-quote> <list list-type="order"> <list-item> <p>And in list items.<fn> diff --git a/test/writer.jats_publishing b/test/writer.jats_publishing index f53fd554d..e6db4172a 100644 --- a/test/writer.jats_publishing +++ b/test/writer.jats_publishing @@ -78,39 +78,31 @@ Gruber’s markdown test suite.</p> <sec id="block-quotes"> <title>Block Quotes</title> <p>E-mail style:</p> - <p specific-use="wrapper"> + <disp-quote> + <p>This is a block quote. It is pretty short.</p> + </disp-quote> + <disp-quote> + <p>Code in a block quote:</p> + <preformat>sub status { + print "working"; +}</preformat> + <p>A list:</p> + <list list-type="order"> + <list-item> + <p>item one</p> + </list-item> + <list-item> + <p>item two</p> + </list-item> + </list> + <p>Nested block quotes:</p> <disp-quote> - <p>This is a block quote. It is pretty short.</p> + <p>nested</p> </disp-quote> - </p> - <p specific-use="wrapper"> <disp-quote> - <p>Code in a block quote:</p> - <preformat>sub status { - print "working"; -}</preformat> - <p>A list:</p> - <list list-type="order"> - <list-item> - <p>item one</p> - </list-item> - <list-item> - <p>item two</p> - </list-item> - </list> - <p>Nested block quotes:</p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> - <p specific-use="wrapper"> - <disp-quote> - <p>nested</p> - </disp-quote> - </p> + <p>nested</p> </disp-quote> - </p> + </disp-quote> <p>This should not be a block quote: 2 > 1.</p> <p>And a following paragraph.</p> </sec> @@ -837,12 +829,10 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> </list-item> </list> <p>An e-mail address: <email>nobody@nowhere.net</email></p> - <p specific-use="wrapper"> - <disp-quote> - <p>Blockquoted: - <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> - </disp-quote> - </p> + <disp-quote> + <p>Blockquoted: + <ext-link ext-link-type="uri" xlink:href="http://example.com/">http://example.com/</ext-link></p> + </disp-quote> <p>Auto-links should not occur here: <monospace><http://example.com/></monospace></p> <preformat>or here: <http://example.com/></preformat> @@ -866,11 +856,9 @@ These should not be escaped: \$ \\ \> \[ \{</preformat> <italic>not</italic> be a footnote reference, because it contains a space.[^my note] Here is an inline note.<xref ref-type="fn" rid="fn3">3</xref></p> - <p specific-use="wrapper"> - <disp-quote> - <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> - </disp-quote> - </p> + <disp-quote> + <p>Notes can go in quotes.<xref ref-type="fn" rid="fn4">4</xref></p> + </disp-quote> <list list-type="order"> <list-item> <p>And in list items.<xref ref-type="fn" rid="fn5">5</xref></p> -- cgit v1.2.3 From 7ba8c0d2a5e2b89ae1547759510b2ee21de88cb1 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 11 Apr 2021 21:28:19 -0700 Subject: Move getLang from BCP47 -> T.P.Writers.Shared. [API change] --- pandoc.cabal | 1 + src/Text/Pandoc/BCP47.hs | 13 ---- src/Text/Pandoc/Citeproc/Data.hs | 5 +- src/Text/Pandoc/Shared.hs | 1 - src/Text/Pandoc/Writers/LaTeX/Lang.hs | 117 +++++++++++++++++----------------- src/Text/Pandoc/Writers/Shared.hs | 14 ++++ 6 files changed, 77 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index c8ef3cfb9..b6cbb0d7a 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -493,6 +493,7 @@ library unordered-containers >= 0.2 && < 0.3, xml >= 1.3.12 && < 1.4, xml-conduit >= 1.9.1.1 && < 1.10, + unicode-collation >= 0.1 && < 0.2, zip-archive >= 0.2.3.4 && < 0.5, zlib >= 0.5 && < 0.7 if os(windows) && arch(i386) diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs index 69824aa57..1ecf0bf73 100644 --- a/src/Text/Pandoc/BCP47.hs +++ b/src/Text/Pandoc/BCP47.hs @@ -37,19 +37,6 @@ renderLang :: Lang -> T.Text renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) ([langScript lang, langRegion lang] ++ langVariants lang)) --- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe T.Text -getLang opts meta = - case lookupContext "lang" (writerVariables opts) of - Just s -> Just s - _ -> - case lookupMeta "lang" meta of - Just (MetaBlocks [Para [Str s]]) -> Just s - Just (MetaBlocks [Plain [Str s]]) -> Just s - Just (MetaInlines [Str s]) -> Just s - Just (MetaString s) -> Just s - _ -> Nothing - -- | Parse a BCP 47 string as a Lang. Currently we parse -- extensions and private-use fields as "variants," even -- though officially they aren't. diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index dfdaf2598..40430b0f5 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.Text as T import Data.Text (Text) import Text.Pandoc.Citeproc.Util (toIETF) -import Citeproc (Lang(..), parseLang) +import UnicodeCollation.Lang (Lang(..), parseLang) biblatexLocalizations :: [(FilePath, ByteString)] biblatexLocalizations = $(embedDir "citeproc/biblatex-localization") @@ -21,7 +21,8 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text)) biblatexStringMap = foldr go mempty biblatexLocalizations where go (fp, bs) = - let Lang lang _ = parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) + let Lang lang _ _ _ _ _ = parseLang + (toIETF $ T.takeWhile (/= '.') $ T.pack fp) ls = T.lines $ TE.decodeUtf8 bs in if length ls > 4 then M.insert lang (toStringMap $ map (T.splitOn "|") ls) diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 95cbdc8b8..e389c1727 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -852,7 +852,6 @@ filteredFilesFromArchive zf f = fileAndBinary :: Archive -> FilePath -> Maybe (FilePath, BL.ByteString) fileAndBinary a fp = findEntryByPath fp a >>= \e -> Just (fp, fromEntry e) - -- -- IANA URIs -- diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 41aafee48..871b2692a 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang toBabel ) where import Data.Text (Text) -import Text.Pandoc.BCP47 (Lang (..)) +import UnicodeCollation.Lang (Lang(..)) -- In environments \Arabic instead of \arabic is used @@ -25,88 +25,89 @@ toPolyglossiaEnv l = ("arabic", o) -> ("Arabic", o) x -> x --- Takes a list of the constituents of a BCP 47 language code and +-- Takes a list of the constituents of a BCP47 language code and -- converts it to a Polyglossia (language, options) tuple -- http://mirrors.ctan.org/macros/latex/contrib/polyglossia/polyglossia.pdf toPolyglossia :: Lang -> (Text, Text) -toPolyglossia (Lang "ar" _ "DZ" _) = ("arabic", "locale=algeria") -toPolyglossia (Lang "ar" _ "IQ" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "JO" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LB" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "LY" _) = ("arabic", "locale=libya") -toPolyglossia (Lang "ar" _ "MA" _) = ("arabic", "locale=morocco") -toPolyglossia (Lang "ar" _ "MR" _) = ("arabic", "locale=mauritania") -toPolyglossia (Lang "ar" _ "PS" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "SY" _) = ("arabic", "locale=mashriq") -toPolyglossia (Lang "ar" _ "TN" _) = ("arabic", "locale=tunisia") -toPolyglossia (Lang "de" _ _ vars) - | "1901" `elem` vars = ("german", "spelling=old") -toPolyglossia (Lang "de" _ "AT" vars) - | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") -toPolyglossia (Lang "de" _ "AT" _) = ("german", "variant=austrian") -toPolyglossia (Lang "de" _ "CH" vars) - | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ "CH" _) = ("german", "variant=swiss") -toPolyglossia (Lang "de" _ _ _) = ("german", "") -toPolyglossia (Lang "dsb" _ _ _) = ("lsorbian", "") -toPolyglossia (Lang "el" _ "polyton" _) = ("greek", "variant=poly") -toPolyglossia (Lang "en" _ "AU" _) = ("english", "variant=australian") -toPolyglossia (Lang "en" _ "CA" _) = ("english", "variant=canadian") -toPolyglossia (Lang "en" _ "GB" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "NZ" _) = ("english", "variant=newzealand") -toPolyglossia (Lang "en" _ "UK" _) = ("english", "variant=british") -toPolyglossia (Lang "en" _ "US" _) = ("english", "variant=american") -toPolyglossia (Lang "grc" _ _ _) = ("greek", "variant=ancient") -toPolyglossia (Lang "hsb" _ _ _) = ("usorbian", "") -toPolyglossia (Lang "la" _ _ vars) - | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ "BR" _) = ("portuguese", "variant=brazilian") -toPolyglossia (Lang "sl" _ _ _) = ("slovenian", "") +toPolyglossia (Lang "ar" _ (Just "DZ") _ _ _) = ("arabic", "locale=algeria") +toPolyglossia (Lang "ar" _ (Just "IQ") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "JO") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LB") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "LY") _ _ _) = ("arabic", "locale=libya") +toPolyglossia (Lang "ar" _ (Just "MA") _ _ _) = ("arabic", "locale=morocco") +toPolyglossia (Lang "ar" _ (Just "MR") _ _ _) = ("arabic", "locale=mauritania") +toPolyglossia (Lang "ar" _ (Just "PS") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "SY") _ _ _) = ("arabic", "locale=mashriq") +toPolyglossia (Lang "ar" _ (Just "TN") _ _ _) = ("arabic", "locale=tunisia") +toPolyglossia (Lang "de" _ _ vars _ _) + | "1901" `elem` vars = ("german", "spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") vars _ _) + | "1901" `elem` vars = ("german", "variant=austrian, spelling=old") +toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") +toPolyglossia (Lang "de" _ (Just "CH") vars _ _) + | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") +toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") +toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") +toPolyglossia (Lang "el" _ _ vars _ _) + | "polyton" `elem` vars = ("greek", "variant=poly") +toPolyglossia (Lang "en" _ (Just "AU") _ _ _) = ("english", "variant=australian") +toPolyglossia (Lang "en" _ (Just "CA") _ _ _) = ("english", "variant=canadian") +toPolyglossia (Lang "en" _ (Just "GB") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "NZ") _ _ _) = ("english", "variant=newzealand") +toPolyglossia (Lang "en" _ (Just "UK") _ _ _) = ("english", "variant=british") +toPolyglossia (Lang "en" _ (Just "US") _ _ _) = ("english", "variant=american") +toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") +toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") +toPolyglossia (Lang "la" _ _ vars _ _) + | "x-classic" `elem` vars = ("latin", "variant=classic") +toPolyglossia (Lang "pt" _ "BR" _ _ _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") toPolyglossia x = (commonFromBcp47 x, "") --- Takes a list of the constituents of a BCP 47 language code and +-- Takes a list of the constituents of a BCP47 language code and -- converts it to a Babel language string. -- http://mirrors.ctan.org/macros/latex/required/babel/base/babel.pdf -- List of supported languages (slightly outdated): -- http://tug.ctan.org/language/hyph-utf8/doc/generic/hyph-utf8/hyphenation.pdf toBabel :: Lang -> Text -toBabel (Lang "de" _ "AT" vars) +toBabel (Lang "de" _ (Just "AT") vars _ _) | "1901" `elem` vars = "austrian" | otherwise = "naustrian" -toBabel (Lang "de" _ "CH" vars) +toBabel (Lang "de" _ (Just "CH") vars _ _) | "1901" `elem` vars = "swissgerman" | otherwise = "nswissgerman" -toBabel (Lang "de" _ _ vars) +toBabel (Lang "de" _ _ vars _ _) | "1901" `elem` vars = "german" | otherwise = "ngerman" -toBabel (Lang "dsb" _ _ _) = "lowersorbian" +toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" toBabel (Lang "el" _ _ vars) | "polyton" `elem` vars = "polutonikogreek" -toBabel (Lang "en" _ "AU" _) = "australian" -toBabel (Lang "en" _ "CA" _) = "canadian" -toBabel (Lang "en" _ "GB" _) = "british" -toBabel (Lang "en" _ "NZ" _) = "newzealand" -toBabel (Lang "en" _ "UK" _) = "british" -toBabel (Lang "en" _ "US" _) = "american" -toBabel (Lang "fr" _ "CA" _) = "canadien" -toBabel (Lang "fra" _ _ vars) +toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" +toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" +toBabel (Lang "en" _ (Just "GB") _ _ _) = "british" +toBabel (Lang "en" _ (Just "NZ") _ _ _) = "newzealand" +toBabel (Lang "en" _ (Just "UK") _ _ _) = "british" +toBabel (Lang "en" _ (Just "US") _ _ _) = "american" +toBabel (Lang "fr" _ (Just "CA") _ _ _) = "canadien" +toBabel (Lang "fra" _ _ vars _ _) | "aca" `elem` vars = "acadian" -toBabel (Lang "grc" _ _ _) = "polutonikogreek" -toBabel (Lang "hsb" _ _ _) = "uppersorbian" -toBabel (Lang "la" _ _ vars) +toBabel (Lang "grc" _ _ _ _ _) = "polutonikogreek" +toBabel (Lang "hsb" _ _ _ _ _) = "uppersorbian" +toBabel (Lang "la" _ _ vars _ _) | "x-classic" `elem` vars = "classiclatin" -toBabel (Lang "pt" _ "BR" _) = "brazilian" -toBabel (Lang "sl" _ _ _) = "slovene" +toBabel (Lang "pt" _ (Just "BR") _ _ _) = "brazilian" +toBabel (Lang "sl" _ _ _ _ _) = "slovene" toBabel x = commonFromBcp47 x --- Takes a list of the constituents of a BCP 47 language code +-- Takes a list of the constituents of a BCP47 language code -- and converts it to a string shared by Babel and Polyglossia. -- https://tools.ietf.org/html/bcp47#section-2.1 commonFromBcp47 :: Lang -> Text -commonFromBcp47 (Lang "sr" "Cyrl" _ _) = "serbianc" -commonFromBcp47 (Lang "zh" "Latn" _ vars) - | "pinyin" `elem` vars = "pinyin" -commonFromBcp47 (Lang l _ _ _) = fromIso l +commonFromBcp47 (Lang "sr" (Just "Cyrl") _ _ _ _) = "serbianc" +commonFromBcp47 (Lang "zh" (Just "Latn") _ vars _ _) + | "pinyin" `elem` vars = "pinyin" +commonFromBcp47 (Lang l _ _ _ _ _) = fromIso l where fromIso "af" = "afrikaans" fromIso "am" = "amharic" diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index 91ecb310b..fcb47bd5a 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -20,6 +20,7 @@ module Text.Pandoc.Writers.Shared ( , setField , resetField , defField + , getLang , tagWithAttrs , isDisplayMath , fixDisplayMath @@ -147,6 +148,19 @@ defField field val (Context m) = where f _newval oldval = oldval +-- | Get the contents of the `lang` metadata field or variable. +getLang :: WriterOptions -> Meta -> Maybe Text +getLang opts meta = + case lookupContext "lang" (writerVariables opts) of + Just s -> Just s + _ -> + case lookupMeta "lang" meta of + Just (MetaBlocks [Para [Str s]]) -> Just s + Just (MetaBlocks [Plain [Str s]]) -> Just s + Just (MetaInlines [Str s]) -> Just s + Just (MetaString s) -> Just s + _ -> Nothing + -- | Produce an HTML tag with the given pandoc attributes. tagWithAttrs :: HasChars a => T.Text -> Attr -> Doc a tagWithAttrs tag (ident,classes,kvs) = hsep -- cgit v1.2.3 From aecbf8156eb7c36c4b41de27797e262c23728db5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 11 Apr 2021 21:28:48 -0700 Subject: Remove Text.Pandoc.BCP47 module. [API change] Use Lang from UnicodeCollation.Lang instead. This is a richer implementation of BCP 47. --- pandoc.cabal | 1 - src/Text/Pandoc/App.hs | 6 +- src/Text/Pandoc/BCP47.hs | 99 ------------- src/Text/Pandoc/Citeproc.hs | 10 +- src/Text/Pandoc/Citeproc/BibTeX.hs | 20 +-- src/Text/Pandoc/Citeproc/Data.hs | 12 +- src/Text/Pandoc/Class/CommonState.hs | 2 +- src/Text/Pandoc/Class/PandocMonad.hs | 4 +- src/Text/Pandoc/Readers/BibTeX.hs | 9 +- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 241 ++++++++++++++++---------------- src/Text/Pandoc/Writers/BibTeX.hs | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 48 +++---- src/Text/Pandoc/Writers/CslJson.hs | 7 +- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- src/Text/Pandoc/Writers/LaTeX/Lang.hs | 8 +- src/Text/Pandoc/Writers/ODT.hs | 7 +- src/Text/Pandoc/Writers/OpenDocument.hs | 8 +- src/Text/Pandoc/Writers/Shared.hs | 2 +- 20 files changed, 198 insertions(+), 294 deletions(-) delete mode 100644 src/Text/Pandoc/BCP47.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index b6cbb0d7a..8816767e9 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -601,7 +601,6 @@ library Text.Pandoc.Asciify, Text.Pandoc.Emoji, Text.Pandoc.ImageSize, - Text.Pandoc.BCP47, Text.Pandoc.Class, Text.Pandoc.Citeproc other-modules: Text.Pandoc.App.CommandLineOptions, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 6b45e5418..67d3cce7d 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) -import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import UnicodeCollation.Lang (Lang (..), parseLang) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.SelfContained (makeSelfContained) @@ -200,8 +200,8 @@ convertWithOpts opts = do Just f -> readFileStrict f case lookupMetaString "lang" (optMetadata opts) of - "" -> setTranslations $ Lang "en" "" "US" [] - l -> case parseBCP47 l of + "" -> setTranslations $ Lang "en" Nothing (Just "US") [] [] [] + l -> case parseLang l of Left _ -> report $ InvalidLang l Right l' -> setTranslations l' diff --git a/src/Text/Pandoc/BCP47.hs b/src/Text/Pandoc/BCP47.hs deleted file mode 100644 index 1ecf0bf73..000000000 --- a/src/Text/Pandoc/BCP47.hs +++ /dev/null @@ -1,99 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- | - Module : Text.Pandoc.BCP47 - Copyright : Copyright (C) 2017-2021 John MacFarlane - License : GNU GPL, version 2 or above - - Maintainer : John MacFarlane <jgm@berkeley.edu> - Stability : alpha - Portability : portable - -Functions for parsing and rendering BCP47 language identifiers. --} -module Text.Pandoc.BCP47 ( - getLang - , parseBCP47 - , Lang(..) - , renderLang - ) -where -import Control.Monad (guard) -import Data.Char (isAlphaNum, isAscii, isLetter, isLower, isUpper) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.DocTemplates (FromContext(..)) -import qualified Data.Text as T -import qualified Text.Parsec as P - --- | Represents BCP 47 language/country code. -data Lang = Lang{ langLanguage :: T.Text - , langScript :: T.Text - , langRegion :: T.Text - , langVariants :: [T.Text] } - deriving (Eq, Ord, Show) - --- | Render a Lang as BCP 47. -renderLang :: Lang -> T.Text -renderLang lang = T.intercalate "-" (langLanguage lang : filter (not . T.null) - ([langScript lang, langRegion lang] ++ langVariants lang)) - --- | Parse a BCP 47 string as a Lang. Currently we parse --- extensions and private-use fields as "variants," even --- though officially they aren't. -parseBCP47 :: T.Text -> Either T.Text Lang -parseBCP47 lang = - case P.parse bcp47 "lang" lang of - Right r -> Right r - Left e -> Left $ T.pack $ show e - where bcp47 = do - language <- pLanguage - script <- P.option "" pScript - region <- P.option "" pRegion - variants <- P.many (pVariant P.<|> pExtension P.<|> pPrivateUse) - P.eof - return Lang{ langLanguage = language - , langScript = script - , langRegion = region - , langVariants = variants } - asciiLetter = P.satisfy (\c -> isAscii c && isLetter c) - pLanguage = do - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return $ T.toLower $ T.pack cs - pScript = P.try $ do - P.char '-' - x <- P.satisfy (\c -> isAscii c && isLetter c && isUpper c) - xs <- P.count 3 - (P.satisfy (\c -> isAscii c && isLetter c && isLower c)) - return $ T.toLower $ T.pack (x:xs) - pRegion = P.try $ do - P.char '-' - cs <- P.many1 asciiLetter - let lcs = length cs - guard $ lcs == 2 || lcs == 3 - return $ T.toUpper $ T.pack cs - pVariant = P.try $ do - P.char '-' - ds <- P.option "" (P.count 1 P.digit) - cs <- P.many1 asciiLetter - let var = ds ++ cs - lv = length var - guard $ if null ds - then lv >= 5 && lv <= 8 - else lv == 4 - return $ T.toLower $ T.pack var - pExtension = P.try $ do - P.char '-' - cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) - let lcs = length cs - guard $ lcs >= 2 && lcs <= 8 - return $ T.toLower $ T.pack cs - pPrivateUse = P.try $ do - P.char '-' - P.char 'x' - P.char '-' - cs <- P.many1 $ P.satisfy (\c -> isAscii c && isAlphaNum c) - guard $ not (null cs) && length cs <= 8 - let var = "x-" ++ cs - return $ T.toLower $ T.pack var diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index af302f782..c9f1806e4 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -18,7 +18,6 @@ import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..)) import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText) import Text.Pandoc.Readers.Markdown (yamlToRefs) -import qualified Text.Pandoc.BCP47 as BCP47 import Text.Pandoc.Builder (Inlines, Many(..), deleteMeta, setMeta) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Definition as Pandoc @@ -630,13 +629,8 @@ removeFinalPeriod ils = bcp47LangToIETF :: PandocMonad m => Text -> m (Maybe Lang) bcp47LangToIETF bcplang = - case BCP47.parseBCP47 bcplang of + case parseLang bcplang of Left _ -> do report $ InvalidLang bcplang return Nothing - Right lang -> - return $ Just - $ Lang (BCP47.langLanguage lang) - (if T.null (BCP47.langRegion lang) - then Nothing - else Just (BCP47.langRegion lang)) + Right lang -> return $ Just lang diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index c0752dadc..510e56f9c 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -205,10 +205,13 @@ writeBibtexString opts variant mblang ref = [ (", " <>) <$> nameGiven name, nameDroppingParticle name ] - mblang' = (parseLang <$> getVariableAsText "language") <|> mblang + mblang' = case getVariableAsText "language" of + Just l -> either (const Nothing) Just $ parseLang l + Nothing -> mblang titlecase = case mblang' of - Just (Lang "en" _) -> titlecase' + Just lang | langLanguage lang == "en" + -> titlecase' Nothing -> titlecase' _ -> case variant of @@ -331,7 +334,7 @@ writeBibtexString opts variant mblang ref = renderFields = mconcat . intersperse ("," <> cr) . mapMaybe renderField defaultLang :: Lang -defaultLang = Lang "en" (Just "US") +defaultLang = Lang "en" Nothing (Just "US") [] [] [] -- a map of bibtex "string" macros type StringMap = Map.Map Text Text @@ -351,9 +354,7 @@ itemToReference locale variant item = do bib item $ do let lang = fromMaybe defaultLang $ localeLanguage locale modify $ \st -> st{ localeLang = lang, - untitlecase = case lang of - (Lang "en" _) -> True - _ -> False } + untitlecase = langLanguage lang == "en" } id' <- asks identifier otherIds <- (Just <$> getRawField "ids") @@ -711,7 +712,7 @@ itemToReference locale variant item = do bib :: Item -> Bib a -> BibParser a -bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US"))) +bib entry m = fst <$> evalRWST m entry (BibState True defaultLang) resolveCrossRefs :: Variant -> [Item] -> [Item] resolveCrossRefs variant entries = @@ -1456,8 +1457,9 @@ resolveKey lang ils = Walk.walk go ils go x = x resolveKey' :: Lang -> Text -> Text -resolveKey' lang@(Lang l _) k = - case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of +resolveKey' lang k = + case Map.lookup (langLanguage lang) biblatexStringMap >>= + Map.lookup (T.toLower k) of Nothing -> k Just (x, _) -> either (const k) stringify $ parseLaTeX lang x diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index 40430b0f5..388b9ba62 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -21,12 +21,12 @@ biblatexStringMap :: M.Map Text (M.Map Text (Text, Text)) biblatexStringMap = foldr go mempty biblatexLocalizations where go (fp, bs) = - let Lang lang _ _ _ _ _ = parseLang - (toIETF $ T.takeWhile (/= '.') $ T.pack fp) - ls = T.lines $ TE.decodeUtf8 bs - in if length ls > 4 - then M.insert lang (toStringMap $ map (T.splitOn "|") ls) - else id + let ls = T.lines $ TE.decodeUtf8 bs + in case parseLang (toIETF $ T.takeWhile (/= '.') $ T.pack fp) of + Right lang | length ls > 4 + -> M.insert (langLanguage lang) + (toStringMap $ map (T.splitOn "|") ls) + _ -> id toStringMap = foldr go' mempty go' [term, x, y] = M.insert term (x, y) go' _ = id diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs index 7e1735c2b..0fd094d99 100644 --- a/src/Text/Pandoc/Class/CommonState.hs +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -19,7 +19,7 @@ where import Data.Default (Default (def)) import Data.Text (Text) -import Text.Pandoc.BCP47 (Lang) +import UnicodeCollation.Lang (Lang) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) import Text.Pandoc.Translations (Translations) diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 293a822a0..76f1fa32b 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, import System.FilePath ((</>), (<.>), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) -import Text.Pandoc.BCP47 (Lang(..), parseBCP47, renderLang) +import UnicodeCollation.Lang (Lang(..), parseLang, renderLang) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error @@ -285,7 +285,7 @@ readFileFromDirs (d:ds) f = catchError toLang :: PandocMonad m => Maybe T.Text -> m (Maybe Lang) toLang Nothing = return Nothing toLang (Just s) = - case parseBCP47 s of + case parseLang s of Left _ -> do report $ InvalidLang s return Nothing diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index 956b9f1f7..b82a81350 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -48,11 +48,14 @@ readBibLaTeX = readBibTeX' BibTeX.Biblatex readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc readBibTeX' variant _opts t = do - lang <- maybe (Lang "en" (Just "US")) parseLang - <$> lookupEnv "LANG" + mblangEnv <- lookupEnv "LANG" + let defaultLang = Lang "en" Nothing (Just "US") [] [] [] + let lang = case mblangEnv of + Nothing -> defaultLang + Just l -> either (const defaultLang) id $ parseLang l locale <- case getLocale lang of Left e -> - case getLocale (Lang "en" (Just "US")) of + case getLocale (Lang "en" Nothing (Just "US") [] [] []) of Right l -> return l Left _ -> throwError $ PandocCiteprocError e Right l -> return l diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 851756065..83caf742a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,7 +33,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) -import Text.Pandoc.BCP47 (renderLang) +import UnicodeCollation.Lang (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index 08e217bdb..b92e6ab57 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -23,7 +23,7 @@ import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Shared (extractSpaces) -import Text.Pandoc.BCP47 (Lang(..), renderLang) +import UnicodeCollation.Lang (Lang(..), renderLang) import Text.Pandoc.Class (PandocMonad(..), setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), @@ -99,133 +99,136 @@ setDefaultLanguage = do polyglossiaLangToBCP47 :: M.Map T.Text (T.Text -> Lang) polyglossiaLangToBCP47 = M.fromList [ ("arabic", \o -> case T.filter (/=' ') o of - "locale=algeria" -> Lang "ar" "" "DZ" [] - "locale=mashriq" -> Lang "ar" "" "SY" [] - "locale=libya" -> Lang "ar" "" "LY" [] - "locale=morocco" -> Lang "ar" "" "MA" [] - "locale=mauritania" -> Lang "ar" "" "MR" [] - "locale=tunisia" -> Lang "ar" "" "TN" [] - _ -> Lang "ar" "" "" []) + "locale=algeria" -> Lang "ar" Nothing (Just "DZ") [] [] [] + "locale=mashriq" -> Lang "ar" Nothing (Just "SY") [] [] [] + "locale=libya" -> Lang "ar" Nothing (Just "LY") [] [] [] + "locale=morocco" -> Lang "ar" Nothing (Just "MA") [] [] [] + "locale=mauritania" -> Lang "ar" Nothing (Just "MR") [] [] [] + "locale=tunisia" -> Lang "ar" Nothing (Just "TN") [] [] [] + _ -> Lang "ar" Nothing (Just "") [] [] []) , ("german", \o -> case T.filter (/=' ') o of - "spelling=old" -> Lang "de" "" "DE" ["1901"] + "spelling=old" -> Lang "de" Nothing (Just "DE") ["1901"] [] [] "variant=austrian,spelling=old" - -> Lang "de" "" "AT" ["1901"] - "variant=austrian" -> Lang "de" "" "AT" [] + -> Lang "de" Nothing (Just "AT") ["1901"] [] [] + "variant=austrian" -> Lang "de" Nothing (Just "AT") [] [] [] "variant=swiss,spelling=old" - -> Lang "de" "" "CH" ["1901"] - "variant=swiss" -> Lang "de" "" "CH" [] - _ -> Lang "de" "" "" []) - , ("lsorbian", \_ -> Lang "dsb" "" "" []) + -> Lang "de" Nothing (Just "CH") ["1901"] [] [] + "variant=swiss" -> Lang "de" Nothing (Just "CH") [] [] [] + _ -> Lang "de" Nothing Nothing [] [] []) + , ("lsorbian", \_ -> Lang "dsb" Nothing Nothing [] [] []) , ("greek", \o -> case T.filter (/=' ') o of - "variant=poly" -> Lang "el" "" "polyton" [] - "variant=ancient" -> Lang "grc" "" "" [] - _ -> Lang "el" "" "" []) + "variant=poly" -> Lang "el" Nothing (Just "polyton") [] [] [] + "variant=ancient" -> Lang "grc" Nothing Nothing [] [] [] + _ -> Lang "el" Nothing Nothing [] [] []) , ("english", \o -> case T.filter (/=' ') o of - "variant=australian" -> Lang "en" "" "AU" [] - "variant=canadian" -> Lang "en" "" "CA" [] - "variant=british" -> Lang "en" "" "GB" [] - "variant=newzealand" -> Lang "en" "" "NZ" [] - "variant=american" -> Lang "en" "" "US" [] - _ -> Lang "en" "" "" []) - , ("usorbian", \_ -> Lang "hsb" "" "" []) + "variant=australian" -> Lang "en" Nothing (Just "AU") [] [] [] + "variant=canadian" -> Lang "en" Nothing (Just "CA") [] [] [] + "variant=british" -> Lang "en" Nothing (Just "GB") [] [] [] + "variant=newzealand" -> Lang "en" Nothing (Just "NZ") [] [] [] + "variant=american" -> Lang "en" Nothing (Just "US") [] [] [] + _ -> Lang "en" Nothing (Just "") [] [] []) + , ("usorbian", \_ -> Lang "hsb" Nothing Nothing [] [] []) , ("latin", \o -> case T.filter (/=' ') o of - "variant=classic" -> Lang "la" "" "" ["x-classic"] - _ -> Lang "la" "" "" []) - , ("slovenian", \_ -> Lang "sl" "" "" []) - , ("serbianc", \_ -> Lang "sr" "cyrl" "" []) - , ("pinyin", \_ -> Lang "zh" "Latn" "" ["pinyin"]) - , ("afrikaans", \_ -> Lang "af" "" "" []) - , ("amharic", \_ -> Lang "am" "" "" []) - , ("assamese", \_ -> Lang "as" "" "" []) - , ("asturian", \_ -> Lang "ast" "" "" []) - , ("bulgarian", \_ -> Lang "bg" "" "" []) - , ("bengali", \_ -> Lang "bn" "" "" []) - , ("tibetan", \_ -> Lang "bo" "" "" []) - , ("breton", \_ -> Lang "br" "" "" []) - , ("catalan", \_ -> Lang "ca" "" "" []) - , ("welsh", \_ -> Lang "cy" "" "" []) - , ("czech", \_ -> Lang "cs" "" "" []) - , ("coptic", \_ -> Lang "cop" "" "" []) - , ("danish", \_ -> Lang "da" "" "" []) - , ("divehi", \_ -> Lang "dv" "" "" []) - , ("esperanto", \_ -> Lang "eo" "" "" []) - , ("spanish", \_ -> Lang "es" "" "" []) - , ("estonian", \_ -> Lang "et" "" "" []) - , ("basque", \_ -> Lang "eu" "" "" []) - , ("farsi", \_ -> Lang "fa" "" "" []) - , ("finnish", \_ -> Lang "fi" "" "" []) - , ("french", \_ -> Lang "fr" "" "" []) - , ("friulan", \_ -> Lang "fur" "" "" []) - , ("irish", \_ -> Lang "ga" "" "" []) - , ("scottish", \_ -> Lang "gd" "" "" []) - , ("ethiopic", \_ -> Lang "gez" "" "" []) - , ("galician", \_ -> Lang "gl" "" "" []) - , ("hebrew", \_ -> Lang "he" "" "" []) - , ("hindi", \_ -> Lang "hi" "" "" []) - , ("croatian", \_ -> Lang "hr" "" "" []) - , ("magyar", \_ -> Lang "hu" "" "" []) - , ("armenian", \_ -> Lang "hy" "" "" []) - , ("interlingua", \_ -> Lang "ia" "" "" []) - , ("indonesian", \_ -> Lang "id" "" "" []) - , ("icelandic", \_ -> Lang "is" "" "" []) - , ("italian", \_ -> Lang "it" "" "" []) - , ("japanese", \_ -> Lang "jp" "" "" []) - , ("khmer", \_ -> Lang "km" "" "" []) - , ("kurmanji", \_ -> Lang "kmr" "" "" []) - , ("kannada", \_ -> Lang "kn" "" "" []) - , ("korean", \_ -> Lang "ko" "" "" []) - , ("lao", \_ -> Lang "lo" "" "" []) - , ("lithuanian", \_ -> Lang "lt" "" "" []) - , ("latvian", \_ -> Lang "lv" "" "" []) - , ("malayalam", \_ -> Lang "ml" "" "" []) - , ("mongolian", \_ -> Lang "mn" "" "" []) - , ("marathi", \_ -> Lang "mr" "" "" []) - , ("dutch", \_ -> Lang "nl" "" "" []) - , ("nynorsk", \_ -> Lang "nn" "" "" []) - , ("norsk", \_ -> Lang "no" "" "" []) - , ("nko", \_ -> Lang "nqo" "" "" []) - , ("occitan", \_ -> Lang "oc" "" "" []) - , ("panjabi", \_ -> Lang "pa" "" "" []) - , ("polish", \_ -> Lang "pl" "" "" []) - , ("piedmontese", \_ -> Lang "pms" "" "" []) - , ("portuguese", \_ -> Lang "pt" "" "" []) - , ("romansh", \_ -> Lang "rm" "" "" []) - , ("romanian", \_ -> Lang "ro" "" "" []) - , ("russian", \_ -> Lang "ru" "" "" []) - , ("sanskrit", \_ -> Lang "sa" "" "" []) - , ("samin", \_ -> Lang "se" "" "" []) - , ("slovak", \_ -> Lang "sk" "" "" []) - , ("albanian", \_ -> Lang "sq" "" "" []) - , ("serbian", \_ -> Lang "sr" "" "" []) - , ("swedish", \_ -> Lang "sv" "" "" []) - , ("syriac", \_ -> Lang "syr" "" "" []) - , ("tamil", \_ -> Lang "ta" "" "" []) - , ("telugu", \_ -> Lang "te" "" "" []) - , ("thai", \_ -> Lang "th" "" "" []) - , ("turkmen", \_ -> Lang "tk" "" "" []) - , ("turkish", \_ -> Lang "tr" "" "" []) - , ("ukrainian", \_ -> Lang "uk" "" "" []) - , ("urdu", \_ -> Lang "ur" "" "" []) - , ("vietnamese", \_ -> Lang "vi" "" "" []) + "variant=classic" -> Lang "la" Nothing Nothing ["x-classic"] [] [] + _ -> Lang "la" Nothing Nothing [] [] []) + , ("slovenian", \_ -> Lang "sl" Nothing Nothing [] [] []) + , ("serbianc", \_ -> Lang "sr" (Just "Cyrl") Nothing [] [] []) + , ("pinyin", \_ -> Lang "zh" (Just "Latn") Nothing ["pinyin"] [] []) + , ("afrikaans", \_ -> simpleLang "af") + , ("amharic", \_ -> simpleLang "am") + , ("assamese", \_ -> simpleLang "as") + , ("asturian", \_ -> simpleLang "ast") + , ("bulgarian", \_ -> simpleLang "bg") + , ("bengali", \_ -> simpleLang "bn") + , ("tibetan", \_ -> simpleLang "bo") + , ("breton", \_ -> simpleLang "br") + , ("catalan", \_ -> simpleLang "ca") + , ("welsh", \_ -> simpleLang "cy") + , ("czech", \_ -> simpleLang "cs") + , ("coptic", \_ -> simpleLang "cop") + , ("danish", \_ -> simpleLang "da") + , ("divehi", \_ -> simpleLang "dv") + , ("esperanto", \_ -> simpleLang "eo") + , ("spanish", \_ -> simpleLang "es") + , ("estonian", \_ -> simpleLang "et") + , ("basque", \_ -> simpleLang "eu") + , ("farsi", \_ -> simpleLang "fa") + , ("finnish", \_ -> simpleLang "fi") + , ("french", \_ -> simpleLang "fr") + , ("friulan", \_ -> simpleLang "fur") + , ("irish", \_ -> simpleLang "ga") + , ("scottish", \_ -> simpleLang "gd") + , ("ethiopic", \_ -> simpleLang "gez") + , ("galician", \_ -> simpleLang "gl") + , ("hebrew", \_ -> simpleLang "he") + , ("hindi", \_ -> simpleLang "hi") + , ("croatian", \_ -> simpleLang "hr") + , ("magyar", \_ -> simpleLang "hu") + , ("armenian", \_ -> simpleLang "hy") + , ("interlingua", \_ -> simpleLang "ia") + , ("indonesian", \_ -> simpleLang "id") + , ("icelandic", \_ -> simpleLang "is") + , ("italian", \_ -> simpleLang "it") + , ("japanese", \_ -> simpleLang "jp") + , ("khmer", \_ -> simpleLang "km") + , ("kurmanji", \_ -> simpleLang "kmr") + , ("kannada", \_ -> simpleLang "kn") + , ("korean", \_ -> simpleLang "ko") + , ("lao", \_ -> simpleLang "lo") + , ("lithuanian", \_ -> simpleLang "lt") + , ("latvian", \_ -> simpleLang "lv") + , ("malayalam", \_ -> simpleLang "ml") + , ("mongolian", \_ -> simpleLang "mn") + , ("marathi", \_ -> simpleLang "mr") + , ("dutch", \_ -> simpleLang "nl") + , ("nynorsk", \_ -> simpleLang "nn") + , ("norsk", \_ -> simpleLang "no") + , ("nko", \_ -> simpleLang "nqo") + , ("occitan", \_ -> simpleLang "oc") + , ("panjabi", \_ -> simpleLang "pa") + , ("polish", \_ -> simpleLang "pl") + , ("piedmontese", \_ -> simpleLang "pms") + , ("portuguese", \_ -> simpleLang "pt") + , ("romansh", \_ -> simpleLang "rm") + , ("romanian", \_ -> simpleLang "ro") + , ("russian", \_ -> simpleLang "ru") + , ("sanskrit", \_ -> simpleLang "sa") + , ("samin", \_ -> simpleLang "se") + , ("slovak", \_ -> simpleLang "sk") + , ("albanian", \_ -> simpleLang "sq") + , ("serbian", \_ -> simpleLang "sr") + , ("swedish", \_ -> simpleLang "sv") + , ("syriac", \_ -> simpleLang "syr") + , ("tamil", \_ -> simpleLang "ta") + , ("telugu", \_ -> simpleLang "te") + , ("thai", \_ -> simpleLang "th") + , ("turkmen", \_ -> simpleLang "tk") + , ("turkish", \_ -> simpleLang "tr") + , ("ukrainian", \_ -> simpleLang "uk") + , ("urdu", \_ -> simpleLang "ur") + , ("vietnamese", \_ -> simpleLang "vi") ] +simpleLang :: Text -> Lang +simpleLang l = Lang l Nothing Nothing [] [] [] + babelLangToBCP47 :: T.Text -> Maybe Lang babelLangToBCP47 s = case s of - "austrian" -> Just $ Lang "de" "" "AT" ["1901"] - "naustrian" -> Just $ Lang "de" "" "AT" [] - "swissgerman" -> Just $ Lang "de" "" "CH" ["1901"] - "nswissgerman" -> Just $ Lang "de" "" "CH" [] - "german" -> Just $ Lang "de" "" "DE" ["1901"] - "ngerman" -> Just $ Lang "de" "" "DE" [] - "lowersorbian" -> Just $ Lang "dsb" "" "" [] - "uppersorbian" -> Just $ Lang "hsb" "" "" [] - "polutonikogreek" -> Just $ Lang "el" "" "" ["polyton"] - "slovene" -> Just $ Lang "sl" "" "" [] - "australian" -> Just $ Lang "en" "" "AU" [] - "canadian" -> Just $ Lang "en" "" "CA" [] - "british" -> Just $ Lang "en" "" "GB" [] - "newzealand" -> Just $ Lang "en" "" "NZ" [] - "american" -> Just $ Lang "en" "" "US" [] - "classiclatin" -> Just $ Lang "la" "" "" ["x-classic"] + "austrian" -> Just $ Lang "de" Nothing (Just "AT") ["1901"] [] [] + "naustrian" -> Just $ Lang "de" Nothing (Just "AT") [] [] [] + "swissgerman" -> Just $ Lang "de" Nothing (Just "CH") ["1901"] [] [] + "nswissgerman" -> Just $ Lang "de" Nothing (Just "CH") [] [] [] + "german" -> Just $ Lang "de" Nothing (Just "DE") ["1901"] [] [] + "ngerman" -> Just $ Lang "de" Nothing (Just "DE") [] [] [] + "lowersorbian" -> Just $ Lang "dsb" Nothing Nothing [] [] [] + "uppersorbian" -> Just $ Lang "hsb" Nothing Nothing [] [] [] + "polutonikogreek" -> Just $ Lang "el" Nothing Nothing ["polyton"] [] [] + "slovene" -> Just $ simpleLang "sl" + "australian" -> Just $ Lang "en" Nothing (Just "AU") [] [] [] + "canadian" -> Just $ Lang "en" Nothing (Just "CA") [] [] [] + "british" -> Just $ Lang "en" Nothing (Just "GB") [] [] [] + "newzealand" -> Just $ Lang "en" Nothing (Just "NZ") [] [] [] + "american" -> Just $ Lang "en" Nothing (Just "US") [] [] [] + "classiclatin" -> Just $ Lang "la" Nothing Nothing ["x-classic"] [] [] _ -> ($ "") <$> M.lookup s polyglossiaLangToBCP47 diff --git a/src/Text/Pandoc/Writers/BibTeX.hs b/src/Text/Pandoc/Writers/BibTeX.hs index b9ae0c13a..95de6b71f 100644 --- a/src/Text/Pandoc/Writers/BibTeX.hs +++ b/src/Text/Pandoc/Writers/BibTeX.hs @@ -43,7 +43,7 @@ writeBibTeX' :: PandocMonad m => Variant -> WriterOptions -> Pandoc -> m Text writeBibTeX' variant opts (Pandoc meta _) = do let mblang = case lookupMetaString "lang" meta of "" -> Nothing - t -> Just $ parseLang t + t -> either (const Nothing) Just $ parseLang t let refs = case lookupMeta "references" meta of Just (MetaList xs) -> mapMaybe metaValueToReference xs _ -> [] diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 3c9975be8..f352c84bc 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) -import Text.Pandoc.BCP47 +import UnicodeCollation.Lang (Lang(..)) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.ImageSize @@ -555,26 +555,26 @@ fromBCP47 mbs = fromBCP47' <$> toLang mbs -- https://tools.ietf.org/html/bcp47#section-2.1 -- http://wiki.contextgarden.net/Language_Codes fromBCP47' :: Maybe Lang -> Maybe Text -fromBCP47' (Just (Lang "ar" _ "SY" _) ) = Just "ar-sy" -fromBCP47' (Just (Lang "ar" _ "IQ" _) ) = Just "ar-iq" -fromBCP47' (Just (Lang "ar" _ "JO" _) ) = Just "ar-jo" -fromBCP47' (Just (Lang "ar" _ "LB" _) ) = Just "ar-lb" -fromBCP47' (Just (Lang "ar" _ "DZ" _) ) = Just "ar-dz" -fromBCP47' (Just (Lang "ar" _ "MA" _) ) = Just "ar-ma" -fromBCP47' (Just (Lang "de" _ _ ["1901"]) ) = Just "deo" -fromBCP47' (Just (Lang "de" _ "DE" _) ) = Just "de-de" -fromBCP47' (Just (Lang "de" _ "AT" _) ) = Just "de-at" -fromBCP47' (Just (Lang "de" _ "CH" _) ) = Just "de-ch" -fromBCP47' (Just (Lang "el" _ _ ["poly"]) ) = Just "agr" -fromBCP47' (Just (Lang "en" _ "US" _) ) = Just "en-us" -fromBCP47' (Just (Lang "en" _ "GB" _) ) = Just "en-gb" -fromBCP47' (Just (Lang "grc"_ _ _) ) = Just "agr" -fromBCP47' (Just (Lang "el" _ _ _) ) = Just "gr" -fromBCP47' (Just (Lang "eu" _ _ _) ) = Just "ba" -fromBCP47' (Just (Lang "he" _ _ _) ) = Just "il" -fromBCP47' (Just (Lang "jp" _ _ _) ) = Just "ja" -fromBCP47' (Just (Lang "uk" _ _ _) ) = Just "ua" -fromBCP47' (Just (Lang "vi" _ _ _) ) = Just "vn" -fromBCP47' (Just (Lang "zh" _ _ _) ) = Just "cn" -fromBCP47' (Just (Lang l _ _ _) ) = Just l -fromBCP47' Nothing = Nothing +fromBCP47' (Just (Lang "ar" _ (Just "SY") _ _ _)) = Just "ar-sy" +fromBCP47' (Just (Lang "ar" _ (Just "IQ") _ _ _)) = Just "ar-iq" +fromBCP47' (Just (Lang "ar" _ (Just "JO") _ _ _)) = Just "ar-jo" +fromBCP47' (Just (Lang "ar" _ (Just "LB") _ _ _)) = Just "ar-lb" +fromBCP47' (Just (Lang "ar" _ (Just "DZ") _ _ _)) = Just "ar-dz" +fromBCP47' (Just (Lang "ar" _ (Just "MA") _ _ _)) = Just "ar-ma" +fromBCP47' (Just (Lang "de" _ _ ["1901"] _ _)) = Just "deo" +fromBCP47' (Just (Lang "de" _ (Just "DE") _ _ _)) = Just "de-de" +fromBCP47' (Just (Lang "de" _ (Just "AT") _ _ _)) = Just "de-at" +fromBCP47' (Just (Lang "de" _ (Just "CH") _ _ _)) = Just "de-ch" +fromBCP47' (Just (Lang "el" _ _ ["poly"] _ _)) = Just "agr" +fromBCP47' (Just (Lang "en" _ (Just "US") _ _ _)) = Just "en-us" +fromBCP47' (Just (Lang "en" _ (Just "GB") _ _ _)) = Just "en-gb" +fromBCP47' (Just (Lang "grc"_ _ _ _ _)) = Just "agr" +fromBCP47' (Just (Lang "el" _ _ _ _ _)) = Just "gr" +fromBCP47' (Just (Lang "eu" _ _ _ _ _)) = Just "ba" +fromBCP47' (Just (Lang "he" _ _ _ _ _)) = Just "il" +fromBCP47' (Just (Lang "jp" _ _ _ _ _)) = Just "ja" +fromBCP47' (Just (Lang "uk" _ _ _ _ _)) = Just "ua" +fromBCP47' (Just (Lang "vi" _ _ _ _ _)) = Just "vn" +fromBCP47' (Just (Lang "zh" _ _ _ _ _)) = Just "cn" +fromBCP47' (Just (Lang l _ _ _ _ _)) = Just l +fromBCP47' Nothing = Nothing diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs index a10def95e..395335667 100644 --- a/src/Text/Pandoc/Writers/CslJson.hs +++ b/src/Text/Pandoc/Writers/CslJson.hs @@ -34,15 +34,16 @@ import Control.Monad.Identity import Citeproc.Locale (getLocale) import Citeproc.CslJson import Text.Pandoc.Options (WriterOptions) -import Data.Maybe (mapMaybe) +import Data.Maybe (mapMaybe, fromMaybe) import Data.Aeson.Encode.Pretty (Config (..), Indent (Spaces), NumberFormat (Generic), defConfig, encodePretty') writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeCslJson _opts (Pandoc meta _) = do - let lang = maybe (Lang "en" (Just "US")) parseLang - (lookupMeta "lang" meta >>= metaValueToText) + let lang = fromMaybe (Lang "en" Nothing (Just "US") [] [] []) + (lookupMeta "lang" meta >>= metaValueToText >>= + either (const Nothing) Just . parseLang) locale <- case getLocale lang of Left e -> throwError $ PandocCiteprocError e Right l -> return l diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 20bcd0324..7781df8e7 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import Text.Pandoc.BCP47 (getLang, renderLang) +import UnicodeCollation.Lang (renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 1c970e6ad..e99bad738 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -29,7 +29,7 @@ import qualified Data.Text as T import Network.URI (unEscapeString) import Text.DocTemplates (FromContext(lookupContext), renderTemplate, Val(..), Context(..)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import UnicodeCollation.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 871b2692a..437b84120 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -46,7 +46,7 @@ toPolyglossia (Lang "de" _ (Just "AT") vars _ _) toPolyglossia (Lang "de" _ (Just "AT") _ _ _) = ("german", "variant=austrian") toPolyglossia (Lang "de" _ (Just "CH") vars _ _) | "1901" `elem` vars = ("german", "variant=swiss, spelling=old") -toPolyglossia (Lang "de" _ (Just "CH") _ _ _ _) = ("german", "variant=swiss") +toPolyglossia (Lang "de" _ (Just "CH") _ _ _) = ("german", "variant=swiss") toPolyglossia (Lang "de" _ _ _ _ _) = ("german", "") toPolyglossia (Lang "dsb" _ _ _ _ _) = ("lsorbian", "") toPolyglossia (Lang "el" _ _ vars _ _) @@ -61,9 +61,9 @@ toPolyglossia (Lang "grc" _ _ _ _ _) = ("greek", "variant=ancient") toPolyglossia (Lang "hsb" _ _ _ _ _) = ("usorbian", "") toPolyglossia (Lang "la" _ _ vars _ _) | "x-classic" `elem` vars = ("latin", "variant=classic") -toPolyglossia (Lang "pt" _ "BR" _ _ _) = ("portuguese", "variant=brazilian") +toPolyglossia (Lang "pt" _ (Just "BR") _ _ _) = ("portuguese", "variant=brazilian") toPolyglossia (Lang "sl" _ _ _ _ _) = ("slovenian", "") -toPolyglossia x = (commonFromBcp47 x, "") +toPolyglossia x = (commonFromBcp47 x, "") -- Takes a list of the constituents of a BCP47 language code and -- converts it to a Babel language string. @@ -81,7 +81,7 @@ toBabel (Lang "de" _ _ vars _ _) | "1901" `elem` vars = "german" | otherwise = "ngerman" toBabel (Lang "dsb" _ _ _ _ _) = "lowersorbian" -toBabel (Lang "el" _ _ vars) +toBabel (Lang "el" _ _ vars _ _) | "polyton" `elem` vars = "polutonikogreek" toBabel (Lang "en" _ (Just "AU") _ _ _) = "australian" toBabel (Lang "en" _ (Just "CA") _ _ _) = "canadian" diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 101b236aa..6fd4cdeb4 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -16,6 +16,7 @@ import Codec.Archive.Zip import Control.Monad.Except (catchError, throwError) import Control.Monad.State.Strict import qualified Data.ByteString.Lazy as B +import Data.Maybe (fromMaybe) import Data.Generics (everywhere', mkT) import Data.List (isPrefixOf) import qualified Data.Map as Map @@ -23,7 +24,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) -import Text.Pandoc.BCP47 (Lang (..), getLang, renderLang) +import UnicodeCollation.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition @@ -35,7 +36,7 @@ import Text.Pandoc.Options (WrapOption (..), WriterOptions (..)) import Text.DocLayout import Text.Pandoc.Shared (stringify, pandocVersion, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString, lookupMetaBlocks, - fixDisplayMath) + fixDisplayMath, getLang) import Text.Pandoc.UTF8 (fromStringLazy, fromTextLazy, toTextLazy) import Text.Pandoc.Walk import Text.Pandoc.Writers.OpenDocument (writeOpenDocument) @@ -194,7 +195,7 @@ addLang lang = everywhere' (mkT updateLangAttr) where updateLangAttr (Attr n@(QName "language" _ (Just "fo")) _) = Attr n (langLanguage lang) updateLangAttr (Attr n@(QName "country" _ (Just "fo")) _) - = Attr n (langRegion lang) + = Attr n (fromMaybe "" $ langRegion lang) updateLangAttr x = x -- | transform both Image and Math elements diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index cf42f2228..6c265090c 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -25,7 +25,7 @@ import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import Text.Pandoc.BCP47 (Lang (..), parseBCP47) +import UnicodeCollation.Lang (Lang (..), parseLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm, setTranslations, toLang) import Text.Pandoc.Definition @@ -236,7 +236,7 @@ handleSpaces s = case T.uncons s of -- | Convert Pandoc document to string in OpenDocument format. writeOpenDocument :: PandocMonad m => WriterOptions -> Pandoc -> m Text writeOpenDocument opts (Pandoc meta blocks) = do - let defLang = Lang "en" "US" "" [] + let defLang = Lang "en" (Just "US") Nothing [] [] [] lang <- case lookupMetaString "lang" meta of "" -> pure defLang s -> fromMaybe defLang <$> toLang (Just s) @@ -893,7 +893,7 @@ textStyleAttr m s Map.insert "style:font-name-complex" "Courier New" $ m | Language lang <- s = Map.insert "fo:language" (langLanguage lang) . - Map.insert "fo:country" (langRegion lang) $ m + maybe id (Map.insert "fo:country") (langRegion lang) $ m | otherwise = m withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a @@ -901,7 +901,7 @@ withLangFromAttr (_,_,kvs) action = case lookup "lang" kvs of Nothing -> action Just l -> - case parseBCP47 l of + case parseLang l of Right lang -> withTextStyle (Language lang) action Left _ -> do report $ InvalidLang l diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index fcb47bd5a..a09d18571 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -149,7 +149,7 @@ defField field val (Context m) = f _newval oldval = oldval -- | Get the contents of the `lang` metadata field or variable. -getLang :: WriterOptions -> Meta -> Maybe Text +getLang :: WriterOptions -> Meta -> Maybe T.Text getLang opts meta = case lookupContext "lang" (writerVariables opts) of Just s -> Just s -- cgit v1.2.3 From 7a7fefce5ef395f1c88db8b984618a4bd3c7d916 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 15 Apr 2021 17:45:28 -0700 Subject: Use document's lang for the lang parameter of citeproc... even if it differs from localeLanguage. (It is designed to be possible to override the locale language, and this is especially useful when one wants to use the unicode extension syntx, e.g. fr-u-kb.) --- src/Text/Pandoc/Citeproc.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index c9f1806e4..2f4936fa6 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -74,8 +74,7 @@ processCitations (Pandoc meta bs) = do let linkCites = maybe False truish $ lookupMeta "link-citations" meta let opts = defaultCiteprocOptions{ linkCitations = linkCites } - let result = Citeproc.citeproc opts style (localeLanguage locale) - refs citations + let result = Citeproc.citeproc opts style mblang refs citations mapM_ (report . CiteprocWarning) (resultWarnings result) let sopts = styleOptions style let classes = "references" : -- TODO remove this or keep for compatibility? -- cgit v1.2.3 From a478a5c4c8753fd0bf272cd540ca197ae146a196 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 17 Apr 2021 11:47:54 -0700 Subject: Update to released unicode-collation, latest citeproc dev version. Update citeproc test. --- cabal.project | 11 +---------- src/Text/Pandoc/App.hs | 2 +- src/Text/Pandoc/Citeproc/Data.hs | 2 +- src/Text/Pandoc/Class/CommonState.hs | 2 +- src/Text/Pandoc/Class/PandocMonad.hs | 2 +- src/Text/Pandoc/Readers/LaTeX.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Lang.hs | 2 +- src/Text/Pandoc/Writers/ConTeXt.hs | 2 +- src/Text/Pandoc/Writers/Docbook.hs | 2 +- src/Text/Pandoc/Writers/Docx.hs | 2 +- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- src/Text/Pandoc/Writers/LaTeX/Lang.hs | 2 +- src/Text/Pandoc/Writers/ODT.hs | 2 +- src/Text/Pandoc/Writers/OpenDocument.hs | 2 +- stack.yaml | 5 ++--- test/command/pandoc-citeproc-320a.md | 8 ++++---- 16 files changed, 20 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/cabal.project b/cabal.project index fa17a20a6..77bc8ef88 100644 --- a/cabal.project +++ b/cabal.project @@ -5,14 +5,5 @@ flags: +embed_data_files source-repository-package type: git location: https://github.com/jgm/citeproc - tag: b42857be658b8f2649e989e061978e304986f853 + tag: f9439e07e9271c7c2674a51efcad2fb8c663b2c8 -source-repository-package - type: git - location: https://github.com/jgm/unicode-collation - tag: 9d229a5c6bcbaf53d7022575234eb223cfa90d55 - --- source-repository-package --- type: git --- location: https://github.com/jgm/citeproc --- tag: d44e24696ab444090d0e63e321c3a573f68b2e74 diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 67d3cce7d..4e8c9f2ab 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -55,7 +55,7 @@ import Text.Pandoc.App.Opt (Opt (..), LineEnding (..), defaultOpts, import Text.Pandoc.App.CommandLineOptions (parseOptions, parseOptionsFromArgs, options) import Text.Pandoc.App.OutputSettings (OutputSettings (..), optToOutputSettings) -import UnicodeCollation.Lang (Lang (..), parseLang) +import Text.Collate.Lang (Lang (..), parseLang) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.PDF (makePDF) import Text.Pandoc.SelfContained (makeSelfContained) diff --git a/src/Text/Pandoc/Citeproc/Data.hs b/src/Text/Pandoc/Citeproc/Data.hs index 388b9ba62..848a83a1e 100644 --- a/src/Text/Pandoc/Citeproc/Data.hs +++ b/src/Text/Pandoc/Citeproc/Data.hs @@ -10,7 +10,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.Text as T import Data.Text (Text) import Text.Pandoc.Citeproc.Util (toIETF) -import UnicodeCollation.Lang (Lang(..), parseLang) +import Text.Collate.Lang (Lang(..), parseLang) biblatexLocalizations :: [(FilePath, ByteString)] biblatexLocalizations = $(embedDir "citeproc/biblatex-localization") diff --git a/src/Text/Pandoc/Class/CommonState.hs b/src/Text/Pandoc/Class/CommonState.hs index 0fd094d99..796a4afd5 100644 --- a/src/Text/Pandoc/Class/CommonState.hs +++ b/src/Text/Pandoc/Class/CommonState.hs @@ -19,7 +19,7 @@ where import Data.Default (Default (def)) import Data.Text (Text) -import UnicodeCollation.Lang (Lang) +import Text.Collate.Lang (Lang) import Text.Pandoc.MediaBag (MediaBag) import Text.Pandoc.Logging (LogMessage, Verbosity (WARNING)) import Text.Pandoc.Translations (Translations) diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 76f1fa32b..7559cd7cd 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -70,7 +70,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, import System.FilePath ((</>), (<.>), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) -import UnicodeCollation.Lang (Lang(..), parseLang, renderLang) +import Text.Collate.Lang (Lang(..), parseLang, renderLang) import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 83caf742a..203dab83c 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -33,7 +33,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import System.FilePath (addExtension, replaceExtension, takeExtension) -import UnicodeCollation.Lang (renderLang) +import Text.Collate.Lang (renderLang) import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocPure (PandocPure) import Text.Pandoc.Class.PandocMonad (PandocMonad (..), getResourcePath, diff --git a/src/Text/Pandoc/Readers/LaTeX/Lang.hs b/src/Text/Pandoc/Readers/LaTeX/Lang.hs index b92e6ab57..6a8327904 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Lang.hs @@ -23,7 +23,7 @@ import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Shared (extractSpaces) -import UnicodeCollation.Lang (Lang(..), renderLang) +import Text.Collate.Lang (Lang(..), renderLang) import Text.Pandoc.Class (PandocMonad(..), setTranslations) import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Parsing (updateState, option, getState, QuoteContext(..), diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index f352c84bc..f14b1d894 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -21,7 +21,7 @@ import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) -import UnicodeCollation.Lang (Lang(..)) +import Text.Collate.Lang (Lang(..)) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.ImageSize diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 1f10c9d04..02b141250 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -493,4 +493,4 @@ isSectionAttr DocBook4 ("os",_) = True isSectionAttr DocBook4 ("revision",_) = True isSectionAttr DocBook4 ("security",_) = True isSectionAttr DocBook4 ("vendor",_) = True -isSectionAttr _ (_,_) = False \ No newline at end of file +isSectionAttr _ (_,_) = False diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 7781df8e7..749ad9a21 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,7 @@ import qualified Data.Text.Lazy as TL import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting -import UnicodeCollation.Lang (renderLang) +import Text.Collate.Lang (renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index e99bad738..8b1f3df1d 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -29,7 +29,7 @@ import qualified Data.Text as T import Network.URI (unEscapeString) import Text.DocTemplates (FromContext(lookupContext), renderTemplate, Val(..), Context(..)) -import UnicodeCollation.Lang (Lang (..), renderLang) +import Text.Collate.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import Text.Pandoc.Definition import Text.Pandoc.Highlighting (formatLaTeXBlock, formatLaTeXInline, highlight, diff --git a/src/Text/Pandoc/Writers/LaTeX/Lang.hs b/src/Text/Pandoc/Writers/LaTeX/Lang.hs index 437b84120..0ba68b74e 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Lang.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Lang.hs @@ -15,7 +15,7 @@ module Text.Pandoc.Writers.LaTeX.Lang toBabel ) where import Data.Text (Text) -import UnicodeCollation.Lang (Lang(..)) +import Text.Collate.Lang (Lang(..)) -- In environments \Arabic instead of \arabic is used diff --git a/src/Text/Pandoc/Writers/ODT.hs b/src/Text/Pandoc/Writers/ODT.hs index 6fd4cdeb4..e4eb4fd25 100644 --- a/src/Text/Pandoc/Writers/ODT.hs +++ b/src/Text/Pandoc/Writers/ODT.hs @@ -24,7 +24,7 @@ import qualified Data.Text as T import qualified Data.Text.Lazy as TL import Data.Time import System.FilePath (takeDirectory, takeExtension, (<.>)) -import UnicodeCollation.Lang (Lang (..), renderLang) +import Text.Collate.Lang (Lang (..), renderLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) import qualified Text.Pandoc.Class.PandocMonad as P import Text.Pandoc.Definition diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 6c265090c..34a3a4aa5 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -25,7 +25,7 @@ import Data.Ord (comparing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T -import UnicodeCollation.Lang (Lang (..), parseLang) +import Text.Collate.Lang (Lang (..), parseLang) import Text.Pandoc.Class.PandocMonad (PandocMonad, report, translateTerm, setTranslations, toLang) import Text.Pandoc.Definition diff --git a/stack.yaml b/stack.yaml index 159bf74b9..7bc33fa43 100644 --- a/stack.yaml +++ b/stack.yaml @@ -16,11 +16,10 @@ extra-deps: - texmath-0.12.2 - random-1.2.0 - xml-conduit-1.9.1.1 +- unicode-collation-0.1 # - citeproc-0.3.0.9 - git: https://github.com/jgm/citeproc - commit: b42857be658b8f2649e989e061978e304986f853 -- git: https://github.com/jgm/unicode-collation - commit: 9d229a5c6bcbaf53d7022575234eb223cfa90d55 + commit: f9439e07e9271c7c2674a51efcad2fb8c663b2c8 ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules resolver: lts-17.5 diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md index 1c3b47de0..79dacfa10 100644 --- a/test/command/pandoc-citeproc-320a.md +++ b/test/command/pandoc-citeproc-320a.md @@ -56,6 +56,10 @@ n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.). Uch, Ann. n.d. ::: +::: {#ref-item4 .csl-entry} +'Udhrī, Jamīl al-. n.d. +::: + ::: {#ref-item1 .csl-entry} ʾUdhrī, Jamīl al-. n.d. ::: @@ -68,10 +72,6 @@ Uch, Ann. n.d. \'Udhrī, Jamīl al-. n.d. ::: -::: {#ref-item4 .csl-entry} -'Udhrī, Jamīl al-. n.d. -::: - ::: {#ref-item5 .csl-entry} 'Udhrī, Jamīl al-. n.d. ::: -- cgit v1.2.3 From 73d394ca2adff31a384404ae25665b36c7d0bba0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 18 Apr 2021 22:01:12 -0700 Subject: Use MetaInlines not MetaBlocks for multimarkdown metadata fields. This gives better results in converting to e.g. pandoc markdown. Ref: <https://groups.google.com/d/msgid/pandoc-discuss/9728d1f4-040e-4392-aa04-148f648a8dfdn%40googlegroups.com> --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- test/command/mmd-metadata.md | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 test/command/mmd-metadata.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 6c3947a81..4b20e3a8b 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -298,7 +298,7 @@ kvPair allowEmpty = try $ do (try $ newline >> lookAhead (blankline <|> nonspaceChar)) guard $ allowEmpty || not (T.null val) let key' = T.concat $ T.words $ T.toLower key - let val' = MetaBlocks $ B.toList $ B.plain $ B.text val + let val' = MetaInlines $ B.toList $ B.text val return (key',val') parseMarkdown :: PandocMonad m => MarkdownParser m Pandoc diff --git a/test/command/mmd-metadata.md b/test/command/mmd-metadata.md new file mode 100644 index 000000000..3cda34873 --- /dev/null +++ b/test/command/mmd-metadata.md @@ -0,0 +1,20 @@ +``` +% pandoc -f markdown_mmd -t markdown -s +Title: Blah blah blah +Author: Doo de Doo +Base Header Level: 1 +Bibliography: Pubs.bib +Lang: en-GB + +body +^D +--- +author: Doo de Doo +baseheaderlevel: 1 +bibliography: Pubs.bib +lang: en-GB +title: Blah blah blah +--- + +body +``` -- cgit v1.2.3 From 16d372abcb991caf26077c7f1cd0be4680894170 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 19 Apr 2021 08:38:31 -0700 Subject: Issue error message when reader or writer format is malformed. Previously we exited with an error status but (due to a bug) no message. Closes #7231. --- src/Text/Pandoc/Readers.hs | 6 +++--- src/Text/Pandoc/Writers.hs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index ac70f7d4c..7ae9db34f 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -65,6 +65,7 @@ import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.Text (Text) import qualified Data.Text as T +import Text.Pandoc.Shared (tshow) import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Error @@ -101,7 +102,6 @@ import Text.Pandoc.Readers.CSV import Text.Pandoc.Readers.CslJson import Text.Pandoc.Readers.BibTeX import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Parsec.Error data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc) | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) @@ -152,8 +152,8 @@ readers = [ ("native" , TextReader readNative) getReader :: PandocMonad m => Text -> m (Reader m, Extensions) getReader s = case parseFormatSpec s of - Left e -> throwError $ PandocAppError - $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e] + Left e -> throwError $ PandocAppError $ + "Error parsing reader format " <> tshow s <> ": " <> tshow e Right (readerName, extsToEnable, extsToDisable) -> case lookup readerName readers of Nothing -> throwError $ PandocUnknownReaderError diff --git a/src/Text/Pandoc/Writers.hs b/src/Text/Pandoc/Writers.hs index 95d6270b5..c348477c2 100644 --- a/src/Text/Pandoc/Writers.hs +++ b/src/Text/Pandoc/Writers.hs @@ -81,6 +81,7 @@ import Data.Aeson import qualified Data.ByteString.Lazy as BL import Data.Text (Text) import qualified Data.Text as T +import Text.Pandoc.Shared (tshow) import Text.Pandoc.Class import Text.Pandoc.Definition import Text.Pandoc.Options @@ -122,7 +123,6 @@ import Text.Pandoc.Writers.Texinfo import Text.Pandoc.Writers.Textile import Text.Pandoc.Writers.XWiki import Text.Pandoc.Writers.ZimWiki -import Text.Parsec.Error data Writer m = TextWriter (WriterOptions -> Pandoc -> m Text) | ByteStringWriter (WriterOptions -> Pandoc -> m BL.ByteString) @@ -196,8 +196,8 @@ writers = [ getWriter :: PandocMonad m => Text -> m (Writer m, Extensions) getWriter s = case parseFormatSpec s of - Left e -> throwError $ PandocAppError - $ T.intercalate "\n" [T.pack m | Message m <- errorMessages e] + Left e -> throwError $ PandocAppError $ + "Error parsing writer format " <> tshow s <> ": " <> tshow e Right (writerName, extsToEnable, extsToDisable) -> case lookup writerName writers of Nothing -> throwError $ -- cgit v1.2.3 From 0b74bbbdaa643a473e26ea14d0a94efac6078d8c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 20 Apr 2021 10:54:46 +0200 Subject: Docx writer: extract Table handling into separate module --- pandoc.cabal | 2 + src/Text/Pandoc/Writers/Docx.hs | 226 +--------------------------------- src/Text/Pandoc/Writers/Docx/Table.hs | 114 +++++++++++++++++ 3 files changed, 121 insertions(+), 221 deletions(-) create mode 100644 src/Text/Pandoc/Writers/Docx/Table.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 744f3f095..14d2da0ae 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -657,6 +657,8 @@ library Text.Pandoc.Readers.Metadata, Text.Pandoc.Readers.Roff, Text.Pandoc.Writers.Docx.StyleMap, + Text.Pandoc.Writers.Docx.Table, + Text.Pandoc.Writers.Docx.Types, Text.Pandoc.Writers.JATS.References, Text.Pandoc.Writers.JATS.Table, Text.Pandoc.Writers.JATS.Types, diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 749ad9a21..7064ded09 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -22,7 +22,6 @@ import Control.Applicative ((<|>)) import Control.Monad.Except (catchError, throwError) import Control.Monad.Reader import Control.Monad.State.Strict -import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL import Data.Char (isSpace, isLetter) import Data.List (intercalate, isPrefixOf, isSuffixOf) @@ -47,123 +46,24 @@ import Text.Pandoc.Highlighting (highlight) import Text.Pandoc.Error import Text.Pandoc.ImageSize import Text.Pandoc.Logging -import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType, - getMimeTypeDef) +import Text.Pandoc.MIME (extensionFromMimeType, getMimeType, getMimeTypeDef) import Text.Pandoc.Options import Text.Pandoc.Writers.Docx.StyleMap +import Text.Pandoc.Writers.Docx.Table +import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Printf (printf) import Text.TeXMath import Text.Pandoc.Writers.OOXML import Text.Pandoc.XML.Light as XML import Data.Generics (mkT, everywhere) -data ListMarker = NoMarker - | BulletMarker - | NumberMarker ListNumberStyle ListNumberDelim Int - deriving (Show, Read, Eq, Ord) - -listMarkerToId :: ListMarker -> Text -listMarkerToId NoMarker = "990" -listMarkerToId BulletMarker = "991" -listMarkerToId (NumberMarker sty delim n) = T.pack $ - '9' : '9' : styNum : delimNum : show n - where styNum = case sty of - DefaultStyle -> '2' - Example -> '3' - Decimal -> '4' - LowerRoman -> '5' - UpperRoman -> '6' - LowerAlpha -> '7' - UpperAlpha -> '8' - delimNum = case delim of - DefaultDelim -> '0' - Period -> '1' - OneParen -> '2' - TwoParens -> '3' - -data EnvProps = EnvProps{ styleElement :: Maybe Element - , otherElements :: [Element] - } - -instance Semigroup EnvProps where - EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es') - -instance Monoid EnvProps where - mempty = EnvProps Nothing [] - mappend = (<>) - squashProps :: EnvProps -> [Element] squashProps (EnvProps Nothing es) = es squashProps (EnvProps (Just e) es) = e : es -data WriterEnv = WriterEnv{ envTextProperties :: EnvProps - , envParaProperties :: EnvProps - , envRTL :: Bool - , envListLevel :: Int - , envListNumId :: Int - , envInDel :: Bool - , envChangesAuthor :: Text - , envChangesDate :: Text - , envPrintWidth :: Integer - } - -defaultWriterEnv :: WriterEnv -defaultWriterEnv = WriterEnv{ envTextProperties = mempty - , envParaProperties = mempty - , envRTL = False - , envListLevel = -1 - , envListNumId = 1 - , envInDel = False - , envChangesAuthor = "unknown" - , envChangesDate = "1969-12-31T19:00:00Z" - , envPrintWidth = 1 - } - -data WriterState = WriterState{ - stFootnotes :: [Element] - , stComments :: [([(Text, Text)], [Inline])] - , stSectionIds :: Set.Set Text - , stExternalLinks :: M.Map Text Text - , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) - , stLists :: [ListMarker] - , stInsId :: Int - , stDelId :: Int - , stStyleMaps :: StyleMaps - , stFirstPara :: Bool - , stInTable :: Bool - , stInList :: Bool - , stTocTitle :: [Inline] - , stDynamicParaProps :: Set.Set ParaStyleName - , stDynamicTextProps :: Set.Set CharStyleName - , stCurId :: Int - } - -defaultWriterState :: WriterState -defaultWriterState = WriterState{ - stFootnotes = defaultFootnotes - , stComments = [] - , stSectionIds = Set.empty - , stExternalLinks = M.empty - , stImages = M.empty - , stLists = [NoMarker] - , stInsId = 1 - , stDelId = 1 - , stStyleMaps = StyleMaps M.empty M.empty - , stFirstPara = False - , stInTable = False - , stInList = False - , stTocTitle = [Str "Table of Contents"] - , stDynamicParaProps = Set.empty - , stDynamicTextProps = Set.empty - , stCurId = 20 - } - -type WS m = ReaderT WriterEnv (StateT WriterState m) - renumIdMap :: Int -> [Element] -> M.Map Text Text renumIdMap _ [] = M.empty renumIdMap n (e:es) @@ -858,12 +758,6 @@ separateTables (x@Table{}:xs@(Table{}:_)) = x : RawBlock (Format "openxml") "<w:p />" : separateTables xs separateTables (x:xs) = x : separateTables xs -pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element -pStyleM styleName = do - pStyleMap <- gets (smParaStyle . stStyleMaps) - let sty' = getStyleIdFromName styleName pStyleMap - return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () - rStyleM :: (PandocMonad m) => CharStyleName -> WS m XML.Element rStyleM styleName = do cStyleMap <- gets (smCharStyle . stStyleMaps) @@ -995,78 +889,8 @@ blockToOpenXML' _ HorizontalRule = do $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] -blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = do - let (caption, aligns, widths, headers, rows) = toLegacyTable blkCapt specs thead tbody tfoot - setFirstPara - modify $ \s -> s { stInTable = True } - let captionStr = stringify caption - caption' <- if null caption - then return [] - else withParaPropM (pStyleM "Table Caption") - $ blockToOpenXML opts (Para caption) - let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () - -- Table cells require a <w:p> element, even an empty one! - -- Not in the spec but in Word 2007, 2010. See #4953. And - -- apparently the last element must be a <w:p>, see #6983. - let cellToOpenXML (al, cell) = do - es <- withParaProp (alignmentFor al) $ blocksToOpenXML opts cell - return $ - case reverse (onlyElems es) of - b:e:_ | qName (elName b) == "bookmarkEnd" - , qName (elName e) == "p" -> es - e:_ | qName (elName e) == "p" -> es - _ -> es ++ [Elem $ mknode "w:p" [] ()] - headers' <- mapM cellToOpenXML $ zip aligns headers - rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - compactStyle <- pStyleM "Compact" - let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] - let mkcell contents = mknode "w:tc" [] - $ if null contents - then emptyCell' - else contents - let mkrow cells = - mknode "w:tr" [] $ - map mkcell cells - let textwidth = 7920 -- 5.5 in in twips, 1/20 pt - let fullrow = 5000 -- 100% specified in pct - let (rowwidth :: Int) = round $ fullrow * sum widths - let mkgridcol w = mknode "w:gridCol" - [("w:w", tshow (floor (textwidth * w) :: Integer))] () - let hasHeader = not $ all null headers - modify $ \s -> s { stInTable = False } - -- for compatibility with Word <= 2007, we include a val with a bitmask - -- 0×0020 Apply first row conditional formatting - -- 0×0040 Apply last row conditional formatting - -- 0×0080 Apply first column conditional formatting - -- 0×0100 Apply last column conditional formatting - -- 0×0200 Do not apply row banding conditional formatting - -- 0×0400 Do not apply column banding conditional formattin - let tblLookVal :: Int - tblLookVal = if hasHeader then 0x20 else 0 - return $ - caption' ++ - [Elem $ - mknode "w:tbl" [] - ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : - mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") - ,("w:lastRow","0") - ,("w:firstColumn","0") - ,("w:lastColumn","0") - ,("w:noHBand","0") - ,("w:noVBand","0") - ,("w:val", T.pack $ printf "%04x" tblLookVal) - ] () : - [ mknode "w:tblCaption" [("w:val", captionStr)] () - | not (null caption) ] ) - : mknode "w:tblGrid" [] - (if all (==0) widths - then [] - else map mkgridcol widths) - : [ mkrow headers' | hasHeader ] ++ - map mkrow rows' - )] +blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = + tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst | OrderedList (start, numstyle, numdelim) lst <- el @@ -1121,13 +945,6 @@ listItemToOpenXML opts numid (first:rest) = do modify $ \st -> st{ stInList = oldInList } return $ first'' ++ rest'' -alignmentToString :: Alignment -> Text -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" - -- | Convert a list of inline elements to OpenXML. inlinesToOpenXML :: PandocMonad m => WriterOptions -> [Inline] -> WS m [Content] inlinesToOpenXML opts lst = concat `fmap` mapM (inlineToOpenXML opts) lst @@ -1138,10 +955,6 @@ withNumId numid = local $ \env -> env{ envListNumId = numid } asList :: (PandocMonad m) => WS m a -> WS m a asList = local $ \env -> env{ envListLevel = envListLevel env + 1 } -isStyle :: Element -> Bool -isStyle e = isElem [] "w" "rStyle" e || - isElem [] "w" "pStyle" e - getTextProps :: (PandocMonad m) => WS m [Element] getTextProps = do props <- asks envTextProperties @@ -1170,16 +983,6 @@ getParaProps displayMathPara = do [] -> [] ps -> [mknode "w:pPr" [] ps] -withParaProp :: PandocMonad m => Element -> WS m a -> WS m a -withParaProp d p = - local (\env -> env {envParaProperties = ep <> envParaProperties env}) p - where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d] - -withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a -withParaPropM md p = do - d <- md - withParaProp d p - formattedString :: PandocMonad m => Text -> WS m [Element] formattedString str = -- properly handle soft hyphens @@ -1200,9 +1003,6 @@ formattedRun els = do props <- getTextProps return [ mknode "w:r" [] $ props ++ els ] -setFirstPara :: PandocMonad m => WS m () -setFirstPara = modify $ \s -> s { stFirstPara = True } - -- | Convert an inline element to OpenXML. inlineToOpenXML :: PandocMonad m => WriterOptions -> Inline -> WS m [Content] inlineToOpenXML opts il = withDirection $ inlineToOpenXML' opts il @@ -1494,22 +1294,6 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do br :: Element br = mknode "w:r" [] [mknode "w:br" [] ()] --- Word will insert these footnotes into the settings.xml file --- (whether or not they're visible in the document). If they're in the --- file, but not in the footnotes.xml file, it will produce --- problems. So we want to make sure we insert them into our document. -defaultFootnotes :: [Element] -defaultFootnotes = [ mknode "w:footnote" - [("w:type", "separator"), ("w:id", "-1")] - [ mknode "w:p" [] - [mknode "w:r" [] - [ mknode "w:separator" [] ()]]] - , mknode "w:footnote" - [("w:type", "continuationSeparator"), ("w:id", "0")] - [ mknode "w:p" [] - [ mknode "w:r" [] - [ mknode "w:continuationSeparator" [] ()]]]] - withDirection :: PandocMonad m => WS m a -> WS m a withDirection x = do diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs new file mode 100644 index 000000000..a6b137fc4 --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -0,0 +1,114 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{- | +Module : Text.Pandoc.Writers.Docx +Copyright : Copyright (C) 2012-2021 John MacFarlane +License : GNU GPL, version 2 or above +Maintainer : John MacFarlane <jgm@berkeley.edu> + +Conversion of table blocks to docx. +-} +module Text.Pandoc.Writers.Docx.Table + ( tableToOpenXML + ) where + +import Control.Monad.State.Strict +import Data.Text (Text) +import Text.Pandoc.Definition +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Writers.Docx.Types +import Text.Pandoc.Shared +import Text.Pandoc.Writers.Shared +import Text.Printf (printf) +import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML +import qualified Data.Text as T + +tableToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> Caption + -> [ColSpec] + -> TableHead + -> [TableBody] + -> TableFoot + -> WS m [Content] +tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do + let (caption, aligns, widths, headers, rows) = + toLegacyTable blkCapt specs thead tbody tfoot + setFirstPara + modify $ \s -> s { stInTable = True } + let captionStr = stringify caption + caption' <- if null caption + then return [] + else withParaPropM (pStyleM "Table Caption") + $ blocksToOpenXML [Para caption] + let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () + -- Table cells require a <w:p> element, even an empty one! + -- Not in the spec but in Word 2007, 2010. See #4953. And + -- apparently the last element must be a <w:p>, see #6983. + let cellToOpenXML (al, cell) = do + es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell + return $ + case reverse (onlyElems es) of + b:e:_ | qName (elName b) == "bookmarkEnd" + , qName (elName e) == "p" -> es + e:_ | qName (elName e) == "p" -> es + _ -> es ++ [Elem $ mknode "w:p" [] ()] + headers' <- mapM cellToOpenXML $ zip aligns headers + rows' <- mapM (mapM cellToOpenXML . zip aligns) rows + compactStyle <- pStyleM "Compact" + let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] + let mkcell contents = mknode "w:tc" [] + $ if null contents + then emptyCell' + else contents + let mkrow cells = + mknode "w:tr" [] $ + map mkcell cells + let textwidth = 7920 -- 5.5 in in twips, 1/20 pt + let fullrow = 5000 -- 100% specified in pct + let (rowwidth :: Int) = round $ fullrow * sum widths + let mkgridcol w = mknode "w:gridCol" + [("w:w", tshow (floor (textwidth * w) :: Integer))] () + let hasHeader = not $ all null headers + modify $ \s -> s { stInTable = False } + -- for compatibility with Word <= 2007, we include a val with a bitmask + -- 0×0020 Apply first row conditional formatting + -- 0×0040 Apply last row conditional formatting + -- 0×0080 Apply first column conditional formatting + -- 0×0100 Apply last column conditional formatting + -- 0×0200 Do not apply row banding conditional formatting + -- 0×0400 Do not apply column banding conditional formattin + let tblLookVal :: Int + tblLookVal = if hasHeader then 0x20 else 0 + return $ + caption' ++ + [Elem $ + mknode "w:tbl" [] + ( mknode "w:tblPr" [] + ( mknode "w:tblStyle" [("w:val","Table")] () : + mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : + mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") + ,("w:lastRow","0") + ,("w:firstColumn","0") + ,("w:lastColumn","0") + ,("w:noHBand","0") + ,("w:noVBand","0") + ,("w:val", T.pack $ printf "%04x" tblLookVal) + ] () : + [ mknode "w:tblCaption" [("w:val", captionStr)] () + | not (null caption) ] ) + : mknode "w:tblGrid" [] + (if all (==0) widths + then [] + else map mkgridcol widths) + : [ mkrow headers' | hasHeader ] ++ + map mkrow rows' + )] + +alignmentToString :: Alignment -> Text +alignmentToString alignment = case alignment of + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" -- cgit v1.2.3 From dc0ba7294d9c40987b016ac886440a85ade8ae44 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 20 Apr 2021 13:38:16 +0200 Subject: Docx writer: add missing file --- src/Text/Pandoc/Writers/Docx/Types.hs | 181 ++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 src/Text/Pandoc/Writers/Docx/Types.hs (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs new file mode 100644 index 000000000..006584c30 --- /dev/null +++ b/src/Text/Pandoc/Writers/Docx/Types.hs @@ -0,0 +1,181 @@ +{-# LANGUAGE OverloadedStrings #-} +{- | +Module : Text.Pandoc.Writers.Docx +Copyright : Copyright (C) 2012-2021 John MacFarlane +License : GNU GPL, version 2 or above +Maintainer : John MacFarlane <jgm@berkeley.edu> + +Conversion of table blocks to docx. +-} +module Text.Pandoc.Writers.Docx.Types + ( EnvProps (..) + , WriterEnv (..) + , defaultWriterEnv + , WriterState (..) + , defaultWriterState + , WS + , ListMarker (..) + , listMarkerToId + , pStyleM + , isStyle + , setFirstPara + , withParaProp + , withParaPropM + ) where + +import Control.Applicative ((<|>)) +import Control.Monad.Reader +import Control.Monad.State.Strict +import Data.Text (Text) +import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Definition +import Text.Pandoc.MIME (MimeType) +import Text.Pandoc.Writers.Docx.StyleMap +import Text.Pandoc.Writers.OOXML +import Text.Pandoc.XML.Light as XML +import qualified Data.ByteString as B +import qualified Data.Map as M +import qualified Data.Set as Set +import qualified Data.Text as T + +data ListMarker = NoMarker + | BulletMarker + | NumberMarker ListNumberStyle ListNumberDelim Int + deriving (Show, Read, Eq, Ord) + +listMarkerToId :: ListMarker -> Text +listMarkerToId NoMarker = "990" +listMarkerToId BulletMarker = "991" +listMarkerToId (NumberMarker sty delim n) = T.pack $ + '9' : '9' : styNum : delimNum : show n + where styNum = case sty of + DefaultStyle -> '2' + Example -> '3' + Decimal -> '4' + LowerRoman -> '5' + UpperRoman -> '6' + LowerAlpha -> '7' + UpperAlpha -> '8' + delimNum = case delim of + DefaultDelim -> '0' + Period -> '1' + OneParen -> '2' + TwoParens -> '3' + + +data EnvProps = EnvProps{ styleElement :: Maybe Element + , otherElements :: [Element] + } + +instance Semigroup EnvProps where + EnvProps s es <> EnvProps s' es' = EnvProps (s <|> s') (es ++ es') + +instance Monoid EnvProps where + mempty = EnvProps Nothing [] + mappend = (<>) + +data WriterEnv = WriterEnv + { envTextProperties :: EnvProps + , envParaProperties :: EnvProps + , envRTL :: Bool + , envListLevel :: Int + , envListNumId :: Int + , envInDel :: Bool + , envChangesAuthor :: Text + , envChangesDate :: Text + , envPrintWidth :: Integer + } + +defaultWriterEnv :: WriterEnv +defaultWriterEnv = WriterEnv + { envTextProperties = mempty + , envParaProperties = mempty + , envRTL = False + , envListLevel = -1 + , envListNumId = 1 + , envInDel = False + , envChangesAuthor = "unknown" + , envChangesDate = "1969-12-31T19:00:00Z" + , envPrintWidth = 1 + } + + +data WriterState = WriterState{ + stFootnotes :: [Element] + , stComments :: [([(Text, Text)], [Inline])] + , stSectionIds :: Set.Set Text + , stExternalLinks :: M.Map Text Text + , stImages :: M.Map FilePath (String, String, Maybe MimeType, B.ByteString) + , stLists :: [ListMarker] + , stInsId :: Int + , stDelId :: Int + , stStyleMaps :: StyleMaps + , stFirstPara :: Bool + , stInTable :: Bool + , stInList :: Bool + , stTocTitle :: [Inline] + , stDynamicParaProps :: Set.Set ParaStyleName + , stDynamicTextProps :: Set.Set CharStyleName + , stCurId :: Int + } + +defaultWriterState :: WriterState +defaultWriterState = WriterState{ + stFootnotes = defaultFootnotes + , stComments = [] + , stSectionIds = Set.empty + , stExternalLinks = M.empty + , stImages = M.empty + , stLists = [NoMarker] + , stInsId = 1 + , stDelId = 1 + , stStyleMaps = StyleMaps M.empty M.empty + , stFirstPara = False + , stInTable = False + , stInList = False + , stTocTitle = [Str "Table of Contents"] + , stDynamicParaProps = Set.empty + , stDynamicTextProps = Set.empty + , stCurId = 20 + } + +setFirstPara :: PandocMonad m => WS m () +setFirstPara = modify $ \s -> s { stFirstPara = True } + +type WS m = ReaderT WriterEnv (StateT WriterState m) + +-- Word will insert these footnotes into the settings.xml file +-- (whether or not they're visible in the document). If they're in the +-- file, but not in the footnotes.xml file, it will produce +-- problems. So we want to make sure we insert them into our document. +defaultFootnotes :: [Element] +defaultFootnotes = [ mknode "w:footnote" + [("w:type", "separator"), ("w:id", "-1")] + [ mknode "w:p" [] + [mknode "w:r" [] + [ mknode "w:separator" [] ()]]] + , mknode "w:footnote" + [("w:type", "continuationSeparator"), ("w:id", "0")] + [ mknode "w:p" [] + [ mknode "w:r" [] + [ mknode "w:continuationSeparator" [] ()]]]] + +pStyleM :: (PandocMonad m) => ParaStyleName -> WS m XML.Element +pStyleM styleName = do + pStyleMap <- gets (smParaStyle . stStyleMaps) + let sty' = getStyleIdFromName styleName pStyleMap + return $ mknode "w:pStyle" [("w:val", fromStyleId sty')] () + +withParaProp :: PandocMonad m => Element -> WS m a -> WS m a +withParaProp d p = + local (\env -> env {envParaProperties = ep <> envParaProperties env}) p + where ep = if isStyle d then EnvProps (Just d) [] else EnvProps Nothing [d] + +withParaPropM :: PandocMonad m => WS m Element -> WS m a -> WS m a +withParaPropM md p = do + d <- md + withParaProp d p + +isStyle :: Element -> Bool +isStyle e = isElem [] "w" "rStyle" e || + isElem [] "w" "pStyle" e -- cgit v1.2.3 From 547bc2cdf83b8be926de55521674c0e8fab12db5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 25 Apr 2021 10:31:33 -0700 Subject: Add quotes properly in markdown YAML metadata fields. This fixes a bug, which caused the writer to look at the LAST rather than the FIRST character in determining whether quotes were needed. So we got spurious quotes in some cases and didn't get necessary quotes in others. Closes #7245. Updated a number of test cases accordingly. --- src/Text/Pandoc/Writers/Markdown.hs | 11 +++++------ test/command/1279.md | 2 +- test/command/biblatex-inproceedings.md | 2 +- test/command/biblatex-jaffe.md | 4 ++-- test/command/biblatex-moraux.md | 2 +- test/command/biblatex-quotes.md | 2 +- test/command/biblatex-spiegelberg.md | 4 ++-- test/command/biblatex-test-case-conversion.md | 4 ++-- test/command/biblatex-textnormal.md | 2 +- 9 files changed, 16 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index daf45ed53..3295d9e6c 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -26,7 +26,7 @@ import Data.Default import Data.List (intersperse, sortOn, transpose) import Data.List.NonEmpty (nonEmpty, NonEmpty(..)) import qualified Data.Map as M -import Data.Maybe (fromMaybe, mapMaybe) +import Data.Maybe (fromMaybe, mapMaybe, isNothing) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T @@ -142,18 +142,17 @@ valToYaml (SimpleVal x) | otherwise = if hasNewlines x then hang 0 ("|" <> cr) x - else if fst $ foldr needsDoubleQuotes (False, True) x + else if isNothing $ foldM needsDoubleQuotes True x then "\"" <> fmap escapeInDoubleQuotes x <> "\"" else x where - needsDoubleQuotes t (positive, isFirst) + needsDoubleQuotes isFirst t = if T.any isBadAnywhere t || (isFirst && T.any isYamlPunct (T.take 1 t)) - then (True, False) - else (positive, False) + then Nothing + else Just False isBadAnywhere '#' = True isBadAnywhere ':' = True - isBadAnywhere '`' = False isBadAnywhere _ = False hasNewlines NewLine = True hasNewlines BlankLines{} = True diff --git a/test/command/1279.md b/test/command/1279.md index acad6859e..d3572f245 100644 --- a/test/command/1279.md +++ b/test/command/1279.md @@ -9,7 +9,7 @@ title: My Article [^1]: Dept. of This and That ^D --- -author: "John Doe[^1]" +author: John Doe[^1] date: 2014 title: My Article --- diff --git a/test/command/biblatex-inproceedings.md b/test/command/biblatex-inproceedings.md index 29a1e0298..76b584800 100644 --- a/test/command/biblatex-inproceedings.md +++ b/test/command/biblatex-inproceedings.md @@ -63,7 +63,7 @@ references: publisher-place: Cambridge title: "Le *De Anima* dans la tradition grècque: Quelques aspects de l'interpretation du traité, de Theophraste à Themistius" - title-short: *De Anima* dans la tradition grècque + title-short: "*De Anima* dans la tradition grècque" type: paper-conference - author: - family: Salam diff --git a/test/command/biblatex-jaffe.md b/test/command/biblatex-jaffe.md index f172c6ae7..e0381772b 100644 --- a/test/command/biblatex-jaffe.md +++ b/test/command/biblatex-jaffe.md @@ -72,8 +72,8 @@ references: language: la number-of-volumes: 2 publisher-place: Leipzig - title: "Regesta Pontificum Romanorum ab condita ecclesia ad annum post - Christum natum [mcxcviii]{.smallcaps}" + title: Regesta Pontificum Romanorum ab condita ecclesia ad annum post + Christum natum [mcxcviii]{.smallcaps} title-short: Regesta Pontificum Romanorum type: book --- diff --git a/test/command/biblatex-moraux.md b/test/command/biblatex-moraux.md index 8a1b084f8..1624ad960 100644 --- a/test/command/biblatex-moraux.md +++ b/test/command/biblatex-moraux.md @@ -86,7 +86,7 @@ references: publisher-place: Cambridge title: "Le *De Anima* dans la tradition grècque: Quelques aspects de l'interpretation du traité, de Theophraste à Themistius" - title-short: *De Anima* dans la tradition grècque + title-short: "*De Anima* dans la tradition grècque" type: paper-conference --- diff --git a/test/command/biblatex-quotes.md b/test/command/biblatex-quotes.md index 9d868cb76..50055316f 100644 --- a/test/command/biblatex-quotes.md +++ b/test/command/biblatex-quotes.md @@ -36,7 +36,7 @@ references: language: en-US publisher: Princeton University Press publisher-place: Princeton - title: "Aristotle's \"De Motu Animalium\"" + title: Aristotle's "De Motu Animalium" type: book --- diff --git a/test/command/biblatex-spiegelberg.md b/test/command/biblatex-spiegelberg.md index 59d651958..59338ae88 100644 --- a/test/command/biblatex-spiegelberg.md +++ b/test/command/biblatex-spiegelberg.md @@ -63,8 +63,8 @@ references: issued: 1969 language: de-DE page: 189-216 - title: "Intention" und "Intentionalität" in der Scholastik, bei - Brentano und Husserl + title: "\"Intention\" und \"Intentionalität\" in der Scholastik, bei + Brentano und Husserl" title-short: Intention und Intentionalität type: article-journal volume: 29 diff --git a/test/command/biblatex-test-case-conversion.md b/test/command/biblatex-test-case-conversion.md index aa30767ec..e69cb9336 100644 --- a/test/command/biblatex-test-case-conversion.md +++ b/test/command/biblatex-test-case-conversion.md @@ -63,9 +63,9 @@ references: id: item1 issued: 2013 language: en-US - title: "A title, in English, with a Proper Name and an ACRONYM and a + title: A title, in English, with a Proper Name and an ACRONYM and a [camelCase]{.nocase} word and some units, 400 [nm]{.nocase}, - 3 [cm]{.nocase}, and a quote, *Alea [iacta est]{.nocase}*" + 3 [cm]{.nocase}, and a quote, *Alea [iacta est]{.nocase}* type: article-journal --- diff --git a/test/command/biblatex-textnormal.md b/test/command/biblatex-textnormal.md index 0aa5550c3..a4e6107ec 100644 --- a/test/command/biblatex-textnormal.md +++ b/test/command/biblatex-textnormal.md @@ -9,7 +9,7 @@ nocite: "[@*]" references: - id: item1 - title: "The title [of this book]{.nodecor}" + title: The title [of this book]{.nodecor} type: book --- -- cgit v1.2.3 From e9c0f9f97ba6459530c7bb2ffb55d432a1ba7884 Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sun, 25 Apr 2021 19:36:06 +0200 Subject: Markdown writer: Cleaner (code)blocks with single class (#7242) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a block only has a single class and no other attributes, it is not necessary to wrap the class attribute in curly braces – the class name can be placed after the opening mark as is. This will result in bit cleaner output when pandoc is used as a markdown pretty-printer. --- src/Text/Pandoc/Writers/Markdown.hs | 10 ++++++++-- test/command/5304.md | 2 +- test/command/5519.md | 2 +- test/command/6925.md | 4 ++-- test/command/toc.md | 4 ++-- 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 3295d9e6c..2ad9eabd9 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -278,6 +278,12 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] escAttrChar '\\' = literal "\\\\" escAttrChar c = literal $ T.singleton c +-- | (Code) blocks with a single class can just use it standalone, +-- no need to bother with curly braces. +classOrAttrsToMarkdown :: Attr -> Doc Text +classOrAttrsToMarkdown ("",[cls],_) = literal cls +classOrAttrsToMarkdown attrs = attrsToMarkdown attrs + linkAttributes :: WriterOptions -> Attr -> Doc Text linkAttributes opts attr = if isEnabled Ext_link_attributes opts && attr /= nullAttr @@ -343,7 +349,7 @@ blockToMarkdown' opts (Div attrs ils) = do case () of _ | isEnabled Ext_fenced_divs opts && attrs /= nullAttr -> - nowrap (literal ":::" <+> attrsToMarkdown attrs) $$ + nowrap (literal ":::" <+> classOrAttrsToMarkdown attrs) $$ chomp contents $$ literal ":::" <> blankline | isEnabled Ext_native_divs opts || @@ -512,7 +518,7 @@ blockToMarkdown' opts (CodeBlock attribs str) = do backticks = endline '`' tildes = endline '~' attrs = if isEnabled Ext_fenced_code_attributes opts - then nowrap $ " " <> attrsToMarkdown attribs + then nowrap $ " " <> classOrAttrsToMarkdown attribs else case attribs of (_,cls:_,_) -> " " <> literal cls _ -> empty diff --git a/test/command/5304.md b/test/command/5304.md index 62b2b9ddd..70f32a96a 100644 --- a/test/command/5304.md +++ b/test/command/5304.md @@ -11,7 +11,7 @@ ... ``` ^D -``` {.markdown} +``` markdown `«sträng»` `` «sträng» `` diff --git a/test/command/5519.md b/test/command/5519.md index a175ce9f9..ecde184f1 100644 --- a/test/command/5519.md +++ b/test/command/5519.md @@ -6,7 +6,7 @@ ``` `````` ^D -```` {.attr} +```` attr ``` code ``` diff --git a/test/command/6925.md b/test/command/6925.md index 458a0b91d..e0d8e6870 100644 --- a/test/command/6925.md +++ b/test/command/6925.md @@ -20,13 +20,13 @@ a \end{thm2} \end{document} ^D -::: {.thm} +::: thm **Theorem 1**. *a*  ::: -::: {.thm2} +::: thm2 **Theorem 1**. a  diff --git a/test/command/toc.md b/test/command/toc.md index 794af7690..543f97ba9 100644 --- a/test/command/toc.md +++ b/test/command/toc.md @@ -31,7 +31,7 @@ ## b -::: {.interior} +::: interior # C ## cc @@ -39,7 +39,7 @@ # D ::: -::: {.blue} +::: blue # E ## e -- cgit v1.2.3 From c56d080a253c1364ca55ae97b8d43ed638acf723 Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sun, 25 Apr 2021 21:19:07 +0200 Subject: Writers: Recognize custom syntax definitions (#7241) Languages defined using `--syntax-definition` were not recognized by `languagesByExtension`. This patch corrects that, allowing the writers to see all custom definitions. The LaTeX still uses the default syntax map, but that's okay in that context, since `--syntax-definition` won't create new listings styles. --- src/Text/Pandoc/Highlighting.hs | 10 +++++----- src/Text/Pandoc/Readers/LaTeX.hs | 3 ++- src/Text/Pandoc/Writers/Docbook.hs | 7 ++++--- src/Text/Pandoc/Writers/JATS.hs | 24 +++++++++++++----------- src/Text/Pandoc/Writers/TEI.hs | 7 ++++--- 5 files changed, 28 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Highlighting.hs b/src/Text/Pandoc/Highlighting.hs index 0bb6ed319..62a261e50 100644 --- a/src/Text/Pandoc/Highlighting.hs +++ b/src/Text/Pandoc/Highlighting.hs @@ -52,12 +52,12 @@ highlightingStyles = ("breezedark", breezeDark), ("haddock", haddock)] -languages :: [T.Text] -languages = [T.toLower (sName s) | s <- M.elems defaultSyntaxMap] +languages :: SyntaxMap -> [T.Text] +languages syntaxmap = [T.toLower (sName s) | s <- M.elems syntaxmap] -languagesByExtension :: T.Text -> [T.Text] -languagesByExtension ext = - [T.toLower (sName s) | s <- syntaxesByExtension defaultSyntaxMap (T.unpack ext)] +languagesByExtension :: SyntaxMap -> T.Text -> [T.Text] +languagesByExtension syntaxmap ext = + [T.toLower (sName s) | s <- syntaxesByExtension syntaxmap (T.unpack ext)] highlight :: SyntaxMap -> (FormatOptions -> [SourceLine] -> a) -- ^ Formatter diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 203dab83c..19c257a48 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -32,6 +32,7 @@ import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T +import Skylighting (defaultSyntaxMap) import System.FilePath (addExtension, replaceExtension, takeExtension) import Text.Collate.Lang (renderLang) import Text.Pandoc.Builder as B @@ -1170,7 +1171,7 @@ inputListing = do let (ident,classes,kvs) = parseListingsOptions options let classes' = (case listingsLanguage options of - Nothing -> (take 1 (languagesByExtension (T.pack $ takeExtension $ T.unpack f)) <>) + Nothing -> (take 1 (languagesByExtension defaultSyntaxMap (T.pack $ takeExtension $ T.unpack f)) <>) Just _ -> id) classes let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead let lastline = fromMaybe (length codeLines) $ diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 02b141250..25bd308bf 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -253,17 +253,18 @@ blockToDocbook opts (LineBlock lns) = blockToDocbook opts $ linesToPara lns blockToDocbook opts (BlockQuote blocks) = inTagsIndented "blockquote" <$> blocksToDocbook opts blocks -blockToDocbook _ (CodeBlock (_,classes,_) str) = return $ +blockToDocbook opts (CodeBlock (_,classes,_) str) = return $ literal ("<programlisting" <> lang <> ">") <> cr <> flush (literal (escapeStringForXML str) <> cr <> literal "</programlisting>") where lang = if null langs then "" else " language=\"" <> escapeStringForXML (head langs) <> "\"" - isLang l = T.toLower l `elem` map T.toLower languages + syntaxMap = writerSyntaxMap opts + isLang l = T.toLower l `elem` map T.toLower (languages syntaxMap) langsFrom s = if isLang s then [s] - else languagesByExtension . T.toLower $ s + else (languagesByExtension syntaxMap) . T.toLower $ s langs = concatMap langsFrom classes blockToDocbook opts (BulletList lst) = do let attribs = [("spacing", "compact") | isTightList lst] diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index b58ff8aef..0bcfa0df4 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -224,19 +224,21 @@ imageMimeType src kvs = (T.drop 1 . T.dropWhile (/='/') <$> mbMT) in (maintype, subtype) -languageFor :: [Text] -> Text -languageFor classes = +languageFor :: WriterOptions -> [Text] -> Text +languageFor opts classes = case langs of (l:_) -> escapeStringForXML l [] -> "" - where isLang l = T.toLower l `elem` map T.toLower languages + where + syntaxMap = writerSyntaxMap opts + isLang l = T.toLower l `elem` map T.toLower (languages syntaxMap) langsFrom s = if isLang s then [s] - else languagesByExtension . T.toLower $ s + else (languagesByExtension syntaxMap) . T.toLower $ s langs = concatMap langsFrom classes -codeAttr :: Attr -> (Text, [(Text, Text)]) -codeAttr (ident,classes,kvs) = (lang, attr) +codeAttr :: WriterOptions -> Attr -> (Text, [(Text, Text)]) +codeAttr opts (ident,classes,kvs) = (lang, attr) where attr = [("id", escapeNCName ident) | not (T.null ident)] ++ [("language",lang) | not (T.null lang)] ++ @@ -244,7 +246,7 @@ codeAttr (ident,classes,kvs) = (lang, attr) "code-version", "executable", "language-version", "orientation", "platforms", "position", "specific-use"]] - lang = languageFor classes + lang = languageFor opts classes -- | Convert a Pandoc block element to JATS. blockToJATS :: PandocMonad m => WriterOptions -> Block -> JATS m (Doc Text) @@ -330,9 +332,9 @@ blockToJATS opts (BlockQuote blocks) = do HorizontalRule -> True _ -> False inTagsIndented "disp-quote" <$> wrappedBlocksToJATS needsWrap opts blocks -blockToJATS _ (CodeBlock a str) = return $ +blockToJATS opts (CodeBlock a str) = return $ inTags False tag attr (flush (text (T.unpack $ escapeStringForXML str))) - where (lang, attr) = codeAttr a + where (lang, attr) = codeAttr opts a tag = if T.null lang then "preformat" else "code" blockToJATS _ (BulletList []) = return empty blockToJATS opts (BulletList lst) = @@ -412,9 +414,9 @@ inlineToJATS opts (Quoted SingleQuote lst) = do inlineToJATS opts (Quoted DoubleQuote lst) = do contents <- inlinesToJATS opts lst return $ char '“' <> contents <> char '”' -inlineToJATS _ (Code a str) = +inlineToJATS opts (Code a str) = return $ inTags False tag attr $ literal (escapeStringForXML str) - where (lang, attr) = codeAttr a + where (lang, attr) = codeAttr opts a tag = if T.null lang then "monospace" else "code" inlineToJATS _ il@(RawInline f x) | f == "jats" = return $ literal x diff --git a/src/Text/Pandoc/Writers/TEI.hs b/src/Text/Pandoc/Writers/TEI.hs index b926c48a1..18015259d 100644 --- a/src/Text/Pandoc/Writers/TEI.hs +++ b/src/Text/Pandoc/Writers/TEI.hs @@ -146,16 +146,17 @@ blockToTEI opts (LineBlock lns) = blockToTEI opts $ linesToPara lns blockToTEI opts (BlockQuote blocks) = inTagsIndented "quote" <$> blocksToTEI opts blocks -blockToTEI _ (CodeBlock (_,classes,_) str) = +blockToTEI opts (CodeBlock (_,classes,_) str) = return $ literal ("<ab type='codeblock " <> lang <> "'>") <> cr <> flush (literal (escapeStringForXML str) <> cr <> text "</ab>") where lang = if null langs then "" else escapeStringForXML (head langs) - isLang l = T.toLower l `elem` map T.toLower languages + syntaxMap = writerSyntaxMap opts + isLang l = T.toLower l `elem` map T.toLower (languages syntaxMap) langsFrom s = if isLang s then [s] - else languagesByExtension . T.toLower $ s + else (languagesByExtension syntaxMap) . T.toLower $ s langs = concatMap langsFrom classes blockToTEI opts (BulletList lst) = do let attribs = [("type", "unordered")] -- cgit v1.2.3 From 3a98f7a0c767c3b13c7b358ca75d2bddab7cd4d2 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 25 Apr 2021 12:21:00 -0700 Subject: Minor code reformatting. Also taking this opportunity to note, for the record, that the commit for #7241 should be marked [API change]. It changes the type of `languagesByExtension` in Highlighting, adding a parameter for a `SyntaxMap`. --- src/Text/Pandoc/Readers/LaTeX.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 19c257a48..9ad168293 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -1171,7 +1171,8 @@ inputListing = do let (ident,classes,kvs) = parseListingsOptions options let classes' = (case listingsLanguage options of - Nothing -> (take 1 (languagesByExtension defaultSyntaxMap (T.pack $ takeExtension $ T.unpack f)) <>) + Nothing -> (take 1 (languagesByExtension defaultSyntaxMap + (T.pack $ takeExtension $ T.unpack f)) <>) Just _ -> id) classes let firstline = fromMaybe 1 $ lookup "firstline" options >>= safeRead let lastline = fromMaybe (length codeLines) $ -- cgit v1.2.3 From 0921b82d98b6ec7fa80ffd522c129b3828b9c00b Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 26 Apr 2021 18:07:01 +0200 Subject: Docx writer: autoset table width if no column has an explicit width. --- src/Text/Pandoc/Writers/Docx/Table.hs | 18 +++++++++++------- test/docx/golden/table_one_row.docx | Bin 9903 -> 9904 bytes test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10212 bytes test/docx/golden/tables.docx | Bin 10238 -> 10239 bytes 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index a6b137fc4..349f3a4ce 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -1,7 +1,8 @@ +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {- | -Module : Text.Pandoc.Writers.Docx +Module : Text.Pandoc.Writers.Docx.Table Copyright : Copyright (C) 2012-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -87,7 +88,10 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do mknode "w:tbl" [] ( mknode "w:tblPr" [] ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" [("w:type", "pct"), ("w:w", tshow rowwidth)] () : + mknode "w:tblW" (if all (== 0) widths + then [("w:type", "auto"), ("w:w", "0")] + else [("w:type", "pct"), ("w:w", tshow rowwidth)]) + () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ,("w:lastRow","0") ,("w:firstColumn","0") @@ -107,8 +111,8 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do )] alignmentToString :: Alignment -> Text -alignmentToString alignment = case alignment of - AlignLeft -> "left" - AlignRight -> "right" - AlignCenter -> "center" - AlignDefault -> "left" +alignmentToString = \case + AlignLeft -> "left" + AlignRight -> "right" + AlignCenter -> "center" + AlignDefault -> "left" diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index 6eaea2ac2..cab3fc31c 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 45a97ccaa..9238c7e20 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 115a16a48..6f0379def 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From 85f379e474be72ae0f7a53ebc5efe2ad4f8165b4 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Wed, 28 Apr 2021 12:46:52 +0200 Subject: JATS writer: use either styled-content or named-content for spans. If the element has a content-type attribute, or at least one class, then that value is used as `content-type` and the span is put inside a `<named-content>` element. Otherwise a `<styled-content>` element is used instead. Closes: #7211 --- src/Text/Pandoc/Writers/JATS.hs | 36 ++++++++++++++++++++++++++---------- test/Tests/Writers/JATS.hs | 14 +++++++++----- 2 files changed, 35 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index 0bcfa0df4..9db8723d1 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -21,12 +21,13 @@ module Text.Pandoc.Writers.JATS , writeJatsPublishing , writeJatsArticleAuthoring ) where +import Control.Applicative ((<|>)) import Control.Monad.Reader import Control.Monad.State import Data.Generics (everywhere, mkT) import Data.List (partition) import qualified Data.Map as M -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, listToMaybe) import Data.Time (toGregorian, Day, parseTimeM, defaultTimeLocale, formatTime) import qualified Data.Text as T import Data.Text (Text) @@ -449,18 +450,33 @@ inlineToJATS opts (Note contents) = do $ text (show notenum) inlineToJATS opts (Cite _ lst) = inlinesToJATS opts lst -inlineToJATS opts (Span (ident,_,kvs) ils) = do +inlineToJATS opts (Span (ident,classes,kvs) ils) = do contents <- inlinesToJATS opts ils - let attr = [("id", escapeNCName ident) | not (T.null ident)] ++ - [("xml:lang",l) | ("lang",l) <- kvs] ++ - [(k,v) | (k,v) <- kvs - , k `elem` ["alt", "content-type", "rid", "specific-use", - "vocab", "vocab-identifier", "vocab-term", - "vocab-term-identifier"]] + let commonAttr = [("id", escapeNCName ident) | not (T.null ident)] ++ + [("xml:lang",l) | ("lang",l) <- kvs] ++ + [(k,v) | (k,v) <- kvs, k `elem` ["alt", "specific-use"]] + -- A named-content element is a good fit for spans, but requires a + -- content-type attribute to be present. We use either the explicit + -- attribute or the first class as content type. If neither is + -- available, then we fall back to using a @styled-content@ element. + let (tag, specificAttr) = + case lookup "content-type" kvs <|> listToMaybe classes of + Just ct -> ( "named-content" + , ("content-type", ct) : + [(k, v) | (k, v) <- kvs + , k `elem` ["rid", "vocab", "vocab-identifier", + "vocab-term", "vocab-term-identifier"]]) + -- Fall back to styled-content + Nothing -> ("styled-content" + , [(k, v) | (k,v) <- kvs + , k `elem` ["style", "style-type", "style-detail", + "toggle"]]) + let attr = commonAttr ++ specificAttr + -- unwrap if wrapping element would have no attributes return $ if null attr - then contents -- unwrap if no relevant attributes are given - else inTags False "named-content" attr contents + then contents + else inTags False tag attr contents inlineToJATS _ (Math t str) = do let addPref (Xml.Attr q v) | Xml.qName q == "xmlns" = Xml.Attr q{ Xml.qName = "xmlns:mml" } v diff --git a/test/Tests/Writers/JATS.hs b/test/Tests/Writers/JATS.hs index e90438176..5b96ed2ed 100644 --- a/test/Tests/Writers/JATS.hs +++ b/test/Tests/Writers/JATS.hs @@ -148,13 +148,17 @@ tests = spanWith nullAttr "text in span" =?> "<p>text in span</p>" - , "converted to named-content element" =: - spanWith ("a", ["ignored"], [("alt", "aa")]) "text" =?> - "<p><named-content id=\"a\" alt=\"aa\">text</named-content></p>" + , "converted to named-content element if class given" =: + spanWith ("a", ["genus-species"], [("alt", "aa")]) "C. elegans" =?> + ("<p><named-content id=\"a\" alt=\"aa\" content-type=\"genus-species\">" + <> "C. elegans</named-content></p>") - , "unwrapped if named-content element would have no attributes" =: - spanWith ("", ["ignored"], [("hidden", "true")]) "text in span" =?> + , "unwrapped if styled-content element would have no attributes" =: + spanWith ("", [], [("hidden", "true")]) "text in span" =?> "<p>text in span</p>" + , "use content-type attribute if present" =: + spanWith ("", [], [("content-type", "species")]) "E. coli" =?> + "<p><named-content content-type=\"species\">E. coli</named-content></p>" ] ] -- cgit v1.2.3 From 80e2e88287f43d88ea92a77779b25e161c81f67b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 28 Apr 2021 23:30:16 -0700 Subject: Smarter smart quotes. Treat a leading " with no closing " as a left curly quote. This supports the practice, in fiction, of continuing paragraphs quoting the same speaker without an end quote. It also helps with quotes that break over lines in line blocks. Closes #7216. --- src/Text/Pandoc/Parsing.hs | 45 +++++++++++++++++++++++------------- src/Text/Pandoc/Readers/Markdown.hs | 18 ++++++++------- src/Text/Pandoc/Readers/RST.hs | 19 +-------------- src/Text/Pandoc/Readers/TWiki.hs | 22 +----------------- test/Tests/Readers/Markdown.hs | 2 +- test/command/7216.md | 19 +++++++++++++++ test/command/pandoc-citeproc-320a.md | 12 +++++----- 7 files changed, 67 insertions(+), 70 deletions(-) create mode 100644 test/command/7216.md (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 847fd2e05..2f6189104 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -105,8 +105,9 @@ module Text.Pandoc.Parsing ( take1WhileP, singleQuoteEnd, doubleQuoteStart, doubleQuoteEnd, - ellipses, apostrophe, + doubleCloseQuote, + ellipses, dash, nested, citeKey, @@ -1398,10 +1399,7 @@ smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext -> ParserT s st m Inlines smartPunctuation inlineParser = do guardEnabled Ext_smart - choice [ quoted inlineParser, apostrophe, dash, ellipses ] - -apostrophe :: Stream s m Char => ParserT s st m Inlines -apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019") + choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ] quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) => ParserT s st m Inlines @@ -1411,16 +1409,22 @@ quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) => ParserT s st m Inlines -> ParserT s st m Inlines -singleQuoted inlineParser = try $ B.singleQuoted . mconcat - <$ singleQuoteStart - <*> withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd) - -doubleQuoted :: (HasQuoteContext st m, Stream s m Char) +singleQuoted inlineParser = do + singleQuoteStart + (B.singleQuoted . mconcat <$> + try + (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd))) + <|> pure "\8217" + +doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char) => ParserT s st m Inlines -> ParserT s st m Inlines -doubleQuoted inlineParser = try $ B.doubleQuoted . mconcat - <$ doubleQuoteStart - <*> withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd) +doubleQuoted inlineParser = do + doubleQuoteStart + (B.doubleQuoted . mconcat <$> + try + (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd))) + <|> pure (B.str "\8220") failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t) => QuoteContext @@ -1443,7 +1447,7 @@ singleQuoteStart = do guard =<< notAfterString try $ do charOrRef "'\8216\145" - notFollowedBy (oneOf [' ', '\t', '\n']) + notFollowedBy (satisfy isSpaceChar) singleQuoteEnd :: Stream s m Char => ParserT s st m () @@ -1451,17 +1455,26 @@ singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char) +doubleQuoteStart :: (HasLastStrPosition st, + HasQuoteContext st m, + Stream s m Char) => ParserT s st m () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote + guard =<< notAfterString try $ do charOrRef "\"\8220\147" - notFollowedBy (oneOf [' ', '\t', '\n']) + notFollowedBy (satisfy isSpaceChar) doubleQuoteEnd :: Stream s m Char => ParserT s st m () doubleQuoteEnd = void (charOrRef "\"\8221\148") +apostrophe :: Stream s m Char => ParserT s st m Inlines +apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217") + +doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines +doubleCloseQuote = B.str "\8221" <$ char '"' + ellipses :: Stream s m Char => ParserT s st m Inlines ellipses = try (string "..." >> return (B.str "\8230")) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 4b20e3a8b..ba8ed147e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2199,25 +2199,27 @@ citation = try $ do smart :: PandocMonad m => MarkdownParser m (F Inlines) smart = do guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [apostrophe, dash, ellipses]) + doubleQuoted <|> singleQuoted <|> (return <$> doubleCloseQuote) <|> + (return <$> apostrophe) <|> (return <$> dash) <|> (return <$> ellipses) singleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) -singleQuoted = try $ do +singleQuoted = do singleQuoteStart - withQuoteContext InSingleQuote $ + (try (withQuoteContext InSingleQuote $ fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline singleQuoteEnd + many1Till inline singleQuoteEnd)) + <|> (return (return (B.str "\8217"))) -- doubleQuoted will handle regular double-quoted sections, as well -- as dialogues with an open double-quote without a close double-quote -- in the same paragraph. doubleQuoted :: PandocMonad m => MarkdownParser m (F Inlines) -doubleQuoted = try $ do +doubleQuoted = do doubleQuoteStart - withQuoteContext InDoubleQuote $ + (try (withQuoteContext InDoubleQuote $ fmap B.doubleQuoted . trimInlinesF . mconcat <$> - many1Till inline doubleQuoteEnd + many1Till inline doubleQuoteEnd)) + <|> (return (return (B.str "\8220"))) toRow :: [Blocks] -> Row toRow = Row nullAttr . map B.simpleCell diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 514e3b88d..ac4c0b6cb 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -1658,21 +1658,4 @@ note = try $ do return $ B.note contents smart :: PandocMonad m => RSTParser m Inlines -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice [apostrophe, dash, ellipses] - -singleQuoted :: PandocMonad m => RSTParser m Inlines -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote $ - B.singleQuoted . trimInlines . mconcat <$> - many1Till inline singleQuoteEnd - -doubleQuoted :: PandocMonad m => RSTParser m Inlines -doubleQuoted = try $ do - doubleQuoteStart - withQuoteContext InDoubleQuote $ - B.doubleQuoted . trimInlines . mconcat <$> - many1Till inline doubleQuoteEnd +smart = smartPunctuation inline diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index 484a6c923..c4d7bcc93 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -469,27 +469,7 @@ symbol :: PandocMonad m => TWParser m B.Inlines symbol = B.str <$> countChar 1 nonspaceChar smart :: PandocMonad m => TWParser m B.Inlines -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice [ apostrophe - , dash - , ellipses - ] - -singleQuoted :: PandocMonad m => TWParser m B.Inlines -singleQuoted = try $ do - singleQuoteStart - withQuoteContext InSingleQuote - (B.singleQuoted . B.trimInlines . mconcat <$> many1Till inline singleQuoteEnd) - -doubleQuoted :: PandocMonad m => TWParser m B.Inlines -doubleQuoted = try $ do - doubleQuoteStart - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - withQuoteContext InDoubleQuote (doubleQuoteEnd >> - return (B.doubleQuoted $ B.trimInlines contents)) - <|> return (B.str "\8220" B.<> contents) +smart = smartPunctuation inline link :: PandocMonad m => TWParser m B.Inlines link = try $ do diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 0930deae6..6e38da21a 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -358,7 +358,7 @@ tests = [ testGroup "inline code" para (text "The value of the " <> math "x" <> text "\8217s and the systems\8217 condition.") , test markdownSmart "unclosed double quote" ("**this should \"be bold**" - =?> para (strong "this should \"be bold")) + =?> para (strong "this should \8220be bold")) ] , testGroup "footnotes" [ "indent followed by newline and flush-left text" =: diff --git a/test/command/7216.md b/test/command/7216.md new file mode 100644 index 000000000..cab3b9689 --- /dev/null +++ b/test/command/7216.md @@ -0,0 +1,19 @@ +``` +pandoc -t latex +"This is some text in quotes. Another paragraph by the same speaker follows. The first paragraph should have no close quote. + +"The second paragraph should have open and close quotes." + +| "Open quote on this line, +| Close quote on the next line." +| "Quotes on the same line." +^D +``This is some text in quotes. Another paragraph by the same speaker +follows. The first paragraph should have no close quote. + +``The second paragraph should have open and close quotes.'' + +``Open quote on this line,\\ +Close quote on the next line.''\\ +``Quotes on the same line.'' +``` diff --git a/test/command/pandoc-citeproc-320a.md b/test/command/pandoc-citeproc-320a.md index 79dacfa10..e894a2250 100644 --- a/test/command/pandoc-citeproc-320a.md +++ b/test/command/pandoc-citeproc-320a.md @@ -49,17 +49,13 @@ references: Foo [@item1; @item2; @item3; @item4; @item5; @item6; @item7; @item8]. ^D Foo (al-ʾUdhrī, n.d.; al-ʿUdhrī, n.d.; al-\'Udhrī, n.d.; al-'Udhrī, -n.d.; al-'Udhrī, n.d.; Uch, n.d.; Uebel, n.d.; Zzz, n.d.). +n.d.a, n.d.b; Uch, n.d.; Uebel, n.d.; Zzz, n.d.). ::: {#refs .references .csl-bib-body .hanging-indent} ::: {#ref-item6 .csl-entry} Uch, Ann. n.d. ::: -::: {#ref-item4 .csl-entry} -'Udhrī, Jamīl al-. n.d. -::: - ::: {#ref-item1 .csl-entry} ʾUdhrī, Jamīl al-. n.d. ::: @@ -72,8 +68,12 @@ Uch, Ann. n.d. \'Udhrī, Jamīl al-. n.d. ::: +::: {#ref-item4 .csl-entry} +'Udhrī, Jamīl al-. n.d.a. +::: + ::: {#ref-item5 .csl-entry} -'Udhrī, Jamīl al-. n.d. +---------. n.d.b. ::: ::: {#ref-item7 .csl-entry} -- cgit v1.2.3 From d14c5f94df490996d12f8a56e29f927b9f9549e8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 29 Apr 2021 08:48:49 -0700 Subject: Further improvements in smart quotes. Improves heuristic for detection of an "open double quote." Closes #2103. --- src/Text/Pandoc/Parsing.hs | 4 ++-- test/command/2103.md | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 test/command/2103.md (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 2f6189104..37ab0adaa 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1447,7 +1447,7 @@ singleQuoteStart = do guard =<< notAfterString try $ do charOrRef "'\8216\145" - notFollowedBy (satisfy isSpaceChar) + void $ lookAhead (satisfy (not . isSpaceChar)) singleQuoteEnd :: Stream s m Char => ParserT s st m () @@ -1463,7 +1463,7 @@ doubleQuoteStart = do failIfInQuoteContext InDoubleQuote guard =<< notAfterString try $ do charOrRef "\"\8220\147" - notFollowedBy (satisfy isSpaceChar) + void $ lookAhead (satisfy (not . isSpaceChar)) doubleQuoteEnd :: Stream s m Char => ParserT s st m () diff --git a/test/command/2103.md b/test/command/2103.md new file mode 100644 index 000000000..14a522a5e --- /dev/null +++ b/test/command/2103.md @@ -0,0 +1,8 @@ +``` +pandoc -t latex +| A happy pandoc user said "fix this bug please +| or I'll go crazy!" +^D +A happy pandoc user said ``fix this bug please\\ +or I'll go crazy!'' +``` -- cgit v1.2.3 From b6a65445e10d74dfe384763c37438338bd395372 Mon Sep 17 00:00:00 2001 From: mbrackeantidot <65160241+mbrackeantidot@users.noreply.github.com> Date: Thu, 29 Apr 2021 18:11:44 +0200 Subject: Docx reader: add handling of vml image objects (jgm#4735) (#7257) They represent images, the same way as other images in vml format. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 11 +++++++++-- test/Tests/Readers/Docx.hs | 4 ++++ test/docx/image_vml_as_object.docx | Bin 0 -> 74199 bytes test/docx/image_vml_as_object.native | 2 ++ 4 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 test/docx/image_vml_as_object.docx create mode 100644 test/docx/image_vml_as_object.native (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index f8ed248d7..7325ff300 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -709,7 +709,8 @@ elemToParPart ns element case drawing of Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp title alt bs $ elemToExtent drawingElem) Nothing -> throwError WrongElem --- The below is an attempt to deal with images in deprecated vml format. +-- The two cases below are an attempt to deal with images in deprecated vml format. +-- Todo: check out title and attr for deprecated format. elemToParPart ns element | isElem ns "w" "r" element , Just _ <- findChildByName ns "w" "pict" element = @@ -717,9 +718,15 @@ elemToParPart ns element >>= findAttrByName ns "r" "id" in case drawing of - -- Todo: check out title and attr for deprecated format. Just s -> expandDrawingId s >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) Nothing -> throwError WrongElem +elemToParPart ns element + | isElem ns "w" "r" element + , Just objectElem <- findChildByName ns "w" "object" element + , Just shapeElem <- findChildByName ns "v" "shape" objectElem + , Just imagedataElem <- findChildByName ns "v" "imagedata" shapeElem + , Just drawingId <- findAttrByName ns "r" "id" imagedataElem + = expandDrawingId drawingId >>= (\(fp, bs) -> return $ Drawing fp "" "" bs Nothing) -- Chart elemToParPart ns element | isElem ns "w" "r" element diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 263e04173..2cce70cc5 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -155,6 +155,10 @@ tests = [ testGroup "document" "VML image" "docx/image_vml.docx" "docx/image_vml.native" + , testCompare + "VML image as object" + "docx/image_vml_as_object.docx" + "docx/image_vml_as_object.native" , testCompare "inline image in links" "docx/inline_images.docx" diff --git a/test/docx/image_vml_as_object.docx b/test/docx/image_vml_as_object.docx new file mode 100644 index 000000000..7e1e4d2ca Binary files /dev/null and b/test/docx/image_vml_as_object.docx differ diff --git a/test/docx/image_vml_as_object.native b/test/docx/image_vml_as_object.native new file mode 100644 index 000000000..6e689486a --- /dev/null +++ b/test/docx/image_vml_as_object.native @@ -0,0 +1,2 @@ +[Para [Str "Test",Space,Str "with",Space,Str "object",Space,Str "as",Space,Str "image:"] +,Para [Image ("",[],[]) [] ("media/image1.emf","")]] -- cgit v1.2.3 From 6b16f3bb0d4c3a0ddc051ff9948c67d5cac72bcb Mon Sep 17 00:00:00 2001 From: tecosaur <tec@tecosaur.com> Date: Fri, 30 Apr 2021 16:23:28 +0800 Subject: Org writer: inline latex envs need newlines (#7259) Closes #7252 As specified in https://orgmode.org/manual/LaTeX-fragments.html, an inline \begin{}...\end{} LaTeX block must start on a new line. --- src/Text/Pandoc/Writers/Org.hs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Org.hs b/src/Text/Pandoc/Writers/Org.hs index d0c9813da..d404f1c8d 100644 --- a/src/Text/Pandoc/Writers/Org.hs +++ b/src/Text/Pandoc/Writers/Org.hs @@ -407,6 +407,8 @@ inlineToOrg (Math t str) = do then "\\(" <> literal str <> "\\)" else "\\[" <> literal str <> "\\]" inlineToOrg il@(RawInline f str) + | elem f ["tex", "latex"] && T.isPrefixOf "\\begin" str = + return $ cr <> literal str <> cr | isRawFormat f = return $ literal str | otherwise = do report $ InlineNotRendered il -- cgit v1.2.3 From 3da919e35d02ec1a7e3719e2fdfd699a69d74921 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 1 May 2021 18:52:24 +0200 Subject: Add new internal module Text.Pandoc.Writers.GridTable --- pandoc.cabal | 2 + src/Text/Pandoc/Writers/GridTable.hs | 157 +++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 src/Text/Pandoc/Writers/GridTable.hs (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 29a5bfc7a..8ea3aa681 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -438,6 +438,7 @@ library SHA >= 1.6 && < 1.7, aeson >= 0.7 && < 1.6, aeson-pretty >= 0.8.5 && < 0.9, + array >= 0.5 && < 0.6, attoparsec >= 0.12 && < 0.15, base64-bytestring >= 0.1 && < 1.3, binary >= 0.7 && < 0.11, @@ -659,6 +660,7 @@ library Text.Pandoc.Writers.Docx.StyleMap, Text.Pandoc.Writers.Docx.Table, Text.Pandoc.Writers.Docx.Types, + Text.Pandoc.Writers.GridTable Text.Pandoc.Writers.JATS.References, Text.Pandoc.Writers.JATS.Table, Text.Pandoc.Writers.JATS.Types, diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs new file mode 100644 index 000000000..c6f4cf456 --- /dev/null +++ b/src/Text/Pandoc/Writers/GridTable.hs @@ -0,0 +1,157 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE TupleSections #-} + +{- | +Module : Text.Pandoc.Writers.GridTable +Copyright : © 2020-2021 Albert Krewinkel +License : GNU GPL, version 2 or above + +Maintainer : Albert Krewinkel <albert@zeitkraut.de> + +Grid representation of pandoc tables. +-} +module Text.Pandoc.Writers.GridTable + ( Table (..) + , GridCell (..) + , RowIndex (..) + , ColIndex (..) + , CellIndex + , Part (..) + , toTable + , rowArray + ) where + +import Control.Monad (forM_) +import Control.Monad.ST +import Data.Array +import Data.Array.MArray +import Data.Array.ST +import Data.Maybe (listToMaybe) +import Data.STRef +import Text.Pandoc.Definition hiding (Table) +import qualified Text.Pandoc.Builder as B + +-- | A grid cell contains either a real table cell, or is the +-- continuation of a column or row-spanning cell. In the latter case, +-- the index of the continued cell is provided. +data GridCell + = ContentCell Attr Alignment RowSpan ColSpan [Block] + | ContinuationCell CellIndex + deriving (Show) + +-- | Row index in a table part. +newtype RowIndex = RowIndex Int deriving (Enum, Eq, Ix, Ord, Show) +-- | Column index in a table part. +newtype ColIndex = ColIndex Int deriving (Enum, Eq, Ix, Ord, Show) + +-- | Index to a cell in a table part. +type CellIndex = (RowIndex, ColIndex) + +-- | Cells are placed on a grid. Row attributes are stored in a separate +-- array. +data Part = Part + { partAttr :: Attr + , partCellArray :: Array (RowIndex,ColIndex) GridCell + , partRowAttrs :: Array RowIndex Attr + } + +data Table = Table + { tableAttr :: Attr + , tableCaption :: Caption + , tableColSpecs :: Array ColIndex ColSpec + , tableRowHeads :: RowHeadColumns + , tableHead :: Part + , tableBodies :: [Part] + , tableFoot :: Part + } + +toTable + :: B.Attr + -> B.Caption + -> [B.ColSpec] + -> B.TableHead + -> [B.TableBody] + -> B.TableFoot + -> Table +toTable attr caption colSpecs thead tbodies tfoot = + Table attr caption colSpecs' rowHeads thGrid tbGrids tfGrid + where + colSpecs' = listArray (ColIndex 1, ColIndex $ length colSpecs) colSpecs + rowHeads = case listToMaybe tbodies of + Nothing -> RowHeadColumns 0 + Just (TableBody _attr rowHeadCols _headerRows _rows) -> rowHeadCols + thGrid = let (TableHead headAttr rows) = thead + in rowsToPart headAttr rows + tbGrids = map bodyToGrid tbodies + tfGrid = let (TableFoot footAttr rows) = tfoot + in rowsToPart footAttr rows + bodyToGrid (TableBody bodyAttr _rowHeadCols _headRows rows) = + rowsToPart bodyAttr rows + +data BuilderCell + = FilledCell GridCell + | FreeCell + +fromBuilderCell :: BuilderCell -> GridCell +fromBuilderCell = \case + FilledCell c -> c + FreeCell -> error "Found an unassigned cell." + +rowsToPart :: Attr -> [B.Row] -> Part +rowsToPart attr = \case + [] -> Part + attr + (listArray ((RowIndex 1, ColIndex 1), (RowIndex 0, ColIndex 0)) []) + (listArray (RowIndex 1, RowIndex 0) []) + rows@(Row _attr firstRow:_) -> + let nrows = length rows + ncols = sum $ map (\(Cell _ _ _ (ColSpan cs) _) -> cs) firstRow + gbounds = ((RowIndex 1, ColIndex 1), (RowIndex nrows, ColIndex ncols)) + mutableGrid :: ST s (STArray s CellIndex GridCell) + mutableGrid = do + grid <- newArray gbounds FreeCell + ridx <- newSTRef (RowIndex 1) + forM_ rows $ \(Row _attr cells) -> do + cidx <- newSTRef (ColIndex 1) + forM_ cells $ \(Cell cellAttr align rs cs blks) -> do + ridx' <- readSTRef ridx + let nextFreeInRow colindex@(ColIndex c) = do + readArray grid (ridx', colindex) >>= \case + FreeCell -> pure colindex + _ -> nextFreeInRow $ ColIndex (c + 1) + cidx' <- readSTRef cidx >>= nextFreeInRow + writeArray grid (ridx', cidx') . FilledCell $ + ContentCell cellAttr align rs cs blks + forM_ (continuationIndices ridx' cidx' rs cs) $ \idx -> do + writeArray grid idx . FilledCell $ + ContinuationCell (ridx', cidx') + -- go to new column + writeSTRef cidx cidx' + -- go to next row + modifySTRef ridx (incrRowIndex 1) + -- Swap BuilderCells with normal GridCells. + mapArray fromBuilderCell grid + in Part + { partCellArray = runSTArray mutableGrid + , partRowAttrs = listArray (RowIndex 1, RowIndex nrows) $ + map (\(Row rowAttr _) -> rowAttr) rows + , partAttr = attr + } + +continuationIndices :: RowIndex -> ColIndex -> RowSpan -> ColSpan -> [CellIndex] +continuationIndices (RowIndex ridx) (ColIndex cidx) rowspan colspan = + let (RowSpan rs) = rowspan + (ColSpan cs) = colspan + in [ (RowIndex r, ColIndex c) | r <- [ridx..(ridx + rs - 1)] + , c <- [cidx..(cidx + cs - 1)] + , (r, c) /= (ridx, cidx)] + +rowArray :: RowIndex -> Array CellIndex GridCell -> Array ColIndex GridCell +rowArray ridx grid = + let ((_minRidx, minCidx), (_maxRidx, maxCidx)) = bounds grid + in ixmap (minCidx, maxCidx) (ridx,) grid + +incrRowIndex :: RowSpan -> RowIndex -> RowIndex +incrRowIndex (RowSpan n) (RowIndex r) = RowIndex $ r + n -- cgit v1.2.3 From ddbf83f62c8bb6516203c99acd894c404351b5ae Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 1 May 2021 18:52:24 +0200 Subject: Docx writer: support colspans and rowspans in tables See: #6315 --- src/Text/Pandoc/Writers/Docx.hs | 6 +- src/Text/Pandoc/Writers/Docx/Table.hs | 200 +++++++++++++++++++---------- src/Text/Pandoc/Writers/GridTable.hs | 4 +- test/docx/golden/table_one_row.docx | Bin 9904 -> 9925 bytes test/docx/golden/table_with_list_cell.docx | Bin 10212 -> 10230 bytes test/docx/golden/tables.docx | Bin 10239 -> 10271 bytes 6 files changed, 140 insertions(+), 70 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 7064ded09..e11961bfd 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -53,6 +53,7 @@ import Text.Pandoc.Writers.Docx.Table import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Pandoc.Walk +import qualified Text.Pandoc.Writers.GridTable as Grid import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared import Text.TeXMath @@ -889,8 +890,9 @@ blockToOpenXML' _ HorizontalRule = do $ mknode "v:rect" [("style","width:0;height:1.5pt"), ("o:hralign","center"), ("o:hrstd","t"),("o:hr","t")] () ] -blockToOpenXML' opts (Table _ blkCapt specs thead tbody tfoot) = - tableToOpenXML (blocksToOpenXML opts) blkCapt specs thead tbody tfoot +blockToOpenXML' opts (Table attr caption colspecs thead tbodies tfoot) = + tableToOpenXML (blocksToOpenXML opts) + (Grid.toTable attr caption colspecs thead tbodies tfoot) blockToOpenXML' opts el | BulletList lst <- el = addOpenXMLList BulletMarker lst | OrderedList (start, numstyle, numdelim) lst <- el diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 349f3a4ce..bb931bf08 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -14,65 +14,39 @@ module Text.Pandoc.Writers.Docx.Table ) where import Control.Monad.State.Strict +import Data.Array import Data.Text (Text) import Text.Pandoc.Definition import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared -import Text.Pandoc.Writers.Shared import Text.Printf (printf) +import Text.Pandoc.Writers.GridTable hiding (Table) import Text.Pandoc.Writers.OOXML -import Text.Pandoc.XML.Light as XML +import Text.Pandoc.XML.Light as XML hiding (Attr) import qualified Data.Text as T +import qualified Text.Pandoc.Writers.GridTable as Grid tableToOpenXML :: PandocMonad m => ([Block] -> WS m [Content]) - -> Caption - -> [ColSpec] - -> TableHead - -> [TableBody] - -> TableFoot + -> Grid.Table -> WS m [Content] -tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do - let (caption, aligns, widths, headers, rows) = - toLegacyTable blkCapt specs thead tbody tfoot +tableToOpenXML blocksToOpenXML gridTable = do setFirstPara modify $ \s -> s { stInTable = True } - let captionStr = stringify caption - caption' <- if null caption - then return [] - else withParaPropM (pStyleM "Table Caption") - $ blocksToOpenXML [Para caption] - let alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () - -- Table cells require a <w:p> element, even an empty one! - -- Not in the spec but in Word 2007, 2010. See #4953. And - -- apparently the last element must be a <w:p>, see #6983. - let cellToOpenXML (al, cell) = do - es <- withParaProp (alignmentFor al) $ blocksToOpenXML cell - return $ - case reverse (onlyElems es) of - b:e:_ | qName (elName b) == "bookmarkEnd" - , qName (elName e) == "p" -> es - e:_ | qName (elName e) == "p" -> es - _ -> es ++ [Elem $ mknode "w:p" [] ()] - headers' <- mapM cellToOpenXML $ zip aligns headers - rows' <- mapM (mapM cellToOpenXML . zip aligns) rows - compactStyle <- pStyleM "Compact" - let emptyCell' = [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] - let mkcell contents = mknode "w:tc" [] - $ if null contents - then emptyCell' - else contents - let mkrow cells = - mknode "w:tr" [] $ - map mkcell cells - let textwidth = 7920 -- 5.5 in in twips, 1/20 pt - let fullrow = 5000 -- 100% specified in pct - let (rowwidth :: Int) = round $ fullrow * sum widths - let mkgridcol w = mknode "w:gridCol" - [("w:w", tshow (floor (textwidth * w) :: Integer))] () - let hasHeader = not $ all null headers - modify $ \s -> s { stInTable = False } + let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) = + gridTable + let (Caption _maybeShortCaption captionBlocks) = caption + let captionStr = stringify captionBlocks + captionXml <- if null captionBlocks + then return [] + else withParaPropM (pStyleM "Table Caption") + $ blocksToOpenXML captionBlocks + head' <- cellGridToOpenXML blocksToOpenXML thead + bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies + foot' <- cellGridToOpenXML blocksToOpenXML tfoot + + let hasHeader = not . null . indices . partRowAttrs $ thead -- for compatibility with Word <= 2007, we include a val with a bitmask -- 0×0020 Apply first row conditional formatting -- 0×0040 Apply last row conditional formatting @@ -80,18 +54,12 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do -- 0×0100 Apply last column conditional formatting -- 0×0200 Do not apply row banding conditional formatting -- 0×0400 Do not apply column banding conditional formattin - let tblLookVal :: Int - tblLookVal = if hasHeader then 0x20 else 0 - return $ - caption' ++ - [Elem $ - mknode "w:tbl" [] - ( mknode "w:tblPr" [] - ( mknode "w:tblStyle" [("w:val","Table")] () : - mknode "w:tblW" (if all (== 0) widths - then [("w:type", "auto"), ("w:w", "0")] - else [("w:type", "pct"), ("w:w", tshow rowwidth)]) - () : + let tblLookVal = if hasHeader then (0x20 :: Int) else 0 + let (gridCols, tblWattr) = tableLayout (elems colspecs) + let tbl = mknode "w:tbl" [] + ( mknode "w:tblPr" [] + ( mknode "w:tblStyle" [("w:val","Table")] () : + mknode "w:tblW" tblWattr () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") ,("w:lastRow","0") ,("w:firstColumn","0") @@ -100,15 +68,14 @@ tableToOpenXML blocksToOpenXML blkCapt specs thead tbody tfoot = do ,("w:noVBand","0") ,("w:val", T.pack $ printf "%04x" tblLookVal) ] () : - [ mknode "w:tblCaption" [("w:val", captionStr)] () - | not (null caption) ] ) - : mknode "w:tblGrid" [] - (if all (==0) widths - then [] - else map mkgridcol widths) - : [ mkrow headers' | hasHeader ] ++ - map mkrow rows' - )] + [ mknode "w:tblCaption" [("w:val", captionStr)] () + | not (T.null captionStr) ] + ) + : mknode "w:tblGrid" [] gridCols + : head' ++ mconcat bodies ++ foot' + ) + modify $ \s -> s { stInTable = False } + return $ captionXml ++ [Elem tbl] alignmentToString :: Alignment -> Text alignmentToString = \case @@ -116,3 +83,104 @@ alignmentToString = \case AlignRight -> "right" AlignCenter -> "center" AlignDefault -> "left" + +tableLayout :: [ColSpec] -> ([Element], [(Text, Text)]) +tableLayout specs = + let + textwidth = 7920 -- 5.5 in in twips (1 twip == 1/20 pt) + fullrow = 5000 -- 100% specified in pct (1 pct == 1/50th of a percent) + ncols = length specs + getWidth = \case + ColWidth n -> n + _ -> 0 + widths = map (getWidth . snd) specs + rowwidth = round (fullrow * sum widths) :: Int + widthToTwips w = floor (textwidth * w) :: Int + mkGridCol w = mknode "w:gridCol" [("w:w", tshow (widthToTwips w))] () + in if all (== 0) widths + then ( replicate ncols $ mkGridCol (1.0 / fromIntegral ncols) + , [ ("w:type", "auto"), ("w:w", "0")]) + else ( map mkGridCol widths + , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ]) + +cellGridToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> Part + -> WS m [Element] +cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) = + if null (indices rowAttrs) + then return mempty + else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part + +data OOXMLCell + = OOXMLCell Attr Alignment RowSpan ColSpan [Block] + | OOXMLCellMerge ColSpan + +data OOXMLRow = OOXMLRow Attr [OOXMLCell] + +partToRows :: Part -> [OOXMLRow] +partToRows part = + let + toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell] + toOOXMLCell ridx cidx = \case + ContentCell attr align rowspan colspan blocks -> + [OOXMLCell attr align rowspan colspan blocks] + ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' -> + case (partCellArray part)!idx' of + (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan] + x -> error $ "Content cell expected, got, " ++ show x ++ + " at index " ++ show idx' + _ -> mempty + mkRow :: (RowIndex, Attr) -> OOXMLRow + mkRow (ridx, attr) = OOXMLRow attr + . concatMap (uncurry $ toOOXMLCell ridx) + . assocs + . rowArray ridx + $ partCellArray part + in map mkRow $ assocs (partRowAttrs part) + +rowToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> OOXMLRow + -> WS m Element +rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do + xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells + -- let align' = case align of + -- AlignDefault -> colAlign + -- _ -> align + return $ mknode "w:tr" [] xmlcells + +ooxmlCellToOpenXML :: PandocMonad m + => ([Block] -> WS m [Content]) + -> OOXMLCell + -> WS m Element +ooxmlCellToOpenXML blocksToOpenXML = \case + OOXMLCellMerge (ColSpan colspan) -> do + return $ mknode "w:tc" [] + [ mknode "w:tcPr" [] [ mknode "w:gridSpan" [("w:val", tshow colspan)] () + , mknode "w:vMerge" [("w:val", "continue")] () ] + , mknode "w:p" [] [mknode "w:pPr" [] ()]] + OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do + -- we handle rowspans via 'leftpad', so we can ignore those here + + compactStyle <- pStyleM "Compact" + es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents + -- Table cells require a <w:p> element, even an empty one! + -- Not in the spec but in Word 2007, 2010. See #4953. And + -- apparently the last element must be a <w:p>, see #6983. + return . mknode "w:tc" [] $ + Elem + (mknode "w:tcPr" [] ([ mknode "w:gridSpan" [("w:val", tshow colspan)] () + | colspan > 1] ++ + [ mknode "w:vMerge" [("w:val", "restart")] () + | rowspan > RowSpan 1 ])) : + if null contents + then [Elem $ mknode "w:p" [] [mknode "w:pPr" [] [compactStyle]]] + else case reverse (onlyElems es) of + b:e:_ | qName (elName b) == "bookmarkEnd" -- y tho? + , qName (elName e) == "p" -> es + e:_ | qName (elName e) == "p" -> es + _ -> es ++ [Elem $ mknode "w:p" [] ()] + +alignmentFor :: Alignment -> Element +alignmentFor al = mknode "w:jc" [("w:val",alignmentToString al)] () diff --git a/src/Text/Pandoc/Writers/GridTable.hs b/src/Text/Pandoc/Writers/GridTable.hs index c6f4cf456..bc468febc 100644 --- a/src/Text/Pandoc/Writers/GridTable.hs +++ b/src/Text/Pandoc/Writers/GridTable.hs @@ -87,8 +87,8 @@ toTable attr caption colSpecs thead tbodies tfoot = tbGrids = map bodyToGrid tbodies tfGrid = let (TableFoot footAttr rows) = tfoot in rowsToPart footAttr rows - bodyToGrid (TableBody bodyAttr _rowHeadCols _headRows rows) = - rowsToPart bodyAttr rows + bodyToGrid (TableBody bodyAttr _rowHeadCols headRows rows) = + rowsToPart bodyAttr (headRows ++ rows) data BuilderCell = FilledCell GridCell diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index cab3fc31c..e60bb303f 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 9238c7e20..a4037cf32 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 6f0379def..bc1bc27f8 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From 8357b835d9c6d17f32bded56aa24059c2f6e0678 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Wed, 5 May 2021 19:00:53 +0200 Subject: App: allow tabs expansion even if file-scope is used Tabs in plain-text inputs are now handled correctly, even if the `--file-scope` flag is used. Closes: #6709 --- src/Text/Pandoc/App.hs | 18 +++++++++++------- test/command/6709.md | 11 +++++++++++ 2 files changed, 22 insertions(+), 7 deletions(-) create mode 100644 test/command/6709.md (limited to 'src') diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 4e8c9f2ab..96e4b5f47 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -255,13 +255,17 @@ convertWithOpts opts = do let sourceToDoc :: [FilePath] -> PandocIO Pandoc sourceToDoc sources' = case reader of - TextReader r - | optFileScope opts || readerNameBase == "json" -> - mconcat <$> mapM (readSource >=> r readerOpts) sources' - | otherwise -> - readSources sources' >>= r readerOpts - ByteStringReader r -> - mconcat <$> mapM (readFile' >=> r readerOpts) sources' + TextReader r + | readerNameBase == "json" -> + mconcat <$> mapM (readSource >=> r readerOpts) sources' + | optFileScope opts -> + -- Read source and convert tabs (see #6709) + let readSource' = fmap convertTabs . readSource + in mconcat <$> mapM (readSource' >=> r readerOpts) sources' + | otherwise -> + readSources sources' >>= r readerOpts + ByteStringReader r -> + mconcat <$> mapM (readFile' >=> r readerOpts) sources' when (readerNameBase == "markdown_github" || diff --git a/test/command/6709.md b/test/command/6709.md new file mode 100644 index 000000000..d6d514552 --- /dev/null +++ b/test/command/6709.md @@ -0,0 +1,11 @@ +Tabs must be expanded even if --file-scope is used +```` +% pandoc -t native --file-scope --tab-stop=2 +``` +if true; then + echo "yup" +fi +``` +^D +[CodeBlock ("",[],[]) "if true; then\n echo \"yup\"\nfi"] +```` -- cgit v1.2.3 From 295d93e96b1853c2ff4658aa7206ea1329024fab Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 7 May 2021 17:10:56 +0200 Subject: ConTeXt writer: support blank lines in line blocks. Fixes: #6564 Thanks to @denismaier. --- src/Text/Pandoc/Writers/ConTeXt.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index f14b1d894..b694437d8 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -178,8 +178,12 @@ blockToConTeXt (Para lst) = do contents <- inlineListToConTeXt lst return $ contents <> blankline blockToConTeXt (LineBlock lns) = do - doclines <- nowrap . vcat <$> mapM inlineListToConTeXt lns - return $ "\\startlines" $$ doclines $$ "\\stoplines" <> blankline + let emptyToBlankline doc = if isEmpty doc + then blankline + else doc + doclines <- mapM inlineListToConTeXt lns + let contextLines = vcat . map emptyToBlankline $ doclines + return $ "\\startlines" $$ contextLines $$ "\\stoplines" <> blankline blockToConTeXt (BlockQuote lst) = do contents <- blockListToConTeXt lst return $ "\\startblockquote" $$ nest 0 contents $$ "\\stopblockquote" <> blankline -- cgit v1.2.3 From 6e45607f9948f45b2e94f54b4825b667ca0d5441 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 1 May 2021 13:17:45 -0700 Subject: Change reader types, allowing better tracking of source positions. Previously, when multiple file arguments were provided, pandoc simply concatenated them and passed the contents to the readers, which took a Text argument. As a result, the readers had no way of knowing which file was the source of any particular bit of text. This meant that we couldn't report accurate source positions on errors or include accurate source positions as attributes in the AST. More seriously, it meant that we couldn't resolve resource paths relative to the files containing them (see e.g. #5501, #6632, #6384, #3752). Add Text.Pandoc.Sources (exported module), with a `Sources` type and a `ToSources` class. A `Sources` wraps a list of `(SourcePos, Text)` pairs. [API change] A parsec `Stream` instance is provided for `Sources`. The module also exports versions of parsec's `satisfy` and other Char parsers that track source positions accurately from a `Sources` stream (or any instance of the new `UpdateSourcePos` class). Text.Pandoc.Parsing now exports these modified Char parsers instead of the ones parsec provides. Modified parsers to use a `Sources` as stream [API change]. The readers that previously took a `Text` argument have been modified to take any instance of `ToSources`. So, they may still be used with a `Text`, but they can also be used with a `Sources` object. In Text.Pandoc.Error, modified the constructor PandocParsecError to take a `Sources` rather than a `Text` as first argument, so parse error locations can be accurately reported. T.P.Error: showPos, do not print "-" as source name. --- .hlint.yaml | 1 + pandoc.cabal | 1 + src/Text/Pandoc/App.hs | 8 +- src/Text/Pandoc/App/Opt.hs | 2 +- src/Text/Pandoc/Citeproc/BibTeX.hs | 14 +- src/Text/Pandoc/Error.hs | 39 ++- src/Text/Pandoc/Logging.hs | 8 +- src/Text/Pandoc/Parsing.hs | 474 ++++++++++++++++++------------ src/Text/Pandoc/Readers.hs | 97 +++--- src/Text/Pandoc/Readers/BibTeX.hs | 13 +- src/Text/Pandoc/Readers/CSV.hs | 14 +- src/Text/Pandoc/Readers/CommonMark.hs | 70 +++-- src/Text/Pandoc/Readers/Creole.hs | 11 +- src/Text/Pandoc/Readers/CslJson.hs | 9 +- src/Text/Pandoc/Readers/DocBook.hs | 11 +- src/Text/Pandoc/Readers/DokuWiki.hs | 17 +- src/Text/Pandoc/Readers/FB2.hs | 9 +- src/Text/Pandoc/Readers/HTML.hs | 33 ++- src/Text/Pandoc/Readers/Haddock.hs | 12 +- src/Text/Pandoc/Readers/Ipynb.hs | 8 +- src/Text/Pandoc/Readers/JATS.hs | 11 +- src/Text/Pandoc/Readers/Jira.hs | 16 +- src/Text/Pandoc/Readers/LaTeX.hs | 23 +- src/Text/Pandoc/Readers/LaTeX/Citation.hs | 2 +- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 12 +- src/Text/Pandoc/Readers/LaTeX/Types.hs | 15 +- src/Text/Pandoc/Readers/Man.hs | 29 +- src/Text/Pandoc/Readers/Markdown.hs | 49 +-- src/Text/Pandoc/Readers/MediaWiki.hs | 13 +- src/Text/Pandoc/Readers/Metadata.hs | 26 +- src/Text/Pandoc/Readers/Muse.hs | 17 +- src/Text/Pandoc/Readers/Native.hs | 12 +- src/Text/Pandoc/Readers/OPML.hs | 13 +- src/Text/Pandoc/Readers/Org.hs | 11 +- src/Text/Pandoc/Readers/Org/Inlines.hs | 3 +- src/Text/Pandoc/Readers/Org/Parsing.hs | 2 +- src/Text/Pandoc/Readers/RST.hs | 76 ++--- src/Text/Pandoc/Readers/Roff.hs | 34 +-- src/Text/Pandoc/Readers/TWiki.hs | 12 +- src/Text/Pandoc/Readers/Textile.hs | 172 +++++------ src/Text/Pandoc/Readers/TikiWiki.hs | 12 +- src/Text/Pandoc/Readers/Txt2Tags.hs | 12 +- src/Text/Pandoc/Readers/Vimwiki.hs | 19 +- src/Text/Pandoc/Shared.hs | 1 + src/Text/Pandoc/Sources.hs | 195 ++++++++++++ test/Tests/Readers/Markdown.hs | 4 +- 46 files changed, 1025 insertions(+), 617 deletions(-) create mode 100644 src/Text/Pandoc/Sources.hs (limited to 'src') diff --git a/.hlint.yaml b/.hlint.yaml index 350794803..ad0f7ddb9 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -9,6 +9,7 @@ # Ignore some builtin hints # - ignore: {name: "Avoid lambda"} +- ignore: {name: "Use bimap"} - ignore: {name: "Eta reduce"} - ignore: {name: "Evaluate"} - ignore: {name: "Reduce duplication"} # TODO: could be more fine-grained diff --git a/pandoc.cabal b/pandoc.cabal index 8ea3aa681..de7951c54 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -513,6 +513,7 @@ library Text.Pandoc.Options, Text.Pandoc.Extensions, Text.Pandoc.Shared, + Text.Pandoc.Sources, Text.Pandoc.MediaBag, Text.Pandoc.Error, Text.Pandoc.Filter, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 96e4b5f47..98b072ffb 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -160,9 +160,11 @@ convertWithOpts opts = do else optTabStop opts) - let readSources :: [FilePath] -> PandocIO Text - readSources srcs = convertTabs . T.intercalate (T.pack "\n") <$> - mapM readSource srcs + let readSources :: [FilePath] -> PandocIO [(FilePath, Text)] + readSources srcs = + mapM (\fp -> do + t <- readSource fp + return (if fp == "-" then "" else fp, convertTabs t)) srcs outputSettings <- optToOutputSettings opts diff --git a/src/Text/Pandoc/App/Opt.hs b/src/Text/Pandoc/App/Opt.hs index c72f63464..d54d932b7 100644 --- a/src/Text/Pandoc/App/Opt.hs +++ b/src/Text/Pandoc/App/Opt.hs @@ -687,7 +687,7 @@ yamlToMeta (Mapping _ _ m) = where pMetaString = pure . MetaString <$> P.manyChar P.anyChar runEverything p = - runPure (P.readWithM p (def :: P.ParserState) "") + runPure (P.readWithM p (def :: P.ParserState) ("" :: Text)) >>= fmap (Meta . flip P.runF def) yamlToMeta _ = return mempty diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 510e56f9c..f6833000c 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -59,10 +59,11 @@ data Variant = Bibtex | Biblatex deriving (Show, Eq, Ord) -- | Parse BibTeX or BibLaTeX into a list of 'Reference's. -readBibtexString :: Variant -- ^ bibtex or biblatex +readBibtexString :: ToSources a + => Variant -- ^ bibtex or biblatex -> Locale -- ^ Locale -> (Text -> Bool) -- ^ Filter on citation ids - -> Text -- ^ bibtex/biblatex text + -> a -- ^ bibtex/biblatex text -> Either ParseError [Reference Inlines] readBibtexString variant locale idpred contents = do case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>= @@ -70,7 +71,7 @@ readBibtexString variant locale idpred contents = do filter (\item -> idpred (identifier item) && entryType item /= "xdata")) (fromMaybe defaultLang $ localeLanguage locale, Map.empty) - "" contents of + "" (toSources contents) of Left err -> Left err Right xs -> return xs @@ -339,7 +340,7 @@ defaultLang = Lang "en" Nothing (Just "US") [] [] [] -- a map of bibtex "string" macros type StringMap = Map.Map Text Text -type BibParser = Parser Text (Lang, StringMap) +type BibParser = Parser Sources (Lang, StringMap) data Item = Item{ identifier :: Text , sourcePos :: SourcePos @@ -804,7 +805,7 @@ bibEntries = do (bibComment <|> bibPreamble <|> bibString)) bibSkip :: BibParser () -bibSkip = () <$ take1WhileP (/='@') +bibSkip = skipMany1 (satisfy (/='@')) bibComment :: BibParser () bibComment = do @@ -829,6 +830,9 @@ bibString = do updateState (\(l,m) -> (l, Map.insert k v m)) return () +take1WhileP :: Monad m => (Char -> Bool) -> ParserT Sources u m Text +take1WhileP f = T.pack <$> many1 (satisfy f) + inBraces :: BibParser Text inBraces = do char '{' diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 8102f04cc..81eb41f85 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -23,26 +23,27 @@ import Control.Exception (Exception, displayException) import Data.Typeable (Typeable) import Data.Word (Word8) import Data.Text (Text) +import Data.List (sortOn) import qualified Data.Text as T +import Data.Ord (Down(..)) import GHC.Generics (Generic) import Network.HTTP.Client (HttpException) import System.Exit (ExitCode (..), exitWith) import System.IO (stderr) import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (Sources(..)) import Text.Printf (printf) import Text.Parsec.Error import Text.Parsec.Pos hiding (Line) import Text.Pandoc.Shared (tshow) import Citeproc (CiteprocError, prettyCiteprocError) -type Input = Text - data PandocError = PandocIOError Text IOError | PandocHttpError Text HttpException | PandocShouldNeverHappenError Text | PandocSomeError Text | PandocParseError Text - | PandocParsecError Input ParseError + | PandocParsecError Sources ParseError | PandocMakePDFError Text | PandocOptionError Text | PandocSyntaxMapError Text @@ -81,22 +82,28 @@ renderError e = "Please report this to pandoc's developers: " <> s PandocSomeError s -> s PandocParseError s -> s - PandocParsecError input err' -> + PandocParsecError (Sources inputs) err' -> let errPos = errorPos err' errLine = sourceLine errPos errColumn = sourceColumn errPos - ls = T.lines input <> [""] - errorInFile = if length ls > errLine - 1 - then T.concat ["\n", ls !! (errLine - 1) - ,"\n", T.replicate (errColumn - 1) " " - ,"^"] - else "" - in "\nError at " <> tshow err' <> - -- if error comes from a chunk or included file, - -- then we won't get the right text this way: - if sourceName errPos == "source" - then errorInFile - else "" + errFile = sourceName errPos + errorInFile = + case sortOn (Down . sourceLine . fst) + [ (pos,t) + | (pos,t) <- inputs + , sourceName pos == errFile + , sourceLine pos <= errLine + ] of + [] -> "" + ((pos,txt):_) -> + let ls = T.lines txt <> [""] + ln = errLine - sourceLine pos + in if length ls > ln - 1 + then T.concat ["\n", ls !! (ln - 1) + ,"\n", T.replicate (errColumn - 1) " " + ,"^"] + else "" + in "\nError at " <> tshow err' <> errorInFile PandocMakePDFError s -> s PandocOptionError s -> s PandocSyntaxMapError s -> s diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index efd2188f1..8c7292b69 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -241,9 +241,11 @@ instance ToJSON LogMessage where showPos :: SourcePos -> Text showPos pos = Text.pack $ sn ++ "line " ++ show (sourceLine pos) ++ " column " ++ show (sourceColumn pos) - where sn = if sourceName pos == "source" || sourceName pos == "" - then "" - else sourceName pos ++ " " + where + sn' = sourceName pos + sn = if sn' == "source" || sn' == "" || sn' == "-" + then "" + else sn' ++ " " encodeLogMessages :: [LogMessage] -> BL.ByteString encodeLogMessages ms = diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 37ab0adaa..11c4c7a62 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -5,7 +5,6 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE IncoherentInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Parsing @@ -19,8 +18,7 @@ A utility library with parsers used in pandoc readers. -} -module Text.Pandoc.Parsing ( take1WhileP, - takeP, +module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, countChar, textStr, anyLine, @@ -134,22 +132,10 @@ module Text.Pandoc.Parsing ( take1WhileP, getInput, setInput, unexpected, - char, - letter, - digit, - alphaNum, skipMany, skipMany1, - spaces, - space, - anyChar, - satisfy, - newline, - string, count, eof, - noneOf, - oneOf, lookAhead, notFollowedBy, many, @@ -174,6 +160,8 @@ module Text.Pandoc.Parsing ( take1WhileP, SourcePos, getPosition, setPosition, + sourceName, + setSourceName, sourceColumn, sourceLine, setSourceColumn, @@ -189,16 +177,25 @@ module Text.Pandoc.Parsing ( take1WhileP, where import Control.Monad.Identity + ( guard, + join, + unless, + when, + void, + liftM2, + liftM, + Identity(..), + MonadPlus(mzero) ) import Control.Monad.Reader + ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) ) import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower, isPunctuation, isSpace, ord, toLower, toUpper) -import Data.Default +import Data.Default ( Default(..) ) import Data.Functor (($>)) import Data.List (intercalate, transpose) import qualified Data.Map as M import Data.Maybe (fromMaybe) import qualified Data.Set as Set -import Data.String import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) @@ -207,22 +204,108 @@ import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report) import Text.Pandoc.Definition + ( Target, + nullMeta, + nullAttr, + Meta, + ColWidth(ColWidthDefault, ColWidth), + TableFoot(TableFoot), + TableBody(TableBody), + Attr, + TableHead(TableHead), + Row(..), + Alignment(..), + Inline(Str), + ListNumberDelim(..), + ListAttributes, + ListNumberStyle(..) ) import Text.Pandoc.Logging + ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) ) import Text.Pandoc.Options + ( extensionEnabled, + Extension(Ext_old_dashes, Ext_tex_math_dollars, + Ext_tex_math_single_backslash, Ext_tex_math_double_backslash, + Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart), + ReaderOptions(readerTabStop, readerColumns, readerExtensions) ) import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Shared + ( uniqueIdent, + tshow, + mapLeft, + compactify, + trim, + trimr, + splitTextByIndices, + safeRead, + trimMath, + schemes, + escapeURI ) +import Text.Pandoc.Sources import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Pandoc.XML (fromEntities) -import Text.Parsec hiding (token) -import Text.Parsec.Pos (initialPos, newPos, updatePosString) - -import Control.Monad.Except +import Text.Parsec + ( between, + setSourceName, + Parsec, + Column, + Line, + incSourceLine, + incSourceColumn, + setSourceLine, + setSourceColumn, + sourceLine, + sourceColumn, + sourceName, + setSourceName, + setPosition, + getPosition, + updateState, + setState, + getState, + optionMaybe, + optional, + option, + endBy1, + endBy, + sepEndBy1, + sepEndBy, + sepBy1, + sepBy, + try, + choice, + (<?>), + (<|>), + manyTill, + many1, + many, + notFollowedBy, + lookAhead, + eof, + count, + skipMany1, + skipMany, + unexpected, + setInput, + getInput, + anyToken, + tokenPrim, + parse, + runParserT, + runParser, + ParseError, + ParsecT, + SourcePos, + Stream(..) ) +import Text.Parsec.Pos (initialPos, newPos) +import Control.Monad.Except ( MonadError(throwError) ) import Text.Pandoc.Error + ( PandocError(PandocParseError, PandocParsecError) ) type Parser t s = Parsec t s type ParserT = ParsecT + -- | Reader monad wrapping the parser state. This is used to possibly delay -- evaluation until all relevant information has been parsed and made available -- in the parser state. @@ -251,70 +334,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where mappend = (<>) -- | Like @count@, but packs its result -countChar :: (Stream s m Char, Monad m) +countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m) => Int -> ParsecT s st m Char -> ParsecT s st m Text countChar n = fmap T.pack . count n -- | Like @string@, but uses @Text@. -textStr :: Stream s m Char => Text -> ParsecT s u m Text +textStr :: (Stream s m Char, UpdateSourcePos s Char) + => Text -> ParsecT s u m Text textStr t = string (T.unpack t) $> t --- | Parse characters while a predicate is true. -take1WhileP :: Monad m - => (Char -> Bool) - -> ParserT Text st m Text -take1WhileP f = do - -- needed to persuade parsec that this won't match an empty string: - c <- satisfy f - inp <- getInput - pos <- getPosition - let (t, rest) = T.span f inp - setInput rest - setPosition $ - if f '\t' || f '\n' - then updatePosString pos $ T.unpack t - else incSourceColumn pos (T.length t) - return $ T.singleton c <> t - --- Parse n characters of input (or the rest of the input if --- there aren't n characters). -takeP :: Monad m => Int -> ParserT Text st m Text -takeP n = do - guard (n > 0) - -- faster than 'count n anyChar' - inp <- getInput - pos <- getPosition - let (xs, rest) = T.splitAt n inp - -- needed to persuade parsec that this won't match an empty string: - anyChar - setInput rest - setPosition $ updatePosString pos $ T.unpack xs - return xs - --- | Parse any line of text -anyLine :: Monad m => ParserT Text st m Text + +-- | Parse any line of text, returning the contents without the +-- final newline. +anyLine :: Monad m => ParserT Sources st m Text anyLine = do -- This is much faster than: -- manyTill anyChar newline inp <- getInput - pos <- getPosition - case T.break (=='\n') inp of - (this, T.uncons -> Just ('\n', rest)) -> do - -- needed to persuade parsec that this won't match an empty string: - anyChar - setInput rest - setPosition $ incSourceLine (setSourceColumn pos 1) 1 - return this - _ -> mzero + case inp of + Sources [] -> mzero + Sources ((fp,t):inps) -> + -- we assume that lines don't span different input files + case T.break (=='\n') t of + (this, rest) + | T.null rest + , not (null inps) -> + -- line may span different input files, so do it + -- character by character + T.pack <$> manyTill anyChar newline + | otherwise -> do -- either end of inputs or newline in rest + setInput $ Sources ((fp, rest):inps) + char '\n' -- needed so parsec knows we won't match empty string + -- and so source pos is updated + return this -- | Parse any line, include the final newline in the output -anyLineNewline :: Monad m => ParserT Text st m Text +anyLineNewline :: Monad m => ParserT Sources st m Text anyLineNewline = (<> "\n") <$> anyLine -- | Parse indent by specified number of spaces (or equiv. tabs) -indentWith :: Stream s m Char +indentWith :: (Stream s m Char, UpdateSourcePos s Char) => HasReaderOptions st => Int -> ParserT s st m Text indentWith num = do @@ -399,11 +460,13 @@ notFollowedBy' p = try $ join $ do a <- try p return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) -oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text +oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char) + => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack -- TODO: This should be re-implemented in a Text-aware way -oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String +oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char) + => (Char -> Char -> Bool) -> [String] -> ParserT s st m String oneOfStrings'' _ [] = Prelude.fail "no strings" oneOfStrings'' matches strs = try $ do c <- anyChar @@ -418,14 +481,16 @@ oneOfStrings'' matches strs = try $ do -- | Parses one of a list of strings. If the list contains -- two strings one of which is a prefix of the other, the longer -- string will be matched if possible. -oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text +oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char) + => [Text] -> ParserT s st m Text oneOfStrings = oneOfStrings' (==) -- | Parses one of a list of strings (tried in order), case insensitive. -- TODO: This will not be accurate with general Unicode (neither -- Text.toLower nor Text.toCaseFold can be implemented with a map) -oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text +oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char) + => [Text] -> ParserT s st m Text oneOfStringsCI = oneOfStrings' ciMatch where ciMatch x y = toLower' x == toLower' y -- this optimizes toLower by checking common ASCII case @@ -436,11 +501,13 @@ oneOfStringsCI = oneOfStrings' ciMatch | otherwise = toLower c -- | Parses a space or tab. -spaceChar :: Stream s m Char => ParserT s st m Char +spaceChar :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. -nonspaceChar :: Stream s m Char => ParserT s st m Char +nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Char nonspaceChar = satisfy (not . isSpaceChar) isSpaceChar :: Char -> Bool @@ -451,21 +518,24 @@ isSpaceChar '\r' = True isSpaceChar _ = False -- | Skips zero or more spaces or tabs. -skipSpaces :: Stream s m Char => ParserT s st m () +skipSpaces :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: Stream s m Char => ParserT s st m Char +blankline :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: Stream s m Char => ParserT s st m Text +blanklines :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Text blanklines = T.pack <$> many1 blankline -- | Gobble n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT Text st m () + => Int -> ParserT Sources st m () gobbleSpaces 0 = return () gobbleSpaces n | n < 0 = error "gobbleSpaces called with negative number" @@ -473,18 +543,26 @@ gobbleSpaces n char ' ' <|> eatOneSpaceOfTab gobbleSpaces (n - 1) -eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char eatOneSpaceOfTab = do - char '\t' + lookAhead (char '\t') + pos <- getPosition tabstop <- getOption readerTabStop + -- replace the tab on the input stream with spaces + let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop) inp <- getInput - setInput $ T.replicate (tabstop - 1) " " <> inp - return ' ' + setInput $ + case inp of + Sources [] -> error "eatOneSpaceOfTab - empty Sources list" + Sources ((fp,t):rest) -> + -- drop the tab and add spaces + Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest) + char ' ' -- | Gobble up to n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT Text st m Int + => Int -> ParserT Sources st m Int gobbleAtMostSpaces 0 = return 0 gobbleAtMostSpaces n | n < 0 = error "gobbleAtMostSpaces called with negative number" @@ -493,7 +571,8 @@ gobbleAtMostSpaces n (+ 1) <$> gobbleAtMostSpaces (n - 1) -- | Parses material enclosed between start and end parsers. -enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser +enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m t -- ^ start parser -> ParserT s st m end -- ^ end parser -> ParserT s st m a -- ^ content parser (to be used repeatedly) -> ParserT s st m [a] @@ -501,39 +580,41 @@ enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text +stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char) + => Text -> ParserT s st m Text stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack -stringAnyCase' :: Stream s m Char => String -> ParserT s st m String +stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char) + => String -> ParserT s st m String stringAnyCase' [] = string "" stringAnyCase' (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) rest <- stringAnyCase' xs return (firstChar:rest) +-- TODO rewrite by just adding to Sources stream? -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: (Stream s m Char, IsString s) - => ParserT s st m r +parseFromString :: Monad m + => ParserT Sources st m r -> Text - -> ParserT s st m r + -> ParserT Sources st m r parseFromString parser str = do oldPos <- getPosition - setPosition $ initialPos " chunk" + setPosition $ initialPos "chunk" oldInput <- getInput - setInput $ fromString $ T.unpack str + setInput $ toSources str result <- parser spaces - eof setInput oldInput setPosition oldPos return result -- | Like 'parseFromString' but specialized for 'ParserState'. -- This resets 'stateLastStrPos', which is almost always what we want. -parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u) - => ParserT s u m a +parseFromString' :: (Monad m, HasLastStrPosition u) + => ParserT Sources u m a -> Text - -> ParserT s u m a + -> ParserT Sources u m a parseFromString' parser str = do oldLastStrPos <- getLastStrPos <$> getState updateState $ setLastStrPos Nothing @@ -542,7 +623,7 @@ parseFromString' parser str = do return res -- | Parse raw line block up to and including blank lines. -lineClump :: Monad m => ParserT Text st m Text +lineClump :: Monad m => ParserT Sources st m Text lineClump = blanklines <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine)) @@ -551,7 +632,7 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char +charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char -> ParserT s st m Text charsInBalanced open close parser = try $ do char open @@ -570,7 +651,7 @@ charsInBalanced open close parser = try $ do -- Auxiliary functions for romanNumeral: -- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true +romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true -> ParserT s st m Int romanNumeral upperCase = do let rchar uc = char $ if upperCase then uc else toLower uc @@ -606,7 +687,7 @@ romanNumeral upperCase = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: Stream s m Char => ParserT s st m (Text, Text) +emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text) emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom in (full, escapeURI $ "mailto:" <> full) @@ -630,11 +711,11 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;" -uriScheme :: Stream s m Char => ParserT s st m Text +uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Stream s m Char => ParserT s st m (Text, Text) +uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text) uri = try $ do scheme <- uriScheme char ':' @@ -677,7 +758,7 @@ uri = try $ do uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk return (T.pack $ [l] ++ chunk ++ [r]) -mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text +mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text mathInlineWith op cl = try $ do textStr op when (op == "$") $ notFollowedBy space @@ -698,10 +779,10 @@ mathInlineWith op cl = try $ do notFollowedBy digit -- to prevent capture of $5 return $ trimMath $ T.concat words' where - inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text + inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack - inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String inBalancedBraces' 0 "" = do c <- anyChar if c == '{' @@ -718,13 +799,13 @@ mathInlineWith op cl = try $ do '{' -> inBalancedBraces' (numOpen + 1) (c:xs) _ -> inBalancedBraces' numOpen (c:xs) -mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text +mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text mathDisplayWith op cl = try $ fmap T.pack $ do textStr op many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl) -mathDisplay :: (HasReaderOptions st, Stream s m Char) +mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") @@ -733,7 +814,7 @@ mathDisplay = <|> (guardEnabled Ext_tex_math_double_backslash >> mathDisplayWith "\\\\[" "\\\\]") -mathInline :: (HasReaderOptions st , Stream s m Char) +mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") @@ -746,7 +827,7 @@ mathInline = -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. -withHorizDisplacement :: Stream s m Char +withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m a -- ^ Parser to apply -> ParserT s st m (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do @@ -758,30 +839,37 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. withRaw :: Monad m - => ParsecT Text st m a - -> ParsecT Text st m (a, Text) + => ParsecT Sources st m a + -> ParsecT Sources st m (a, Text) withRaw parser = do - pos1 <- getPosition - inp <- getInput + inps1 <- getInput result <- parser - pos2 <- getPosition - let (l1,c1) = (sourceLine pos1, sourceColumn pos1) - let (l2,c2) = (sourceLine pos2, sourceColumn pos2) - let inplines = take ((l2 - l1) + 1) $ T.lines inp - let raw = case inplines of - [] -> "" - [l] -> T.take (c2 - c1) l - ls -> T.unlines (init ls) <> T.take (c2 - 1) (last ls) - return (result, raw) + inps2 <- getInput + -- 'raw' is the difference between inps1 and inps2 + return (result, sourcesDifference inps1 inps2) + +sourcesDifference :: Sources -> Sources -> Text +sourcesDifference (Sources is1) (Sources is2) = go is1 is2 + where + go inps1 inps2 = + case (inps1, inps2) of + ([], _) -> mempty + (_, []) -> mconcat $ map snd inps1 + ((p1,t1):rest1, (p2, t2):rest2) + | p1 == p2 + , t1 == t2 -> go rest1 rest2 + | p1 == p2 + , t1 /= t2 -> fromMaybe mempty $ T.stripSuffix t2 t1 + | otherwise -> t1 <> go rest1 inps2 -- | Parses backslash, then applies character parser. -escaped :: Stream s m Char +escaped :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char -- ^ Parser for character to escape -> ParserT s st m Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: Stream s m Char => ParserT s st m Char +characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char characterReference = try $ do char '&' ent <- many1Till nonspaceChar (char ';') @@ -794,19 +882,19 @@ characterReference = try $ do _ -> Prelude.fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, fromMaybe 1 $ safeRead $ T.pack num) @@ -815,7 +903,7 @@ decimal = do -- returns (DefaultStyle, [next example number]). The next -- example number is incremented in parser state, and the label -- (if present) is added to the label table. -exampleNum :: Stream s m Char +exampleNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m (ListNumberStyle, Int) exampleNum = do char '@' @@ -834,37 +922,37 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) lowerAlpha = do ch <- satisfy isAsciiLower return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) upperAlpha = do ch <- satisfy isAsciiUpper return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes +anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes anyOrderedListMarker = choice [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: Stream s m Char +inPeriod :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) -> ParserT s st m ListAttributes inPeriod num = try $ do @@ -876,7 +964,7 @@ inPeriod num = try $ do return (start, style, delim) -- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: Stream s m Char +inOneParen :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) -> ParserT s st m ListAttributes inOneParen num = try $ do @@ -885,7 +973,7 @@ inOneParen num = try $ do return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: Stream s m Char +inTwoParens :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) -> ParserT s st m ListAttributes inTwoParens num = try $ do @@ -896,7 +984,7 @@ inTwoParens num = try $ do -- | Parses an ordered list marker with a given style and delimiter, -- returns number. -orderedListMarker :: Stream s m Char +orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ListNumberStyle -> ListNumberDelim -> ParserT s ParserState m Int @@ -919,10 +1007,10 @@ orderedListMarker style delim = do return start -- | Parses a character reference and returns a Str element. -charRef :: Stream s m Char => ParserT s st m Inline +charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline charRef = Str . T.singleton <$> characterReference -lineBlockLine :: Monad m => ParserT Text st m Text +lineBlockLine :: Monad m => ParserT Sources st m Text lineBlockLine = try $ do char '|' char ' ' @@ -932,11 +1020,11 @@ lineBlockLine = try $ do continuations <- many (try $ char ' ' >> anyLine) return $ white <> T.unwords (line : continuations) -blankLineBlockLine :: Stream s m Char => ParserT s st m Char +blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char blankLineBlockLine = try (char '|' >> blankline) -- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Monad m => ParserT Text st m [Text] +lineBlockLines :: Monad m => ParserT Sources st m [Text] lineBlockLines = try $ do lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine)) skipMany blankline @@ -944,7 +1032,8 @@ lineBlockLines = try $ do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf) +tableWith :: (Stream s m Char, UpdateSourcePos s Char, + HasReaderOptions st, Monad mf) => ParserT s st m (mf [Blocks], [Alignment], [Int]) -> ([Int] -> ParserT s st m (mf [Blocks])) -> ParserT s st m sep @@ -964,7 +1053,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row]) -tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf) +tableWith' :: (Stream s m Char, UpdateSourcePos s Char, + HasReaderOptions st, Monad mf) => ParserT s st m (mf [Blocks], [Alignment], [Int]) -> ([Int] -> ParserT s st m (mf [Blocks])) -> ParserT s st m sep @@ -1013,20 +1103,19 @@ widthsFromIndices numColumns' indices = -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st, - Monad mf, IsString s) - => ParserT s st m (mf Blocks) -- ^ Block list parser +gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf) + => ParserT Sources st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table - -> ParserT s st m (mf Blocks) + -> ParserT Sources st m (mf Blocks) gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter -gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st, - Monad mf, IsString s) - => ParserT s st m (mf Blocks) -- ^ Block list parser +gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st, + Monad mf) + => ParserT Sources st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table - -> ParserT s st m (TableComponents mf) + -> ParserT Sources st m (TableComponents mf) gridTableWith' blocks headless = tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter @@ -1035,7 +1124,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitTextByIndices (init indices) $ trimr line -gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment) +gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment) gridPart ch = do leftColon <- option False (True <$ char ':') dashes <- many1 (char ch) @@ -1050,7 +1139,7 @@ gridPart ch = do (False, False) -> AlignDefault return ((lengthDashes, lengthDashes + 1), alignment) -gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)] +gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: Text -> Text @@ -1059,14 +1148,14 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|') go c = T.any (== c) " \t" -- | Separator between rows of grid table. -gridTableSep :: Stream s m Char => Char -> ParserT s st m Char +gridTableSep :: Monad m => Char -> ParserT Sources st m Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. -gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) +gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st) => Bool -- ^ Headerless table - -> ParserT s st m (mf Blocks) - -> ParserT s st m (mf [Blocks], [Alignment], [Int]) + -> ParserT Sources st m (mf Blocks) + -> ParserT Sources st m (mf [Blocks], [Alignment], [Int]) gridTableHeader True _ = do optional blanklines dashes <- gridDashedLines '-' @@ -1089,17 +1178,17 @@ gridTableHeader False blocks = try $ do heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads return (heads, aligns, indices) -gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text] +gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices $ T.pack line) -- | Parse row of grid table. -gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) - => ParserT s st m (mf Blocks) +gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st) + => ParserT Sources st m (mf Blocks) -> [Int] - -> ParserT s st m (mf [Blocks]) + -> ParserT Sources st m (mf [Blocks]) gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $ @@ -1120,34 +1209,38 @@ removeOneLeadingSpace xs = Just (c, _) -> c == ' ' -- | Parse footer for a grid table. -gridTableFooter :: Stream s m Char => ParserT s st m () +gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () gridTableFooter = optional blanklines --- -- | Removes the ParsecT layer from the monad transformer stack -readWithM :: Monad m - => ParserT Text st m a -- ^ parser - -> st -- ^ initial state - -> Text -- ^ input +readWithM :: (Monad m, ToSources t) + => ParserT Sources st m a -- ^ parser + -> st -- ^ initial state + -> t -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (PandocParsecError input) <$> runParserT parser state "source" input + mapLeft (PandocParsecError sources) + <$> runParserT parser state (initialSourceName sources) sources + where + sources = toSources input -- | Parse a string with a given parser and state -readWith :: Parser Text st a +readWith :: ToSources t + => Parser Sources st a -> st - -> Text + -> t -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -- | Parse a string with @parser@ (for testing). testStringWith :: Show a - => ParserT Text ParserState Identity a + => ParserT Sources ParserState Identity a -> Text -> IO () testStringWith parser str = UTF8.putStrLn $ tshow $ - readWith parser defaultParserState str + readWith parser defaultParserState (toSources str) -- | Parsing options. data ParserState = ParserState @@ -1394,19 +1487,23 @@ registerHeader (ident,classes,kvs) header' = do updateState $ updateIdentifierList $ Set.insert ident return (ident,classes,kvs) -smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, + HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines smartPunctuation inlineParser = do guardEnabled Ext_smart choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ] -quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +quoted :: (HasLastStrPosition st, HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser -singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines singleQuoted inlineParser = do @@ -1416,7 +1513,8 @@ singleQuoted inlineParser = do (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd))) <|> pure "\8217" -doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, Stream s m Char) +doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines doubleQuoted inlineParser = do @@ -1433,13 +1531,14 @@ failIfInQuoteContext context = do context' <- getQuoteContext when (context' == context) $ Prelude.fail "already inside quotes" -charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char +charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) return c) -singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () singleQuoteStart = do failIfInQuoteContext InSingleQuote @@ -1449,7 +1548,7 @@ singleQuoteStart = do charOrRef "'\8216\145" void $ lookAhead (satisfy (not . isSpaceChar)) -singleQuoteEnd :: Stream s m Char +singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () singleQuoteEnd = try $ do charOrRef "'\8217\146" @@ -1457,7 +1556,7 @@ singleQuoteEnd = try $ do doubleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, - Stream s m Char) + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote @@ -1465,21 +1564,21 @@ doubleQuoteStart = do try $ do charOrRef "\"\8220\147" void $ lookAhead (satisfy (not . isSpaceChar)) -doubleQuoteEnd :: Stream s m Char +doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () doubleQuoteEnd = void (charOrRef "\"\8221\148") -apostrophe :: Stream s m Char => ParserT s st m Inlines +apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217") -doubleCloseQuote :: Stream s m Char => ParserT s st m Inlines +doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines doubleCloseQuote = B.str "\8221" <$ char '"' -ellipses :: Stream s m Char +ellipses :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines ellipses = try (string "..." >> return (B.str "\8230")) -dash :: (HasReaderOptions st, Stream s m Char) +dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines dash = try $ do oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions @@ -1506,7 +1605,7 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -citeKey :: (Stream s m Char, HasLastStrPosition st) +citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st) => ParserT s st m (Bool, Text) citeKey = try $ do guard =<< notAfterString @@ -1575,10 +1674,11 @@ insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) insertIncludedFile blocks totoks dirs f = runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f +-- TODO: replace this with something using addToSources. -- | Parse content of include file as future blocks. Circular includes result in -- an @PandocParseError@. insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) - => ParserT Text st m (Future st Blocks) + => ParserT Sources st m (Future st Blocks) -> [FilePath] -> FilePath - -> ParserT Text st m (Future st Blocks) -insertIncludedFileF p = insertIncludedFile' p id + -> ParserT Sources st m (Future st Blocks) +insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)]) diff --git a/src/Text/Pandoc/Readers.hs b/src/Text/Pandoc/Readers.hs index 7ae9db34f..5106f8058 100644 --- a/src/Text/Pandoc/Readers.hs +++ b/src/Text/Pandoc/Readers.hs @@ -1,4 +1,5 @@ {-# LANGUAGE MonoLocalBinds #-} +{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} {- | @@ -72,6 +73,7 @@ import Text.Pandoc.Error import Text.Pandoc.Extensions import Text.Pandoc.Options import Text.Pandoc.Readers.CommonMark +import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.Creole import Text.Pandoc.Readers.DocBook import Text.Pandoc.Readers.Docx @@ -84,7 +86,6 @@ import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.JATS (readJATS) import Text.Pandoc.Readers.Jira (readJira) import Text.Pandoc.Readers.LaTeX -import Text.Pandoc.Readers.Markdown import Text.Pandoc.Readers.MediaWiki import Text.Pandoc.Readers.Muse import Text.Pandoc.Readers.Native @@ -102,50 +103,52 @@ import Text.Pandoc.Readers.CSV import Text.Pandoc.Readers.CslJson import Text.Pandoc.Readers.BibTeX import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc) +data Reader m = TextReader (forall a . ToSources a => + ReaderOptions -> a -> m Pandoc) | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc) -- | Association list of formats and readers. readers :: PandocMonad m => [(Text, Reader m)] -readers = [ ("native" , TextReader readNative) - ,("json" , TextReader readJSON) - ,("markdown" , TextReader readMarkdown) - ,("markdown_strict" , TextReader readMarkdown) - ,("markdown_phpextra" , TextReader readMarkdown) - ,("markdown_github" , TextReader readMarkdown) - ,("markdown_mmd", TextReader readMarkdown) - ,("commonmark" , TextReader readCommonMark) - ,("commonmark_x" , TextReader readCommonMark) - ,("creole" , TextReader readCreole) - ,("dokuwiki" , TextReader readDokuWiki) - ,("gfm" , TextReader readCommonMark) - ,("rst" , TextReader readRST) - ,("mediawiki" , TextReader readMediaWiki) - ,("vimwiki" , TextReader readVimwiki) - ,("docbook" , TextReader readDocBook) - ,("opml" , TextReader readOPML) - ,("org" , TextReader readOrg) - ,("textile" , TextReader readTextile) -- TODO : textile+lhs - ,("html" , TextReader readHtml) - ,("jats" , TextReader readJATS) - ,("jira" , TextReader readJira) - ,("latex" , TextReader readLaTeX) - ,("haddock" , TextReader readHaddock) - ,("twiki" , TextReader readTWiki) - ,("tikiwiki" , TextReader readTikiWiki) - ,("docx" , ByteStringReader readDocx) - ,("odt" , ByteStringReader readOdt) - ,("t2t" , TextReader readTxt2Tags) - ,("epub" , ByteStringReader readEPUB) - ,("muse" , TextReader readMuse) - ,("man" , TextReader readMan) - ,("fb2" , TextReader readFB2) - ,("ipynb" , TextReader readIpynb) - ,("csv" , TextReader readCSV) - ,("csljson" , TextReader readCslJson) - ,("bibtex" , TextReader readBibTeX) - ,("biblatex" , TextReader readBibLaTeX) +readers = [("native" , TextReader readNative) + ,("json" , TextReader readJSON) + ,("markdown" , TextReader readMarkdown) + ,("markdown_strict" , TextReader readMarkdown) + ,("markdown_phpextra" , TextReader readMarkdown) + ,("markdown_github" , TextReader readMarkdown) + ,("markdown_mmd", TextReader readMarkdown) + ,("commonmark" , TextReader readCommonMark) + ,("commonmark_x" , TextReader readCommonMark) + ,("creole" , TextReader readCreole) + ,("dokuwiki" , TextReader readDokuWiki) + ,("gfm" , TextReader readCommonMark) + ,("rst" , TextReader readRST) + ,("mediawiki" , TextReader readMediaWiki) + ,("vimwiki" , TextReader readVimwiki) + ,("docbook" , TextReader readDocBook) + ,("opml" , TextReader readOPML) + ,("org" , TextReader readOrg) + ,("textile" , TextReader readTextile) -- TODO : textile+lhs + ,("html" , TextReader readHtml) + ,("jats" , TextReader readJATS) + ,("jira" , TextReader readJira) + ,("latex" , TextReader readLaTeX) + ,("haddock" , TextReader readHaddock) + ,("twiki" , TextReader readTWiki) + ,("tikiwiki" , TextReader readTikiWiki) + ,("docx" , ByteStringReader readDocx) + ,("odt" , ByteStringReader readOdt) + ,("t2t" , TextReader readTxt2Tags) + ,("epub" , ByteStringReader readEPUB) + ,("muse" , TextReader readMuse) + ,("man" , TextReader readMan) + ,("fb2" , TextReader readFB2) + ,("ipynb" , TextReader readIpynb) + ,("csv" , TextReader readCSV) + ,("csljson" , TextReader readCslJson) + ,("bibtex" , TextReader readBibTeX) + ,("biblatex" , TextReader readBibLaTeX) ] -- | Retrieve reader, extensions based on formatSpec (format+extensions). @@ -173,9 +176,13 @@ getReader s = return (r, exts) -- | Read pandoc document from JSON format. -readJSON :: PandocMonad m - => ReaderOptions -> Text -> m Pandoc -readJSON _ t = - case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of +readJSON :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc +readJSON _ s = + case eitherDecode' . BL.fromStrict . UTF8.fromText + . sourcesToText . toSources $ s of Right doc -> return doc - Left e -> throwError $ PandocParseError ("JSON parse error: " <> T.pack e) + Left e -> throwError $ PandocParseError ("JSON parse error: " + <> T.pack e) diff --git a/src/Text/Pandoc/Readers/BibTeX.hs b/src/Text/Pandoc/Readers/BibTeX.hs index b82a81350..318afda85 100644 --- a/src/Text/Pandoc/Readers/BibTeX.hs +++ b/src/Text/Pandoc/Readers/BibTeX.hs @@ -23,30 +23,33 @@ where import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, cite, str) -import Data.Text (Text) import Citeproc (Lang(..), parseLang) import Citeproc.Locale (getLocale) import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad, lookupEnv) import Text.Pandoc.Citeproc.BibTeX as BibTeX import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) +import Text.Pandoc.Sources (ToSources(..)) import Control.Monad.Except (throwError) -- | Read BibTeX from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readBibTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readBibTeX :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readBibTeX = readBibTeX' BibTeX.Bibtex -- | Read BibLaTeX from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readBibLaTeX :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readBibLaTeX :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readBibLaTeX = readBibTeX' BibTeX.Biblatex -readBibTeX' :: PandocMonad m => Variant -> ReaderOptions -> Text -> m Pandoc +readBibTeX' :: (PandocMonad m, ToSources a) + => Variant -> ReaderOptions -> a -> m Pandoc readBibTeX' variant _opts t = do mblangEnv <- lookupEnv "LANG" let defaultLang = Lang "en" Nothing (Just "US") [] [] [] @@ -60,7 +63,7 @@ readBibTeX' variant _opts t = do Left _ -> throwError $ PandocCiteprocError e Right l -> return l case BibTeX.readBibtexString variant locale (const True) t of - Left e -> throwError $ PandocParsecError t e + Left e -> throwError $ PandocParsecError (toSources t) e Right refs -> return $ setMeta "references" (map referenceToMetaValue refs) . setMeta "nocite" diff --git a/src/Text/Pandoc/Readers/CSV.hs b/src/Text/Pandoc/Readers/CSV.hs index 2958d6180..eca8f9425 100644 --- a/src/Text/Pandoc/Readers/CSV.hs +++ b/src/Text/Pandoc/Readers/CSV.hs @@ -13,23 +13,23 @@ Conversion from CSV to a 'Pandoc' table. -} module Text.Pandoc.Readers.CSV ( readCSV ) where -import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.CSV (parseCSV, defaultCSVOptions) import Text.Pandoc.Definition import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad) -import Text.Pandoc.Shared (crFilter) import Text.Pandoc.Error +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.Options (ReaderOptions) import Control.Monad.Except (throwError) -readCSV :: PandocMonad m +readCSV :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ Text to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc -readCSV _opts s = - case parseCSV defaultCSVOptions (crFilter s) of +readCSV _opts s = do + let txt = sourcesToText $ toSources s + case parseCSV defaultCSVOptions txt of Right (r:rs) -> return $ B.doc $ B.table capt (zip aligns widths) (TableHead nullAttr hdrs) @@ -45,4 +45,4 @@ readCSV _opts s = aligns = replicate numcols AlignDefault widths = replicate numcols ColWidthDefault Right [] -> return $ B.doc mempty - Left e -> throwError $ PandocParsecError s e + Left e -> throwError $ PandocParsecError (toSources [("",txt)]) e diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 244f77940..b099a9b50 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -30,45 +30,55 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock) import Control.Monad.Except import Data.Functor.Identity (runIdentity) import Data.Typeable -import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine, - runF, defaultParserState, take1WhileP, option) +import Text.Pandoc.Parsing (runParserT, getPosition, + runF, defaultParserState, option, many1, anyChar, + Sources(..), ToSources(..), ParserT, Future, + sourceName) import qualified Data.Text as T -- | Parse a CommonMark formatted string into a 'Pandoc' structure. -readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readCommonMark :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc readCommonMark opts s - | isEnabled Ext_yaml_metadata_block opts - , "---" `T.isPrefixOf` s = do - let metaValueParser = do - inp <- option "" $ take1WhileP (const True) - case runIdentity - (commonmarkWith (specFor opts) "metadata value" inp) of - Left _ -> mzero - Right (Cm bls :: Cm () Blocks) - -> return $ return $ B.toMetaValue bls - res <- runParserT (do meta <- yamlMetaBlock metaValueParser - pos <- getPosition - return (meta, pos)) - defaultParserState "YAML metadata" s - case res of - Left _ -> readCommonMarkBody opts s - Right (meta, pos) -> do - let dropLines 0 = id - dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n') - let metaLines = sourceLine pos - 1 - let body = T.replicate metaLines "\n" <> dropLines metaLines s - Pandoc _ bs <- readCommonMarkBody opts body - return $ Pandoc (runF meta defaultParserState) bs - | otherwise = readCommonMarkBody opts s + | isEnabled Ext_yaml_metadata_block opts = do + let sources = toSources s + let toks = concatMap sourceToToks (unSources sources) + res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts) + pos <- getPosition + return (meta, pos)) + defaultParserState "YAML metadata" (toSources s) + case res of + Left _ -> readCommonMarkBody opts sources toks + Right (meta, pos) -> do + -- strip off metadata section and parse body + let body = dropWhile (\t -> tokPos t < pos) toks + Pandoc _ bs <- readCommonMarkBody opts sources body + return $ Pandoc (runF meta defaultParserState) bs + | otherwise = do + let sources = toSources s + let toks = concatMap sourceToToks (unSources sources) + readCommonMarkBody opts sources toks -readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readCommonMarkBody opts s +sourceToToks :: (SourcePos, Text) -> [Tok] +sourceToToks (pos, s) = tokenize (sourceName pos) s + +metaValueParser :: Monad m + => ReaderOptions -> ParserT Sources st m (Future st MetaValue) +metaValueParser opts = do + inp <- option "" $ T.pack <$> many1 anyChar + let toks = concatMap sourceToToks (unSources (toSources inp)) + case runIdentity (parseCommonmarkWith (specFor opts) toks) of + Left _ -> mzero + Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls + +readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc +readCommonMarkBody opts s toks | isEnabled Ext_sourcepos opts = - case runIdentity (commonmarkWith (specFor opts) "" s) of + case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls | otherwise = - case runIdentity (commonmarkWith (specFor opts) "" s) of + case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls diff --git a/src/Text/Pandoc/Readers/Creole.hs b/src/Text/Pandoc/Readers/Creole.hs index 2658dfea2..ad848ada7 100644 --- a/src/Text/Pandoc/Readers/Creole.hs +++ b/src/Text/Pandoc/Readers/Creole.hs @@ -23,21 +23,20 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad (..)) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed) -import Text.Pandoc.Shared (crFilter) - -- | Read creole from an input string and return a Pandoc document. -readCreole :: PandocMonad m +readCreole :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readCreole opts s = do - res <- readWithM parseCreole def{ stateOptions = opts } $ crFilter s <> "\n\n" + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseCreole def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type CRLParser = ParserT Text ParserState +type CRLParser = ParserT Sources ParserState -- -- Utility functions diff --git a/src/Text/Pandoc/Readers/CslJson.hs b/src/Text/Pandoc/Readers/CslJson.hs index 30bb19483..a0af5c325 100644 --- a/src/Text/Pandoc/Readers/CslJson.hs +++ b/src/Text/Pandoc/Readers/CslJson.hs @@ -24,21 +24,22 @@ import Text.Pandoc.Options import Text.Pandoc.Definition import Text.Pandoc.Builder (setMeta, cite, str) import qualified Text.Pandoc.UTF8 as UTF8 -import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences) import Text.Pandoc.Citeproc.MetaValue (referenceToMetaValue) import Control.Monad.Except (throwError) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -- | Read CSL JSON from an input string and return a Pandoc document. -- The document will have only metadata, with an empty body. -- The metadata will contain a `references` field with the -- bibliography entries, and a `nocite` field with the wildcard `[@*]`. -readCslJson :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readCslJson _opts t = - case cslJsonToReferences (UTF8.fromText t) of +readCslJson :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc +readCslJson _opts x = + case cslJsonToReferences (UTF8.fromText $ sourcesToText $ toSources x) of Left e -> throwError $ PandocParseError $ T.pack e Right refs -> return $ setMeta "references" (map referenceToMetaValue refs) diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index ac3caa2c0..3db459cfd 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -30,7 +30,8 @@ import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad, report) import Text.Pandoc.Options import Text.Pandoc.Logging (LogMessage(..)) -import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.XML.Light @@ -539,11 +540,15 @@ instance Default DBState where , dbContent = [] } -readDocBook :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readDocBook :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readDocBook _ inp = do + let sources = toSources inp tree <- either (throwError . PandocXMLError "") return $ parseXMLContents - (TL.fromStrict . handleInstructions $ crFilter inp) + (TL.fromStrict . handleInstructions . sourcesToText $ sources) (bs, st') <- flip runStateT (def{ dbContent = tree }) $ mapM parseBlock tree return $ Pandoc (dbMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs index dedc1f03f..db98ac8de 100644 --- a/src/Text/Pandoc/Readers/DokuWiki.hs +++ b/src/Text/Pandoc/Readers/DokuWiki.hs @@ -29,26 +29,27 @@ import Text.Pandoc.Definition import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, trim, stringify, tshow) +import Text.Pandoc.Shared (trim, stringify, tshow) -- | Read DokuWiki from an input string and return a Pandoc document. -readDokuWiki :: PandocMonad m +readDokuWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readDokuWiki opts s = do - let input = crFilter s - res <- runParserT parseDokuWiki def {stateOptions = opts } "source" input + let sources = toSources s + res <- runParserT parseDokuWiki def {stateOptions = opts } + (initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError input e + Left e -> throwError $ PandocParsecError sources e Right d -> return d -type DWParser = ParserT Text ParserState +type DWParser = ParserT Sources ParserState -- * Utility functions -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: Stream s m Char => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () eol = void newline <|> eof nested :: PandocMonad m => DWParser m a -> DWParser m a diff --git a/src/Text/Pandoc/Readers/FB2.hs b/src/Text/Pandoc/Readers/FB2.hs index 66e390bd7..84e5278db 100644 --- a/src/Text/Pandoc/Readers/FB2.hs +++ b/src/Text/Pandoc/Readers/FB2.hs @@ -40,9 +40,9 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia, report) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter) import Text.Pandoc.XML.Light import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (ToSources(..), sourcesToText) type FB2 m = StateT FB2State m @@ -63,9 +63,12 @@ instance HasMeta FB2State where setMeta field v s = s {fb2Meta = setMeta field v (fb2Meta s)} deleteMeta field s = s {fb2Meta = deleteMeta field (fb2Meta s)} -readFB2 :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readFB2 :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readFB2 _ inp = - case parseXMLElement $ TL.fromStrict $ crFilter inp of + case parseXMLElement $ TL.fromStrict $ sourcesToText $ toSources inp of Left msg -> throwError $ PandocXMLError "" msg Right el -> do (bs, st) <- runStateT (parseRootElement el) def diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index c3e68afd8..f5c8a2277 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -62,21 +62,21 @@ import Text.Pandoc.Options ( extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) import Text.Pandoc.Shared ( - addMetaField, blocksToInlines', crFilter, escapeURI, extractSpaces, + addMetaField, blocksToInlines', escapeURI, extractSpaces, htmlSpanLikeElements, renderTags', safeRead, tshow) import Text.Pandoc.Walk import Text.Parsec.Error import Text.TeXMath (readMathML, writeTeX) -- | Convert HTML-formatted string to 'Pandoc' document. -readHtml :: PandocMonad m +readHtml :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> a -- ^ Input to parse -> m Pandoc readHtml opts inp = do let tags = stripPrefixes $ canonicalizeTags $ parseTagsOptions parseOptions{ optTagPosition = True } - (crFilter inp) + (sourcesToText $ toSources inp) parseDoc = do blocks <- fixPlains False . mconcat <$> manyTill block eof meta <- stateMeta . parserState <$> getState @@ -830,17 +830,19 @@ pInlinesInTags tagtype f = extractSpaces f <$> pInTags tagtype inline pTagText :: PandocMonad m => TagParser m Inlines pTagText = try $ do + pos <- getPosition (TagText str) <- pSatisfy isTagText st <- getState qu <- ask parsed <- lift $ lift $ - flip runReaderT qu $ runParserT (many pTagContents) st "text" str + flip runReaderT qu $ runParserT (many pTagContents) st "text" + (Sources [(pos, str)]) case parsed of Left _ -> throwError $ PandocParseError $ "Could not parse `" <> str <> "'" Right result -> return $ mconcat result -type InlinesParser m = HTMLParser m Text +type InlinesParser m = HTMLParser m Sources pTagContents :: PandocMonad m => InlinesParser m Inlines pTagContents = @@ -970,13 +972,14 @@ isCommentTag = tagComment (const True) -- | Matches a stretch of HTML in balanced tags. htmlInBalanced :: Monad m => (Tag Text -> Bool) - -> ParserT Text st m Text + -> ParserT Sources st m Text htmlInBalanced f = try $ do lookAhead (char '<') - inp <- getInput - let ts = canonicalizeTags $ - parseTagsOptions parseOptions{ optTagWarning = True, - optTagPosition = True } inp + sources <- getInput + let ts = canonicalizeTags + $ parseTagsOptions parseOptions{ optTagWarning = True, + optTagPosition = True } + $ sourcesToText sources case ts of (TagPosition sr sc : t@(TagOpen tn _) : rest) -> do guard $ f t @@ -1018,15 +1021,17 @@ hasTagWarning _ = False -- | Matches a tag meeting a certain condition. htmlTag :: (HasReaderOptions st, Monad m) => (Tag Text -> Bool) - -> ParserT Text st m (Tag Text, Text) + -> ParserT Sources st m (Tag Text, Text) htmlTag f = try $ do lookAhead (char '<') startpos <- getPosition - inp <- getInput + sources <- getInput + let inp = sourcesToText sources let ts = canonicalizeTags $ parseTagsOptions parseOptions{ optTagWarning = False , optTagPosition = True } - (inp <> " ") -- add space to ensure that + (inp <> " ") + -- add space to ensure that -- we get a TagPosition after the tag (next, ln, col) <- case ts of (TagPosition{} : next : TagPosition ln col : _) diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 48454e353..35eaac0a9 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -19,7 +19,7 @@ import Control.Monad.Except (throwError) import Data.List (intersperse) import Data.List.NonEmpty (nonEmpty) import Data.Maybe (fromMaybe) -import Data.Text (Text, unpack) +import Data.Text (unpack) import qualified Data.Text as T import Documentation.Haddock.Parser import Documentation.Haddock.Types as H @@ -29,15 +29,17 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, splitTextBy, trim) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) +import Text.Pandoc.Shared (splitTextBy, trim) -- | Parse Haddock markup and return a 'Pandoc' document. -readHaddock :: PandocMonad m +readHaddock :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc -readHaddock opts s = case readHaddockEither opts (unpack (crFilter s)) of +readHaddock opts s = case readHaddockEither opts + (unpack . sourcesToText . toSources $ s) of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Ipynb.hs b/src/Text/Pandoc/Readers/Ipynb.hs index 70296bb6b..cd1093109 100644 --- a/src/Text/Pandoc/Readers/Ipynb.hs +++ b/src/Text/Pandoc/Readers/Ipynb.hs @@ -39,10 +39,12 @@ import Data.Aeson as Aeson import Control.Monad.Except (throwError) import Text.Pandoc.Readers.Markdown (readMarkdown) import qualified Text.Pandoc.UTF8 as UTF8 +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -readIpynb :: PandocMonad m => ReaderOptions -> Text -> m Pandoc -readIpynb opts t = do - let src = BL.fromStrict (TE.encodeUtf8 t) +readIpynb :: (PandocMonad m, ToSources a) + => ReaderOptions -> a -> m Pandoc +readIpynb opts x = do + let src = BL.fromStrict . TE.encodeUtf8 . sourcesToText $ toSources x case eitherDecode src of Right (notebook4 :: Notebook NbV4) -> notebookToPandoc opts notebook4 Left _ -> diff --git a/src/Text/Pandoc/Readers/JATS.hs b/src/Text/Pandoc/Readers/JATS.hs index c068f3774..9cdbf1611 100644 --- a/src/Text/Pandoc/Readers/JATS.hs +++ b/src/Text/Pandoc/Readers/JATS.hs @@ -29,11 +29,12 @@ import Text.HTML.TagSoup.Entity (lookupEntity) import Text.Pandoc.Builder import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Options -import Text.Pandoc.Shared (crFilter, safeRead, extractSpaces) +import Text.Pandoc.Shared (safeRead, extractSpaces) import Text.TeXMath (readMathML, writeTeX) import Text.Pandoc.XML.Light import qualified Data.Set as S (fromList, member) import Data.Set ((\\)) +import Text.Pandoc.Sources (ToSources(..), sourcesToText) type JATS m = StateT JATSState m @@ -52,10 +53,14 @@ instance Default JATSState where , jatsContent = [] } -readJATS :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readJATS :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readJATS _ inp = do + let sources = toSources inp tree <- either (throwError . PandocXMLError "") return $ - parseXMLContents (TL.fromStrict $ crFilter inp) + parseXMLContents (TL.fromStrict . sourcesToText $ sources) (bs, st') <- flip runStateT (def{ jatsContent = tree }) $ mapM parseBlock tree return $ Pandoc (jatsMeta st') (toList . mconcat $ bs) diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index 89aecbf56..a3b415f09 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -20,18 +20,20 @@ import Text.Pandoc.Builder hiding (cell) import Text.Pandoc.Error (PandocError (PandocParseError)) import Text.Pandoc.Options (ReaderOptions) import Text.Pandoc.Shared (stringify) - +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import qualified Text.Jira.Markup as Jira -- | Read Jira wiki markup. -readJira :: PandocMonad m +readJira :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc -readJira _opts s = case parse s of - Right d -> return $ jiraToPandoc d - Left e -> throwError . PandocParseError $ - "Jira parse error" `append` pack (show e) +readJira _opts inp = do + let sources = toSources inp + case parse (sourcesToText sources) of + Right d -> return $ jiraToPandoc d + Left e -> throwError . PandocParseError $ + "Jira parse error" `append` pack (show e) jiraToPandoc :: Jira.Doc -> Pandoc jiraToPandoc (Jira.Doc blks) = doc $ foldMap jiraToPandocBlocks blks diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9ad168293..f90d562ae 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -77,16 +77,17 @@ import Data.List.NonEmpty (nonEmpty) -- import Debug.Trace (traceShowId) -- | Parse LaTeX from string and return 'Pandoc' document. -readLaTeX :: PandocMonad m +readLaTeX :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assumes @'\n'@ line endings) + -> a -- ^ Input to parse -> m Pandoc readLaTeX opts ltx = do + let sources = toSources ltx parsed <- runParserT parseLaTeX def{ sOptions = opts } "source" - (tokenize "source" (crFilter ltx)) + (tokenizeSources sources) case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError ltx e + Left e -> throwError $ PandocParsecError sources e parseLaTeX :: PandocMonad m => LP m Pandoc parseLaTeX = do @@ -132,11 +133,11 @@ resolveRefs _ x = x rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> rawLaTeXParser toks True (do choice (map controlSeq @@ -163,11 +164,11 @@ beginOrEndCommand = try $ do (txt <> untokenize rawargs) rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => ParserT Text s m Text + => ParserT Sources s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced)) @@ -178,11 +179,11 @@ rawLaTeXInline = do finalbraces <- mconcat <$> many (try (string "{}")) -- see #5439 return $ raw <> T.pack finalbraces -inlineCommand :: PandocMonad m => ParserT Text ParserState m Inlines +inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) inp <- getInput - let toks = tokenize "source" inp + let toks = tokenizeSources inp fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines @@ -641,7 +642,7 @@ opt = do parsed <- runParserT (mconcat <$> many inline) st "bracketed option" toks case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e + Left e -> throwError $ PandocParsecError (toSources toks) e -- block elements: diff --git a/src/Text/Pandoc/Readers/LaTeX/Citation.hs b/src/Text/Pandoc/Readers/LaTeX/Citation.hs index 655823dab..af97125c6 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Citation.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Citation.hs @@ -120,7 +120,7 @@ simpleCiteArgs inline = try $ do runParserT (mconcat <$> many inline) st "bracketed option" toks case parsed of Right result -> return result - Left e -> throwError $ PandocParsecError (untokenize toks) e + Left e -> throwError $ PandocParsecError (toSources toks) e diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index db58b333d..35ce3509d 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -27,6 +27,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , rawLaTeXParser , applyMacros , tokenize + , tokenizeSources , untokenize , untoken , totoks @@ -248,7 +249,7 @@ withVerbatimMode parser = do rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) => [Tok] -> Bool -> LP m a -> LP m a - -> ParserT Text s m (a, Text) + -> ParserT Sources s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } @@ -268,7 +269,7 @@ rawLaTeXParser toks retokenize parser valParser = do Left _ -> mzero Right ((val, raw), st) -> do updateState (updateMacros (sMacros st <>)) - _ <- takeP (T.length (untokenize toks')) + void $ count (T.length (untokenize toks')) anyChar let result = untokenize raw -- ensure we end with space if input did, see #4442 let result' = @@ -281,7 +282,7 @@ rawLaTeXParser toks retokenize parser valParser = do return (val, result') applyMacros :: (PandocMonad m, HasMacros s, HasReaderOptions s) - => Text -> ParserT Text s m Text + => Text -> ParserT Sources s m Text applyMacros s = (guardDisabled Ext_latex_macros >> return s) <|> do let retokenize = untokenize <$> many (satisfyTok (const True)) pstate <- getState @@ -301,6 +302,11 @@ QuickCheck property: > let t = T.pack s in untokenize (tokenize "random" t) == t -} +tokenizeSources :: Sources -> [Tok] +tokenizeSources = concatMap tokenizeSource . unSources + where + tokenizeSource (pos, t) = totoks pos t + tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) diff --git a/src/Text/Pandoc/Readers/LaTeX/Types.hs b/src/Text/Pandoc/Readers/LaTeX/Types.hs index f8c214318..c20b72bc5 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Types.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE FlexibleInstances #-} {- | Module : Text.Pandoc.Readers.LaTeX.Types Copyright : Copyright (C) 2017-2021 John MacFarlane @@ -18,7 +19,9 @@ module Text.Pandoc.Readers.LaTeX.Types ( Tok(..) ) where import Data.Text (Text) -import Text.Parsec.Pos (SourcePos) +import Text.Parsec.Pos (SourcePos, sourceName) +import Text.Pandoc.Sources +import Data.List (groupBy) data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | Esc1 | Esc2 | Arg Int @@ -27,6 +30,16 @@ data TokType = CtrlSeq Text | Spaces | Newline | Symbol | Word | Comment | data Tok = Tok SourcePos TokType Text deriving (Eq, Ord, Show) +instance ToSources [Tok] where + toSources = Sources + . map (\ts -> case ts of + Tok p _ _ : _ -> (p, mconcat $ map tokToText ts) + _ -> error "toSources [Tok] encountered empty group") + . groupBy (\(Tok p1 _ _) (Tok p2 _ _) -> sourceName p1 == sourceName p2) + +tokToText :: Tok -> Text +tokToText (Tok _ _ t) = t + data ExpansionPoint = ExpandWhenDefined | ExpandWhenUsed deriving (Eq, Ord, Show) diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 21b8feaab..1141af66f 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -20,7 +20,7 @@ import Control.Monad (liftM, mzero, guard, void) import Control.Monad.Trans (lift) import Control.Monad.Except (throwError) import Data.Maybe (catMaybes, isJust) -import Data.List (intersperse, intercalate) +import Data.List (intersperse) import qualified Data.Text as T import Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad(..), report) @@ -29,9 +29,8 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Walk (query) -import Text.Pandoc.Shared (crFilter, mapLeft) +import Text.Pandoc.Shared (mapLeft) import Text.Pandoc.Readers.Roff -- TODO explicit imports -import Text.Parsec hiding (tokenPrim) import qualified Text.Parsec as Parsec import Text.Parsec.Pos (updatePosString) import qualified Data.Foldable as Foldable @@ -50,13 +49,20 @@ type ManParser m = ParserT [RoffToken] ManState m -- | Read man (troff) from an input string and return a Pandoc document. -readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc -readMan opts txt = do - tokenz <- lexRoff (initialPos "input") (crFilter txt) +readMan :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc +readMan opts s = do + let Sources inps = toSources s + tokenz <- mconcat <$> mapM (uncurry lexRoff) inps let state = def {readerOptions = opts} :: ManState + let fixError (PandocParsecError _ e) = PandocParsecError (Sources inps) e + fixError e = e eitherdoc <- readWithMTokens parseMan state (Foldable.toList . unRoffTokens $ tokenz) - either throwError return eitherdoc + either (throwError . fixError) return eitherdoc + readWithMTokens :: PandocMonad m => ParserT [RoffToken] ManState m a -- ^ parser @@ -64,9 +70,10 @@ readWithMTokens :: PandocMonad m -> [RoffToken] -- ^ input -> m (Either PandocError a) readWithMTokens parser state input = - let leftF = PandocParsecError . T.pack . intercalate "\n" $ show <$> input + let leftF = PandocParsecError mempty in mapLeft leftF `liftM` runParserT parser state "source" input + parseMan :: PandocMonad m => ManParser m Pandoc parseMan = do bs <- many parseBlock <* eof @@ -89,7 +96,7 @@ parseBlock = choice [ parseList parseTable :: PandocMonad m => ManParser m Blocks parseTable = do - modifyState $ \st -> st { tableCellsPlain = True } + updateState $ \st -> st { tableCellsPlain = True } let isTbl Tbl{} = True isTbl _ = False Tbl _opts rows pos <- msatisfy isTbl @@ -135,7 +142,7 @@ parseTable = do case res' of Left _ -> Prelude.fail "Could not parse table cell" Right x -> do - modifyState $ \s -> s{ tableCellsPlain = False } + updateState $ \s -> s{ tableCellsPlain = False } return x Right x -> return x @@ -222,7 +229,7 @@ parseTitle = do setMeta "section" (linePartsToInlines y) [x] -> setMeta "title" (linePartsToInlines x) [] -> id - modifyState $ \st -> st{ metadata = adjustMeta $ metadata st } + updateState $ \st -> st{ metadata = adjustMeta $ metadata st } return mempty linePartsToInlines :: [LinePart] -> Inlines diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ba8ed147e..69dd51bc4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -47,19 +47,20 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared import Text.Pandoc.XML (fromEntities) import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs, yamlMetaBlock) +-- import Debug.Trace (traceShowId) -type MarkdownParser m = ParserT Text ParserState m +type MarkdownParser m = ParserT Sources ParserState m type F = Future ParserState -- | Read markdown from an input string and return a Pandoc document. -readMarkdown :: PandocMonad m +readMarkdown :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -- ^ Input -> m Pandoc readMarkdown opts s = do parsed <- readWithM parseMarkdown def{ stateOptions = opts } - (crFilter s <> "\n\n") + (ensureFinalNewlines 3 (toSources s)) case parsed of Right result -> return result Left e -> throwError e @@ -80,7 +81,7 @@ yamlToMeta opts mbfp bstr = do meta <- yamlBsToMeta (fmap B.toMetaValue <$> parseBlocks) bstr setPosition oldPos return $ runF meta defaultParserState - parsed <- readWithM parser def{ stateOptions = opts } "" + parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text) case parsed of Right result -> return result Left e -> throwError e @@ -103,7 +104,7 @@ yamlToRefs idpred opts mbfp bstr = do refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr setPosition oldPos return $ runF refs defaultParserState - parsed <- readWithM parser def{ stateOptions = opts } "" + parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text) case parsed of Right result -> return result Left e -> throwError e @@ -146,14 +147,14 @@ inList = do ctx <- stateParserContext <$> getState guard (ctx == ListItemState) -spnl :: PandocMonad m => ParserT Text st m () +spnl :: PandocMonad m => ParserT Sources st m () spnl = try $ do skipSpaces optional newline skipSpaces notFollowedBy (char '\n') -spnl' :: PandocMonad m => ParserT Text st m Text +spnl' :: PandocMonad m => ParserT Sources st m Text spnl' = try $ do xs <- many spaceChar ys <- option "" $ try $ (:) <$> newline @@ -568,7 +569,7 @@ registerImplicitHeader raw attr@(ident, _, _) -- hrule block -- -hrule :: PandocMonad m => ParserT Text st m (F Blocks) +hrule :: PandocMonad m => ParserT Sources st m (F Blocks) hrule = try $ do skipSpaces start <- satisfy isHruleChar @@ -588,7 +589,7 @@ indentedLine = indentSpaces >> anyLineNewline blockDelimiter :: PandocMonad m => (Char -> Bool) -> Maybe Int - -> ParserT Text ParserState m Int + -> ParserT Sources ParserState m Int blockDelimiter f len = try $ do skipNonindentSpaces c <- lookAhead (satisfy f) @@ -732,7 +733,7 @@ lhsCodeBlockBirdWith c = try $ do blanklines return $ T.intercalate "\n" lns' -birdTrackLine :: PandocMonad m => Char -> ParserT Text st m Text +birdTrackLine :: PandocMonad m => Char -> ParserT Sources st m Text birdTrackLine c = try $ do char c -- allow html tags on left margin: @@ -1025,7 +1026,7 @@ para = try $ do option (B.plain <$> result) $ try $ do newline - (blanklines >> return mempty) + (mempty <$ blanklines) <|> (guardDisabled Ext_blank_before_blockquote >> () <$ lookAhead blockQuote) <|> (guardEnabled Ext_backtick_code_blocks >> () <$ lookAhead codeBlockFenced) <|> (guardDisabled Ext_blank_before_header >> () <$ lookAhead header) @@ -1170,7 +1171,7 @@ lineBlock = do -- and the length including trailing space. dashedLine :: PandocMonad m => Char - -> ParserT Text st m (Int, Int) + -> ParserT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many spaceChar @@ -1239,7 +1240,7 @@ rawTableLine :: PandocMonad m -> MarkdownParser m [Text] rawTableLine indices = do notFollowedBy' (blanklines' <|> tableFooter) - line <- take1WhileP (/='\n') <* newline + line <- anyLine return $ map trim $ tail $ splitTextByIndices (init indices) line @@ -1390,7 +1391,7 @@ pipeTableCell = return $ B.plain <$> result) <|> return mempty -pipeTableHeaderPart :: PandocMonad m => ParserT Text st m (Alignment, Int) +pipeTableHeaderPart :: PandocMonad m => ParserT Sources st m (Alignment, Int) pipeTableHeaderPart = try $ do skipMany spaceChar left <- optionMaybe (char ':') @@ -1406,10 +1407,14 @@ pipeTableHeaderPart = try $ do (Just _,Just _) -> AlignCenter, len) -- Succeed only if current line contains a pipe. -scanForPipe :: PandocMonad m => ParserT Text st m () +scanForPipe :: PandocMonad m => ParserT Sources st m () scanForPipe = do - inp <- getInput - case T.break (\c -> c == '\n' || c == '|') inp of + Sources inps <- getInput + let ln = case inps of + [] -> "" + ((_,t):(_,t'):_) | T.null t -> t' + ((_,t):_) -> t + case T.break (\c -> c == '\n' || c == '|') ln of (_, T.uncons -> Just ('|', _)) -> return () _ -> mzero @@ -1703,13 +1708,13 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space -nonEndline :: PandocMonad m => ParserT Text st m Char +nonEndline :: PandocMonad m => ParserT Sources st m Char nonEndline = satisfy (/='\n') str :: PandocMonad m => MarkdownParser m (F Inlines) str = do result <- mconcat <$> many1 - ( take1WhileP isAlphaNum + ( T.pack <$> (many1 alphaNum) <|> "." <$ try (char '.' <* notFollowedBy (char '.')) ) updateLastStrPos (do guardEnabled Ext_smart @@ -1962,7 +1967,7 @@ rawLaTeXInline' = do s <- rawLaTeXInline return $ return $ B.rawInline "tex" s -- "tex" because it might be context -rawConTeXtEnvironment :: PandocMonad m => ParserT Text st m Text +rawConTeXtEnvironment :: PandocMonad m => ParserT Sources st m Text rawConTeXtEnvironment = try $ do string "\\start" completion <- inBrackets (letter <|> digit <|> spaceChar) @@ -1971,7 +1976,7 @@ rawConTeXtEnvironment = try $ do (try $ string "\\stop" >> textStr completion) return $ "\\start" <> completion <> T.concat contents <> "\\stop" <> completion -inBrackets :: PandocMonad m => ParserT Text st m Char -> ParserT Text st m Text +inBrackets :: PandocMonad m => ParserT Sources st m Char -> ParserT Sources st m Text inBrackets parser = do char '[' contents <- manyChar parser diff --git a/src/Text/Pandoc/Readers/MediaWiki.hs b/src/Text/Pandoc/Readers/MediaWiki.hs index 9f4d5e170..825e4a2eb 100644 --- a/src/Text/Pandoc/Readers/MediaWiki.hs +++ b/src/Text/Pandoc/Readers/MediaWiki.hs @@ -36,17 +36,18 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (nested) import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isCommentTag) -import Text.Pandoc.Shared (crFilter, safeRead, stringify, stripTrailingNewlines, +import Text.Pandoc.Shared (safeRead, stringify, stripTrailingNewlines, trim, splitTextBy, tshow) import Text.Pandoc.Walk (walk) import Text.Pandoc.XML (fromEntities) -- | Read mediawiki from an input string and return a Pandoc document. -readMediaWiki :: PandocMonad m - => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) +readMediaWiki :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a -> m Pandoc readMediaWiki opts s = do + let sources = toSources s parsed <- readWithM parseMediaWiki MWState{ mwOptions = opts , mwMaxNestingLevel = 4 , mwNextLinkNumber = 1 @@ -55,7 +56,7 @@ readMediaWiki opts s = do , mwLogMessages = [] , mwInTT = False } - (crFilter s <> "\n") + sources case parsed of Right result -> return result Left e -> throwError e @@ -69,7 +70,7 @@ data MWState = MWState { mwOptions :: ReaderOptions , mwInTT :: Bool } -type MWParser m = ParserT Text MWState m +type MWParser m = ParserT Sources MWState m instance HasReaderOptions MWState where extractReaderOptions = mwOptions diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index cb141cba5..bbcfe62ea 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -35,9 +35,9 @@ import qualified Data.Text.Lazy as TL import qualified Text.Pandoc.UTF8 as UTF8 yamlBsToMeta :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> BL.ByteString - -> ParserT Text st m (Future st Meta) + -> ParserT Sources st m (Future st Meta) yamlBsToMeta pMetaValue bstr = do case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc (YAML.Mapping _ _ o):_) @@ -67,10 +67,10 @@ lookupYAML _ _ = Nothing -- Returns filtered list of references. yamlBsToRefs :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> (Text -> Bool) -- ^ Filter for id -> BL.ByteString - -> ParserT Text st m (Future st [MetaValue]) + -> ParserT Sources st m (Future st [MetaValue]) yamlBsToRefs pMetaValue idpred bstr = case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of Right (YAML.Doc o@YAML.Mapping{}:_) @@ -108,9 +108,9 @@ nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = Just t nodeToKey _ = Nothing normalizeMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> Text - -> ParserT Text st m (Future st MetaValue) + -> ParserT Sources st m (Future st MetaValue) normalizeMetaValue pMetaValue x = -- Note: a standard quoted or unquoted YAML value will -- not end in a newline, but a "block" set off with @@ -133,9 +133,9 @@ checkBoolean t | otherwise = Nothing yamlToMetaValue :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> YAML.Node YE.Pos - -> ParserT Text st m (Future st MetaValue) + -> ParserT Sources st m (Future st MetaValue) yamlToMetaValue pMetaValue (YAML.Scalar _ x) = case x of YAML.SStr t -> normalizeMetaValue pMetaValue t @@ -156,9 +156,9 @@ yamlToMetaValue pMetaValue (YAML.Mapping _ _ o) = yamlToMetaValue _ _ = return $ return $ MetaString "" yamlMap :: (PandocMonad m, HasLastStrPosition st) - => ParserT Text st m (Future st MetaValue) + => ParserT Sources st m (Future st MetaValue) -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> ParserT Text st m (Future st (M.Map Text MetaValue)) + -> ParserT Sources st m (Future st (M.Map Text MetaValue)) yamlMap pMetaValue o = do kvs <- forM (M.toList o) $ \(key, v) -> do k <- maybe (throwError $ PandocParseError @@ -177,8 +177,8 @@ yamlMap pMetaValue o = do -- | Parse a YAML metadata block using the supplied 'MetaValue' parser. yamlMetaBlock :: (HasLastStrPosition st, PandocMonad m) - => ParserT Text st m (Future st MetaValue) - -> ParserT Text st m (Future st Meta) + => ParserT Sources st m (Future st MetaValue) + -> ParserT Sources st m (Future st Meta) yamlMetaBlock parser = try $ do string "---" blankline @@ -189,5 +189,5 @@ yamlMetaBlock parser = try $ do optional blanklines yamlBsToMeta parser $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -stopLine :: Monad m => ParserT Text st m () +stopLine :: Monad m => ParserT Sources st m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () diff --git a/src/Text/Pandoc/Readers/Muse.hs b/src/Text/Pandoc/Readers/Muse.hs index 698bfd3d7..a0d4534f1 100644 --- a/src/Text/Pandoc/Readers/Muse.hs +++ b/src/Text/Pandoc/Readers/Muse.hs @@ -37,18 +37,19 @@ import Text.Pandoc.Error (PandocError (PandocParsecError)) import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Parsing -import Text.Pandoc.Shared (crFilter, trimr, tshow) +import Text.Pandoc.Shared (trimr, tshow) -- | Read Muse from an input string and return a Pandoc document. -readMuse :: PandocMonad m +readMuse :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readMuse opts s = do - let input = crFilter s - res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } "source" input + let sources = toSources s + res <- flip runReaderT def $ runParserT parseMuse def{ museOptions = opts } + (initialSourceName sources) sources case res of - Left e -> throwError $ PandocParsecError input e + Left e -> throwError $ PandocParsecError sources e Right d -> return d type F = Future MuseState @@ -82,7 +83,7 @@ instance Default MuseEnv where , museInPara = False } -type MuseParser m = ParserT Text MuseState (ReaderT MuseEnv m) +type MuseParser m = ParserT Sources MuseState (ReaderT MuseEnv m) instance HasReaderOptions MuseState where extractReaderOptions = museOptions @@ -155,7 +156,7 @@ firstColumn = getPosition >>= \pos -> guard (sourceColumn pos == 1) -- * Parsers -- | Parse end-of-line, which can be either a newline or end-of-file. -eol :: Stream s m Char => ParserT s st m () +eol :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () eol = void newline <|> eof getIndent :: PandocMonad m diff --git a/src/Text/Pandoc/Readers/Native.hs b/src/Text/Pandoc/Readers/Native.hs index 9c8bc0374..58f235e81 100644 --- a/src/Text/Pandoc/Readers/Native.hs +++ b/src/Text/Pandoc/Readers/Native.hs @@ -21,6 +21,7 @@ import Control.Monad.Except (throwError) import Data.Text (Text) import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Error +import Text.Pandoc.Sources (ToSources(..), sourcesToText) -- | Read native formatted text and return a Pandoc document. -- The input may be a full pandoc document, a block list, a block, @@ -32,14 +33,15 @@ import Text.Pandoc.Error -- -- > Pandoc nullMeta [Plain [Str "hi"]] -- -readNative :: PandocMonad m +readNative :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readNative _ s = - case maybe (Pandoc nullMeta <$> readBlocks s) Right (safeRead s) of - Right doc -> return doc - Left _ -> throwError $ PandocParseError "couldn't read native" + let t = sourcesToText . toSources $ s + in case maybe (Pandoc nullMeta <$> readBlocks t) Right (safeRead t) of + Right doc -> return doc + Left _ -> throwError $ PandocParseError "couldn't read native" readBlocks :: Text -> Either PandocError [Block] readBlocks s = maybe ((:[]) <$> readBlock s) Right (safeRead s) diff --git a/src/Text/Pandoc/Readers/OPML.hs b/src/Text/Pandoc/Readers/OPML.hs index 5f2ddb876..668c9ca11 100644 --- a/src/Text/Pandoc/Readers/OPML.hs +++ b/src/Text/Pandoc/Readers/OPML.hs @@ -24,7 +24,8 @@ import Text.Pandoc.Options import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Readers.HTML (readHtml) import Text.Pandoc.Readers.Markdown (readMarkdown) -import Text.Pandoc.Shared (crFilter, blocksToInlines') +import Text.Pandoc.Shared (blocksToInlines') +import Text.Pandoc.Sources (ToSources(..), sourcesToText) import Text.Pandoc.XML.Light import Control.Monad.Except (throwError) @@ -46,10 +47,14 @@ instance Default OPMLState where , opmlOptions = def } -readOPML :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readOPML :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readOPML opts inp = do - (bs, st') <- runStateT - (case parseXMLContents (TL.fromStrict (crFilter inp)) of + let sources = toSources inp + (bs, st') <- + runStateT (case parseXMLContents (TL.fromStrict . sourcesToText $ sources) of Left msg -> throwError $ PandocXMLError "" msg Right ns -> mapM parseBlock ns) def{ opmlOptions = opts } diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index afeb27a87..8823befdd 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -18,22 +18,19 @@ import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing (reportLogMessages) -import Text.Pandoc.Shared (crFilter) - +import Text.Pandoc.Sources (ToSources(..), ensureFinalNewlines) import Control.Monad.Except (throwError) import Control.Monad.Reader (runReaderT) -import Data.Text (Text) - -- | Parse org-mode string and return a Pandoc document. -readOrg :: PandocMonad m +readOrg :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readOrg opts s = do parsed <- flip runReaderT def $ readWithM parseOrg (optionsToParserState opts) - (crFilter s <> "\n\n") + (ensureFinalNewlines 2 (toSources s)) case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 519a6ce04..054f2611a 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -29,6 +29,7 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) +import Text.Pandoc.Sources (ToSources(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Monad (guard, mplus, mzero, unless, void, when) @@ -802,7 +803,7 @@ inlineLaTeX = try $ do parseAsInlineLaTeX :: PandocMonad m => Text -> TeXExport -> OrgParser m (Maybe Inlines) parseAsInlineLaTeX cs = \case - TeXExport -> maybeRight <$> runParserT inlineCommand state "" cs + TeXExport -> maybeRight <$> runParserT inlineCommand state "" (toSources cs) TeXIgnore -> return (Just mempty) TeXVerbatim -> return (Just $ B.str cs) diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 6ed24a602..c7ea02815 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -114,7 +114,7 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Sources OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index ac4c0b6cb..a3fcf028c 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -38,25 +38,24 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 -import Text.Printf (printf) import Data.Time.Format -- TODO: -- [ ] .. parsed-literal -- | Parse reStructuredText string and return Pandoc document. -readRST :: PandocMonad m +readRST :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ Text to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readRST opts s = do parsed <- readWithM parseRST def{ stateOptions = opts } - (crFilter s <> "\n\n") + (ensureFinalNewlines 2 (toSources s)) case parsed of Right result -> return result Left e -> throwError e -type RSTParser m = ParserT Text ParserState m +type RSTParser m = ParserT Sources ParserState m -- -- Constants and data structure definitions @@ -151,11 +150,19 @@ parseRST = do startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys were... - docMinusKeys <- T.concat <$> - manyTill (referenceKey <|> anchorDef <|> - noteBlock <|> citationBlock <|> - (snd <$> withRaw comment) <|> - headerBlock <|> lineClump) eof + let chunk = referenceKey + <|> anchorDef + <|> noteBlock + <|> citationBlock + <|> (snd <$> withRaw comment) + <|> headerBlock + <|> lineClump + docMinusKeys <- Sources <$> + manyTill (do pos <- getPosition + t <- chunk + return (pos, t)) eof + -- UGLY: we collapse source position information. + -- TODO: fix the parser to use the F monad instead of two passes setInput docMinusKeys setPosition startPos st' <- getState @@ -348,7 +355,7 @@ singleHeader' = try $ do -- hrule block -- -hrule :: Monad m => ParserT Text st m Blocks +hrule :: Monad m => ParserT Sources st m Blocks hrule = try $ do chr <- oneOf underlineChars count 3 (char chr) @@ -363,7 +370,7 @@ hrule = try $ do -- read a line indented by a given string indentedLine :: (HasReaderOptions st, Monad m) - => Int -> ParserT Text st m Text + => Int -> ParserT Sources st m Text indentedLine indents = try $ do lookAhead spaceChar gobbleAtMostSpaces indents @@ -372,7 +379,7 @@ indentedLine indents = try $ do -- one or more indented lines, possibly separated by blank lines. -- any amount of indentation will work. indentedBlock :: (HasReaderOptions st, Monad m) - => ParserT Text st m Text + => ParserT Sources st m Text indentedBlock = try $ do indents <- length <$> lookAhead (many1 spaceChar) lns <- many1 $ try $ do b <- option "" blanklines @@ -381,20 +388,20 @@ indentedBlock = try $ do optional blanklines return $ T.unlines lns -quotedBlock :: Monad m => ParserT Text st m Text +quotedBlock :: Monad m => ParserT Sources st m Text quotedBlock = try $ do quote <- lookAhead $ oneOf "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" lns <- many1 $ lookAhead (char quote) >> anyLine optional blanklines return $ T.unlines lns -codeBlockStart :: Monad m => ParserT Text st m Char +codeBlockStart :: Monad m => ParserT Sources st m Char codeBlockStart = string "::" >> blankline >> blankline -codeBlock :: Monad m => ParserT Text ParserState m Blocks +codeBlock :: Monad m => ParserT Sources ParserState m Blocks codeBlock = try $ codeBlockStart >> codeBlockBody -codeBlockBody :: Monad m => ParserT Text ParserState m Blocks +codeBlockBody :: Monad m => ParserT Sources ParserState m Blocks codeBlockBody = do lang <- stateRstHighlight <$> getState try $ B.codeBlockWith ("", maybeToList lang, []) . stripTrailingNewlines <$> @@ -410,14 +417,14 @@ lhsCodeBlock = try $ do return $ B.codeBlockWith ("", ["haskell","literate"], []) $ T.intercalate "\n" lns -latexCodeBlock :: Monad m => ParserT Text st m [Text] +latexCodeBlock :: Monad m => ParserT Sources st m [Text] latexCodeBlock = try $ do try (latexBlockLine "\\begin{code}") many1Till anyLine (try $ latexBlockLine "\\end{code}") where latexBlockLine s = skipMany spaceChar >> string s >> blankline -birdCodeBlock :: Monad m => ParserT Text st m [Text] +birdCodeBlock :: Monad m => ParserT Sources st m [Text] birdCodeBlock = filterSpace <$> many1 birdTrackLine where filterSpace lns = -- if (as is normal) there is always a space after >, drop it @@ -425,7 +432,7 @@ birdCodeBlock = filterSpace <$> many1 birdTrackLine then map (T.drop 1) lns else lns -birdTrackLine :: Monad m => ParserT Text st m Text +birdTrackLine :: Monad m => ParserT Sources st m Text birdTrackLine = char '>' >> anyLine -- @@ -456,7 +463,6 @@ includeDirective top fields body = do let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead oldPos <- getPosition - oldInput <- getInput containers <- stateContainers <$> getState when (f `elem` containers) $ throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos @@ -494,15 +500,11 @@ includeDirective top fields body = do Nothing -> case lookup "literal" fields of Just _ -> return $ B.rawBlock "rst" contents' Nothing -> do - setPosition $ newPos (T.unpack f) 1 1 - setInput $ contents' <> "\n" - bs <- optional blanklines >> - (mconcat <$> many block) - setInput oldInput - setPosition oldPos + addToSources (initialPos (T.unpack f)) + (contents' <> "\n") updateState $ \s -> s{ stateContainers = tail $ stateContainers s } - return bs + return mempty -- @@ -526,7 +528,7 @@ definitionList :: PandocMonad m => RSTParser m Blocks definitionList = B.definitionList <$> many1 definitionListItem -- parses bullet list start and returns its length (inc. following whitespace) -bulletListStart :: Monad m => ParserT Text st m Int +bulletListStart :: Monad m => ParserT Sources st m Int bulletListStart = try $ do notFollowedBy' hrule -- because hrules start out just like lists marker <- oneOf bulletListMarkers @@ -1103,7 +1105,7 @@ quotedReferenceName = try $ do -- plus isolated (no two adjacent) internal hyphens, underscores, -- periods, colons and plus signs; no whitespace or other characters -- are allowed. -simpleReferenceName :: Monad m => ParserT Text st m Text +simpleReferenceName :: Monad m => ParserT Sources st m Text simpleReferenceName = do x <- alphaNum xs <- many $ alphaNum @@ -1122,7 +1124,7 @@ referenceKey = do -- return enough blanks to replace key return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -targetURI :: Monad m => ParserT Text st m Text +targetURI :: Monad m => ParserT Sources st m Text targetURI = do skipSpaces optional $ try $ newline >> notFollowedBy blankline @@ -1160,8 +1162,10 @@ anonymousKey :: Monad m => RSTParser m () anonymousKey = try $ do oneOfStrings [".. __:", "__"] src <- targetURI - pos <- getPosition - let key = toKey $ "_" <> T.pack (printf "%09d" (sourceLine pos)) + -- we need to ensure that the keys are ordered by occurrence in + -- the document. + numKeys <- M.size . stateKeys <$> getState + let key = toKey $ "_" <> T.pack (show numKeys) updateState $ \s -> s { stateKeys = M.insert key ((src,""), nullAttr) $ stateKeys s } @@ -1250,13 +1254,13 @@ headerBlock = do -- Grid tables TODO: -- - column spans -dashedLine :: Monad m => Char -> ParserT Text st m (Int, Int) +dashedLine :: Monad m => Char -> ParserT Sources st m (Int, Int) dashedLine ch = do dashes <- many1 (char ch) sp <- many (char ' ') return (length dashes, length $ dashes ++ sp) -simpleDashedLines :: Monad m => Char -> ParserT Text st m [(Int,Int)] +simpleDashedLines :: Monad m => Char -> ParserT Sources st m [(Int,Int)] simpleDashedLines ch = try $ many1 (dashedLine ch) -- Parse a table row separator @@ -1382,7 +1386,7 @@ hyphens = do -- don't want to treat endline after hyphen or dash as a space return $ B.str result -escapedChar :: Monad m => ParserT Text st m Inlines +escapedChar :: Monad m => ParserT Sources st m Inlines escapedChar = do c <- escaped anyChar return $ if c == ' ' || c == '\n' || c == '\r' -- '\ ' is null in RST diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 509ce1377..47f16ef4b 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -42,7 +42,6 @@ import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Shared (safeRead) -import Text.Parsec hiding (tokenPrim) import Text.Pandoc.RoffChar (characterCodes, combiningAccents) import qualified Data.Sequence as Seq import qualified Data.Foldable as Foldable @@ -122,16 +121,16 @@ instance Default RoffState where , afterConditional = False } -type RoffLexer m = ParserT T.Text RoffState m +type RoffLexer m = ParserT Sources RoffState m -- -- Lexer: T.Text -> RoffToken -- -eofline :: Stream s m Char => ParsecT s u m () +eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m () eofline = void newline <|> eof <|> () <$ lookAhead (string "\\}") -spacetab :: Stream s m Char => ParsecT s u m Char +spacetab :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s u m Char spacetab = char ' ' <|> char '\t' characterCodeMap :: M.Map T.Text Char @@ -303,8 +302,7 @@ expandString = try $ do char '*' cs <- escapeArg <|> countChar 1 anyChar s <- linePartsToText <$> resolveText cs pos - getInput >>= setInput . (s <>) - return () + addToInput s -- Parses: '..' quoteArg :: PandocMonad m => RoffLexer m T.Text @@ -316,7 +314,7 @@ escFont = do font' <- if T.null font || font == "P" then prevFont <$> getState else return $ foldr processFontLetter defaultFontSpec $ T.unpack font - modifyState $ \st -> st{ prevFont = currentFont st + updateState $ \st -> st{ prevFont = currentFont st , currentFont = font' } return [Font font'] where @@ -372,8 +370,8 @@ lexTable pos = do spaces opts <- try tableOptions <|> [] <$ optional (char ';') case lookup "tab" opts of - Just (T.uncons -> Just (c, _)) -> modifyState $ \st -> st{ tableTabChar = c } - _ -> modifyState $ \st -> st{ tableTabChar = '\t' } + Just (T.uncons -> Just (c, _)) -> updateState $ \st -> st{ tableTabChar = c } + _ -> updateState $ \st -> st{ tableTabChar = '\t' } spaces skipMany lexComment spaces @@ -489,18 +487,18 @@ lexConditional mname = do ifPart <- do optional $ try $ char '\\' >> newline lexGroup - <|> do modifyState $ \s -> s{ afterConditional = True } + <|> do updateState $ \s -> s{ afterConditional = True } t <- manToken - modifyState $ \s -> s{ afterConditional = False } + updateState $ \s -> s{ afterConditional = False } return t case mbtest of Nothing -> do - putState st -- reset state, so we don't record macros in skipped section + setState st -- reset state, so we don't record macros in skipped section report $ SkippedContent (T.cons '.' mname) pos return mempty Just True -> return ifPart Just False -> do - putState st + setState st return mempty expression :: PandocMonad m => RoffLexer m (Maybe Bool) @@ -515,7 +513,7 @@ expression = do _ -> Nothing where returnValue v = do - modifyState $ \st -> st{ lastExpression = v } + updateState $ \st -> st{ lastExpression = v } return v lexGroup :: PandocMonad m => RoffLexer m RoffTokens @@ -536,7 +534,7 @@ lexIncludeFile args = do result <- readFileFromDirs dirs $ T.unpack fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (s <>) + Just s -> addToInput s return mempty [] -> return mempty @@ -564,13 +562,13 @@ lexStringDef args = do -- string definition (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) let stringName = linePartsToText x - modifyState $ \st -> + updateState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } return mempty lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens lexMacroDef args = do -- macro definition - modifyState $ \st -> st{ roffMode = CopyMode } + updateState $ \st -> st{ roffMode = CopyMode } (macroName, stopMacro) <- case args of (x : y : _) -> return (linePartsToText x, linePartsToText y) @@ -584,7 +582,7 @@ lexMacroDef args = do -- macro definition _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop - modifyState $ \st -> + updateState $ \st -> st{ customMacros = M.insert macroName ts (customMacros st) , roffMode = NormalMode } return mempty diff --git a/src/Text/Pandoc/Readers/TWiki.hs b/src/Text/Pandoc/Readers/TWiki.hs index c4d7bcc93..276d28aaa 100644 --- a/src/Text/Pandoc/Readers/TWiki.hs +++ b/src/Text/Pandoc/Readers/TWiki.hs @@ -28,22 +28,22 @@ import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) import Text.Pandoc.Readers.HTML (htmlTag, isCommentTag) -import Text.Pandoc.Shared (crFilter, tshow) +import Text.Pandoc.Shared (tshow) import Text.Pandoc.XML (fromEntities) -- | Read twiki from an input string and return a Pandoc document. -readTWiki :: PandocMonad m +readTWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTWiki opts s = do - res <- readWithM parseTWiki def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseTWiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type TWParser = ParserT Text ParserState +type TWParser = ParserT Sources ParserState -- -- utility functions diff --git a/src/Text/Pandoc/Readers/Textile.hs b/src/Text/Pandoc/Readers/Textile.hs index 8d7900de4..981878206 100644 --- a/src/Text/Pandoc/Readers/Textile.hs +++ b/src/Text/Pandoc/Readers/Textile.hs @@ -53,30 +53,34 @@ import Text.Pandoc.Options import Text.Pandoc.Parsing import Text.Pandoc.Readers.HTML (htmlTag, isBlockTag, isInlineTag) import Text.Pandoc.Readers.LaTeX (rawLaTeXBlock, rawLaTeXInline) -import Text.Pandoc.Shared (crFilter, trim, tshow) +import Text.Pandoc.Shared (trim, tshow) -- | Parse a Textile text and return a Pandoc document. -readTextile :: PandocMonad m +readTextile :: (PandocMonad m, ToSources a) => ReaderOptions -- ^ Reader options - -> Text -- ^ String to parse (assuming @'\n'@ line endings) + -> a -> m Pandoc readTextile opts s = do - parsed <- readWithM parseTextile def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + parsed <- readWithM parseTextile def{ stateOptions = opts } sources case parsed of Right result -> return result Left e -> throwError e +type TextileParser = ParserT Sources ParserState -- | Generate a Pandoc ADT from a textile document -parseTextile :: PandocMonad m => ParserT Text ParserState m Pandoc +parseTextile :: PandocMonad m => TextileParser m Pandoc parseTextile = do many blankline startPos <- getPosition -- go through once just to get list of reference keys and notes -- docMinusKeys is the raw document with blanks where the keys/notes were... - let firstPassParser = noteBlock <|> lineClump - manyTill firstPassParser eof >>= setInput . T.concat + let firstPassParser = do + pos <- getPosition + t <- noteBlock <|> lineClump + return (pos, t) + manyTill firstPassParser eof >>= setInput . Sources setPosition startPos st' <- getState let reversedNotes = stateNotes st' @@ -84,10 +88,10 @@ parseTextile = do -- now parse it for real... Pandoc nullMeta . B.toList <$> parseBlocks -- FIXME -noteMarker :: PandocMonad m => ParserT Text ParserState m Text +noteMarker :: PandocMonad m => TextileParser m Text noteMarker = skipMany spaceChar >> string "fn" >> T.pack <$> manyTill digit (char '.') -noteBlock :: PandocMonad m => ParserT Text ParserState m Text +noteBlock :: PandocMonad m => TextileParser m Text noteBlock = try $ do startPos <- getPosition ref <- noteMarker @@ -102,11 +106,11 @@ noteBlock = try $ do return $ T.replicate (sourceLine endPos - sourceLine startPos) "\n" -- | Parse document blocks -parseBlocks :: PandocMonad m => ParserT Text ParserState m Blocks +parseBlocks :: PandocMonad m => TextileParser m Blocks parseBlocks = mconcat <$> manyTill block eof -- | Block parsers list tried in definition order -blockParsers :: PandocMonad m => [ParserT Text ParserState m Blocks] +blockParsers :: PandocMonad m => [TextileParser m Blocks] blockParsers = [ codeBlock , header , blockQuote @@ -121,22 +125,22 @@ blockParsers = [ codeBlock ] -- | Any block in the order of definition of blockParsers -block :: PandocMonad m => ParserT Text ParserState m Blocks +block :: PandocMonad m => TextileParser m Blocks block = do res <- choice blockParsers <?> "block" trace (T.take 60 $ tshow $ B.toList res) return res -commentBlock :: PandocMonad m => ParserT Text ParserState m Blocks +commentBlock :: PandocMonad m => TextileParser m Blocks commentBlock = try $ do string "###." manyTill anyLine blanklines return mempty -codeBlock :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlock :: PandocMonad m => TextileParser m Blocks codeBlock = codeBlockTextile <|> codeBlockHtml -codeBlockTextile :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlockTextile :: PandocMonad m => TextileParser m Blocks codeBlockTextile = try $ do string "bc." <|> string "pre." extended <- option False (True <$ char '.') @@ -156,7 +160,7 @@ trimTrailingNewlines :: Text -> Text trimTrailingNewlines = T.dropWhileEnd (=='\n') -- | Code Blocks in Textile are between <pre> and </pre> -codeBlockHtml :: PandocMonad m => ParserT Text ParserState m Blocks +codeBlockHtml :: PandocMonad m => TextileParser m Blocks codeBlockHtml = try $ do (t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True)) result' <- T.pack <$> manyTill anyChar (htmlTag (tagClose (=="pre"))) @@ -174,7 +178,7 @@ codeBlockHtml = try $ do return $ B.codeBlockWith (ident,classes,kvs) result''' -- | Header of the form "hN. content" with N in 1..6 -header :: PandocMonad m => ParserT Text ParserState m Blocks +header :: PandocMonad m => TextileParser m Blocks header = try $ do char 'h' level <- digitToInt <$> oneOf "123456" @@ -186,14 +190,14 @@ header = try $ do return $ B.headerWith attr' level name -- | Blockquote of the form "bq. content" -blockQuote :: PandocMonad m => ParserT Text ParserState m Blocks +blockQuote :: PandocMonad m => TextileParser m Blocks blockQuote = try $ do string "bq" >> attributes >> char '.' >> whitespace B.blockQuote <$> para -- Horizontal rule -hrule :: PandocMonad m => ParserT Text st m Blocks +hrule :: PandocMonad m => TextileParser m Blocks hrule = try $ do skipSpaces start <- oneOf "-*" @@ -208,39 +212,39 @@ hrule = try $ do -- | Can be a bullet list or an ordered list. This implementation is -- strict in the nesting, sublist must start at exactly "parent depth -- plus one" -anyList :: PandocMonad m => ParserT Text ParserState m Blocks +anyList :: PandocMonad m => TextileParser m Blocks anyList = try $ anyListAtDepth 1 <* blanklines -- | This allow one type of list to be nested into an other type, -- provided correct nesting -anyListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +anyListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks anyListAtDepth depth = choice [ bulletListAtDepth depth, orderedListAtDepth depth, definitionList ] -- | Bullet List of given depth, depth being the number of leading '*' -bulletListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +bulletListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth) -- | Bullet List Item of given depth, depth being the number of -- leading '*' -bulletListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +bulletListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks bulletListItemAtDepth = genericListItemAtDepth '*' -- | Ordered List of given depth, depth being the number of -- leading '#' -orderedListAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +orderedListAtDepth :: PandocMonad m => Int -> TextileParser m Blocks orderedListAtDepth depth = try $ do items <- many1 (orderedListItemAtDepth depth) return $ B.orderedList items -- | Ordered List Item of given depth, depth being the number of -- leading '#' -orderedListItemAtDepth :: PandocMonad m => Int -> ParserT Text ParserState m Blocks +orderedListItemAtDepth :: PandocMonad m => Int -> TextileParser m Blocks orderedListItemAtDepth = genericListItemAtDepth '#' -- | Common implementation of list items -genericListItemAtDepth :: PandocMonad m => Char -> Int -> ParserT Text ParserState m Blocks +genericListItemAtDepth :: PandocMonad m => Char -> Int -> TextileParser m Blocks genericListItemAtDepth c depth = try $ do count depth (char c) >> attributes >> whitespace contents <- mconcat <$> many ((B.plain . mconcat <$> many1 inline) <|> @@ -250,25 +254,25 @@ genericListItemAtDepth c depth = try $ do return $ contents <> sublist -- | A definition list is a set of consecutive definition items -definitionList :: PandocMonad m => ParserT Text ParserState m Blocks +definitionList :: PandocMonad m => TextileParser m Blocks definitionList = try $ B.definitionList <$> many1 definitionListItem -- | List start character. -listStart :: PandocMonad m => ParserT Text ParserState m () +listStart :: PandocMonad m => TextileParser m () listStart = genericListStart '*' <|> () <$ genericListStart '#' <|> () <$ definitionListStart -genericListStart :: PandocMonad m => Char -> ParserT Text st m () +genericListStart :: PandocMonad m => Char -> TextileParser m () genericListStart c = () <$ try (many1 (char c) >> whitespace) -basicDLStart :: PandocMonad m => ParserT Text ParserState m () +basicDLStart :: PandocMonad m => TextileParser m () basicDLStart = do char '-' whitespace notFollowedBy newline -definitionListStart :: PandocMonad m => ParserT Text ParserState m Inlines +definitionListStart :: PandocMonad m => TextileParser m Inlines definitionListStart = try $ do basicDLStart trimInlines . mconcat <$> @@ -281,15 +285,15 @@ definitionListStart = try $ do -- the term defined, then spaces and ":=". The definition follows, on -- the same single line, or spaned on multiple line, after a line -- break. -definitionListItem :: PandocMonad m => ParserT Text ParserState m (Inlines, [Blocks]) +definitionListItem :: PandocMonad m => TextileParser m (Inlines, [Blocks]) definitionListItem = try $ do term <- mconcat . intersperse B.linebreak <$> many1 definitionListStart def' <- string ":=" *> optional whitespace *> (multilineDef <|> inlineDef) return (term, def') - where inlineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] + where inlineDef :: PandocMonad m => TextileParser m [Blocks] inlineDef = liftM (\d -> [B.plain d]) $ optional whitespace >> (trimInlines . mconcat <$> many inline) <* newline - multilineDef :: PandocMonad m => ParserT Text ParserState m [Blocks] + multilineDef :: PandocMonad m => TextileParser m [Blocks] multilineDef = try $ do optional whitespace >> newline s <- T.pack <$> many1Till anyChar (try (string "=:" >> newline)) @@ -300,7 +304,7 @@ definitionListItem = try $ do -- raw content -- | A raw Html Block, optionally followed by blanklines -rawHtmlBlock :: PandocMonad m => ParserT Text ParserState m Blocks +rawHtmlBlock :: PandocMonad m => TextileParser m Blocks rawHtmlBlock = try $ do skipMany spaceChar (_,b) <- htmlTag isBlockTag @@ -308,14 +312,14 @@ rawHtmlBlock = try $ do return $ B.rawBlock "html" b -- | Raw block of LaTeX content -rawLaTeXBlock' :: PandocMonad m => ParserT Text ParserState m Blocks +rawLaTeXBlock' :: PandocMonad m => TextileParser m Blocks rawLaTeXBlock' = do guardEnabled Ext_raw_tex B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces) -- | In textile, paragraphs are separated by blank lines. -para :: PandocMonad m => ParserT Text ParserState m Blocks +para :: PandocMonad m => TextileParser m Blocks para = B.para . trimInlines . mconcat <$> many1 inline -- Tables @@ -326,7 +330,7 @@ toAlignment '>' = AlignRight toAlignment '=' = AlignCenter toAlignment _ = AlignDefault -cellAttributes :: PandocMonad m => ParserT Text ParserState m (Bool, Alignment) +cellAttributes :: PandocMonad m => TextileParser m (Bool, Alignment) cellAttributes = try $ do isHeader <- option False (True <$ char '_') -- we just ignore colspan and rowspan markers: @@ -339,7 +343,7 @@ cellAttributes = try $ do return (isHeader, alignment) -- | A table cell spans until a pipe | -tableCell :: PandocMonad m => ParserT Text ParserState m ((Bool, Alignment), Blocks) +tableCell :: PandocMonad m => TextileParser m ((Bool, Alignment), Blocks) tableCell = try $ do char '|' (isHeader, alignment) <- option (False, AlignDefault) cellAttributes @@ -350,7 +354,7 @@ tableCell = try $ do return ((isHeader, alignment), B.plain content) -- | A table row is made of many table cells -tableRow :: PandocMonad m => ParserT Text ParserState m [((Bool, Alignment), Blocks)] +tableRow :: PandocMonad m => TextileParser m [((Bool, Alignment), Blocks)] tableRow = try $ do -- skip optional row attributes optional $ try $ do @@ -360,7 +364,7 @@ tableRow = try $ do many1 tableCell <* char '|' <* blankline -- | A table with an optional header. -table :: PandocMonad m => ParserT Text ParserState m Blocks +table :: PandocMonad m => TextileParser m Blocks table = try $ do -- ignore table attributes caption <- option mempty $ try $ do @@ -388,7 +392,7 @@ table = try $ do (TableFoot nullAttr []) -- | Ignore markers for cols, thead, tfoot. -ignorableRow :: PandocMonad m => ParserT Text ParserState m () +ignorableRow :: PandocMonad m => TextileParser m () ignorableRow = try $ do char '|' oneOf ":^-~" @@ -397,7 +401,7 @@ ignorableRow = try $ do _ <- anyLine return () -explicitBlockStart :: PandocMonad m => Text -> ParserT Text ParserState m () +explicitBlockStart :: PandocMonad m => Text -> TextileParser m () explicitBlockStart name = try $ do string (T.unpack name) attributes @@ -409,8 +413,8 @@ explicitBlockStart name = try $ do -- However, they can be used to set HTML/CSS attributes when needed. maybeExplicitBlock :: PandocMonad m => Text -- ^ block tag name - -> ParserT Text ParserState m Blocks -- ^ implicit block - -> ParserT Text ParserState m Blocks + -> TextileParser m Blocks -- ^ implicit block + -> TextileParser m Blocks maybeExplicitBlock name blk = try $ do optional $ explicitBlockStart name blk @@ -423,11 +427,11 @@ maybeExplicitBlock name blk = try $ do -- | Any inline element -inline :: PandocMonad m => ParserT Text ParserState m Inlines +inline :: PandocMonad m => TextileParser m Inlines inline = choice inlineParsers <?> "inline" -- | Inline parsers tried in order -inlineParsers :: PandocMonad m => [ParserT Text ParserState m Inlines] +inlineParsers :: PandocMonad m => [TextileParser m Inlines] inlineParsers = [ str , whitespace , endline @@ -447,7 +451,7 @@ inlineParsers = [ str ] -- | Inline markups -inlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines +inlineMarkup :: PandocMonad m => TextileParser m Inlines inlineMarkup = choice [ simpleInline (string "??") (B.cite []) , simpleInline (string "**") B.strong , simpleInline (string "__") B.emph @@ -461,29 +465,29 @@ inlineMarkup = choice [ simpleInline (string "??") (B.cite []) ] -- | Trademark, registered, copyright -mark :: PandocMonad m => ParserT Text st m Inlines +mark :: PandocMonad m => TextileParser m Inlines mark = try $ char '(' >> (try tm <|> try reg <|> copy) -reg :: PandocMonad m => ParserT Text st m Inlines +reg :: PandocMonad m => TextileParser m Inlines reg = do oneOf "Rr" char ')' return $ B.str "\174" -tm :: PandocMonad m => ParserT Text st m Inlines +tm :: PandocMonad m => TextileParser m Inlines tm = do oneOf "Tt" oneOf "Mm" char ')' return $ B.str "\8482" -copy :: PandocMonad m => ParserT Text st m Inlines +copy :: PandocMonad m => TextileParser m Inlines copy = do oneOf "Cc" char ')' return $ B.str "\169" -note :: PandocMonad m => ParserT Text ParserState m Inlines +note :: PandocMonad m => TextileParser m Inlines note = try $ do ref <- char '[' *> many1 digit <* char ']' notes <- stateNotes <$> getState @@ -507,13 +511,13 @@ wordBoundaries :: [Char] wordBoundaries = markupChars <> stringBreakers -- | Parse a hyphened sequence of words -hyphenedWords :: PandocMonad m => ParserT Text ParserState m Text +hyphenedWords :: PandocMonad m => TextileParser m Text hyphenedWords = do x <- wordChunk xs <- many (try $ char '-' >> wordChunk) return $ T.intercalate "-" (x:xs) -wordChunk :: PandocMonad m => ParserT Text ParserState m Text +wordChunk :: PandocMonad m => TextileParser m Text wordChunk = try $ do hd <- noneOf wordBoundaries tl <- many ( noneOf wordBoundaries <|> @@ -522,7 +526,7 @@ wordChunk = try $ do return $ T.pack $ hd:tl -- | Any string -str :: PandocMonad m => ParserT Text ParserState m Inlines +str :: PandocMonad m => TextileParser m Inlines str = do baseStr <- hyphenedWords -- RedCloth compliance : if parsed word is uppercase and immediately @@ -535,11 +539,11 @@ str = do return $ B.str fullStr -- | Some number of space chars -whitespace :: PandocMonad m => ParserT Text st m Inlines +whitespace :: PandocMonad m => TextileParser m Inlines whitespace = many1 spaceChar >> return B.space <?> "whitespace" -- | In Textile, an isolated endline character is a line break -endline :: PandocMonad m => ParserT Text ParserState m Inlines +endline :: PandocMonad m => TextileParser m Inlines endline = try $ do newline notFollowedBy blankline @@ -547,18 +551,18 @@ endline = try $ do notFollowedBy rawHtmlBlock return B.linebreak -rawHtmlInline :: PandocMonad m => ParserT Text ParserState m Inlines +rawHtmlInline :: PandocMonad m => TextileParser m Inlines rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag -- | Raw LaTeX Inline -rawLaTeXInline' :: PandocMonad m => ParserT Text ParserState m Inlines +rawLaTeXInline' :: PandocMonad m => TextileParser m Inlines rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex B.rawInline "latex" <$> rawLaTeXInline -- | Textile standard link syntax is "label":target. But we -- can also have ["label":target]. -link :: PandocMonad m => ParserT Text ParserState m Inlines +link :: PandocMonad m => TextileParser m Inlines link = try $ do bracketed <- (True <$ char '[') <|> return False char '"' *> notFollowedBy (oneOf " \t\n\r") @@ -578,7 +582,7 @@ link = try $ do else B.spanWith attr $ B.link url "" name' -- | image embedding -image :: PandocMonad m => ParserT Text ParserState m Inlines +image :: PandocMonad m => TextileParser m Inlines image = try $ do char '!' >> notFollowedBy space (ident, cls, kvs) <- attributes @@ -590,51 +594,51 @@ image = try $ do char '!' return $ B.imageWith attr src alt (B.str alt) -escapedInline :: PandocMonad m => ParserT Text ParserState m Inlines +escapedInline :: PandocMonad m => TextileParser m Inlines escapedInline = escapedEqs <|> escapedTag -escapedEqs :: PandocMonad m => ParserT Text ParserState m Inlines +escapedEqs :: PandocMonad m => TextileParser m Inlines escapedEqs = B.str . T.pack <$> try (string "==" *> manyTill anyChar' (try $ string "==")) -- | literal text escaped btw <notextile> tags -escapedTag :: PandocMonad m => ParserT Text ParserState m Inlines +escapedTag :: PandocMonad m => TextileParser m Inlines escapedTag = B.str . T.pack <$> try (string "<notextile>" *> manyTill anyChar' (try $ string "</notextile>")) -- | Any special symbol defined in wordBoundaries -symbol :: PandocMonad m => ParserT Text ParserState m Inlines +symbol :: PandocMonad m => TextileParser m Inlines symbol = B.str . T.singleton <$> (notFollowedBy newline *> notFollowedBy rawHtmlBlock *> oneOf wordBoundaries) -- | Inline code -code :: PandocMonad m => ParserT Text ParserState m Inlines +code :: PandocMonad m => TextileParser m Inlines code = code1 <|> code2 -- any character except a newline before a blank line -anyChar' :: PandocMonad m => ParserT Text ParserState m Char +anyChar' :: PandocMonad m => TextileParser m Char anyChar' = satisfy (/='\n') <|> try (char '\n' <* notFollowedBy blankline) -code1 :: PandocMonad m => ParserT Text ParserState m Inlines +code1 :: PandocMonad m => TextileParser m Inlines code1 = B.code . T.pack <$> surrounded (char '@') anyChar' -code2 :: PandocMonad m => ParserT Text ParserState m Inlines +code2 :: PandocMonad m => TextileParser m Inlines code2 = do htmlTag (tagOpen (=="tt") null) B.code . T.pack <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt")) -- | Html / CSS attributes -attributes :: PandocMonad m => ParserT Text ParserState m Attr +attributes :: PandocMonad m => TextileParser m Attr attributes = foldl' (flip ($)) ("",[],[]) <$> try (do special <- option id specialAttribute attrs <- many attribute return (special : attrs)) -specialAttribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +specialAttribute :: PandocMonad m => TextileParser m (Attr -> Attr) specialAttribute = do alignStr <- ("center" <$ char '=') <|> ("justify" <$ try (string "<>")) <|> @@ -643,11 +647,11 @@ specialAttribute = do notFollowedBy spaceChar return $ addStyle $ T.pack $ "text-align:" ++ alignStr -attribute :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +attribute :: PandocMonad m => TextileParser m (Attr -> Attr) attribute = try $ (classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar -classIdAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +classIdAttr :: PandocMonad m => TextileParser m (Attr -> Attr) classIdAttr = try $ do -- (class class #id) char '(' ws <- T.words `fmap` T.pack <$> manyTill anyChar' (char ')') @@ -659,7 +663,7 @@ classIdAttr = try $ do -- (class class #id) classes' -> return $ \(_,_,keyvals) -> ("",classes',keyvals) -styleAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +styleAttr :: PandocMonad m => TextileParser m (Attr -> Attr) styleAttr = do style <- try $ enclosed (char '{') (char '}') anyChar' return $ addStyle $ T.pack style @@ -670,23 +674,23 @@ addStyle style (id',classes,keyvals) = where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"] style' = style <> ";" <> T.concat [v | ("style",v) <- keyvals] -langAttr :: PandocMonad m => ParserT Text ParserState m (Attr -> Attr) +langAttr :: PandocMonad m => TextileParser m (Attr -> Attr) langAttr = do lang <- try $ enclosed (char '[') (char ']') alphaNum return $ \(id',classes,keyvals) -> (id',classes,("lang",T.pack lang):keyvals) -- | Parses material surrounded by a parser. surrounded :: (PandocMonad m, Show t) - => ParserT Text st m t -- ^ surrounding parser - -> ParserT Text st m a -- ^ content parser (to be used repeatedly) - -> ParserT Text st m [a] + => ParserT Sources st m t -- ^ surrounding parser + -> ParserT Sources st m a -- ^ content parser (to be used repeatedly) + -> ParserT Sources st m [a] surrounded border = enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border) simpleInline :: PandocMonad m - => ParserT Text ParserState m t -- ^ surrounding parser + => TextileParser m t -- ^ surrounding parser -> (Inlines -> Inlines) -- ^ Inline constructor - -> ParserT Text ParserState m Inlines -- ^ content parser (to be used repeatedly) + -> TextileParser m Inlines -- ^ content parser (to be used repeatedly) simpleInline border construct = try $ do notAfterString border *> notFollowedBy (oneOf " \t\n\r") @@ -700,7 +704,7 @@ simpleInline border construct = try $ do then body else B.spanWith attr body -groupedInlineMarkup :: PandocMonad m => ParserT Text ParserState m Inlines +groupedInlineMarkup :: PandocMonad m => TextileParser m Inlines groupedInlineMarkup = try $ do char '[' sp1 <- option mempty $ B.space <$ whitespace @@ -709,5 +713,5 @@ groupedInlineMarkup = try $ do char ']' return $ sp1 <> result <> sp2 -eof' :: Monad m => ParserT Text s m Char +eof' :: Monad m => ParserT Sources s m Char eof' = '\n' <$ eof diff --git a/src/Text/Pandoc/Readers/TikiWiki.hs b/src/Text/Pandoc/Readers/TikiWiki.hs index fb4b662c5..5c414fdec 100644 --- a/src/Text/Pandoc/Readers/TikiWiki.hs +++ b/src/Text/Pandoc/Readers/TikiWiki.hs @@ -30,23 +30,23 @@ import Text.Pandoc.Definition import Text.Pandoc.Logging (Verbosity (..)) import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (enclosed, nested) -import Text.Pandoc.Shared (crFilter, safeRead) +import Text.Pandoc.Shared (safeRead) import Text.Pandoc.XML (fromEntities) import Text.Printf (printf) -- | Read TikiWiki from an input string and return a Pandoc document. -readTikiWiki :: PandocMonad m +readTikiWiki :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTikiWiki opts s = do - res <- readWithM parseTikiWiki def{ stateOptions = opts } - (crFilter s <> "\n\n") + let sources = ensureFinalNewlines 2 (toSources s) + res <- readWithM parseTikiWiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right d -> return d -type TikiWikiParser = ParserT Text ParserState +type TikiWikiParser = ParserT Sources ParserState -- -- utility functions diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index d355a4b55..6f92f0063 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -33,9 +33,9 @@ import Data.Time (defaultTimeLocale) import Text.Pandoc.Definition import Text.Pandoc.Options import Text.Pandoc.Parsing hiding (space, spaces, uri) -import Text.Pandoc.Shared (compactify, compactifyDL, crFilter, escapeURI) +import Text.Pandoc.Shared (compactify, compactifyDL, escapeURI) -type T2T = ParserT Text ParserState (Reader T2TMeta) +type T2T = ParserT Sources ParserState (Reader T2TMeta) -- | An object for the T2T macros meta information -- the contents of each field is simply substituted verbatim into the file @@ -68,15 +68,15 @@ getT2TMeta = do (intercalate ", " inps) outp -- | Read Txt2Tags from an input string returning a Pandoc document -readTxt2Tags :: PandocMonad m +readTxt2Tags :: (PandocMonad m, ToSources a) => ReaderOptions - -> Text + -> a -> m Pandoc readTxt2Tags opts s = do + let sources = ensureFinalNewlines 2 (toSources s) meta <- getT2TMeta let parsed = flip runReader meta $ - readWithM parseT2T (def {stateOptions = opts}) $ - crFilter s <> "\n\n" + readWithM parseT2T (def {stateOptions = opts}) sources case parsed of Right result -> return result Left e -> throwError e diff --git a/src/Text/Pandoc/Readers/Vimwiki.hs b/src/Text/Pandoc/Readers/Vimwiki.hs index 74dac5ea7..460f304c4 100644 --- a/src/Text/Pandoc/Readers/Vimwiki.hs +++ b/src/Text/Pandoc/Readers/Vimwiki.hs @@ -74,23 +74,28 @@ import Text.Pandoc.Parsing (ParserState, ParserT, blanklines, emailAddress, many1Till, orderedListMarker, readWithM, registerHeader, spaceChar, stateMeta, stateOptions, uri, manyTillChar, manyChar, textStr, - many1Char, countChar, many1TillChar) -import Text.Pandoc.Shared (crFilter, splitTextBy, stringify, stripFirstAndLast, + many1Char, countChar, many1TillChar, + alphaNum, anyChar, char, newline, noneOf, oneOf, + space, spaces, string) +import Text.Pandoc.Sources (ToSources(..), Sources) +import Text.Pandoc.Shared (splitTextBy, stringify, stripFirstAndLast, isURI, tshow) -import Text.Parsec.Char (alphaNum, anyChar, char, newline, noneOf, oneOf, space, - spaces, string) import Text.Parsec.Combinator (between, choice, eof, lookAhead, many1, manyTill, notFollowedBy, option, skipMany1) import Text.Parsec.Prim (getState, many, try, updateState, (<|>)) -readVimwiki :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readVimwiki :: (PandocMonad m, ToSources a) + => ReaderOptions + -> a + -> m Pandoc readVimwiki opts s = do - res <- readWithM parseVimwiki def{ stateOptions = opts } $ crFilter s + let sources = toSources s + res <- readWithM parseVimwiki def{ stateOptions = opts } sources case res of Left e -> throwError e Right result -> return result -type VwParser = ParserT Text ParserState +type VwParser = ParserT Sources ParserState -- constants diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index e389c1727..920edca7b 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -298,6 +298,7 @@ tabFilter tabStop = T.unlines . map go . T.lines (tabStop - (T.length s1 `mod` tabStop)) (T.pack " ") <> go (T.drop 1 s2) +{-# DEPRECATED crFilter "readers filter crs automatically" #-} -- | Strip out DOS line endings. crFilter :: T.Text -> T.Text crFilter = T.filter (/= '\r') diff --git a/src/Text/Pandoc/Sources.hs b/src/Text/Pandoc/Sources.hs new file mode 100644 index 000000000..5511ccfb8 --- /dev/null +++ b/src/Text/Pandoc/Sources.hs @@ -0,0 +1,195 @@ +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# LANGUAGE OverloadedStrings #-} +{- | + Module : Text.Pandoc.Sources + Copyright : Copyright (C) 2021 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane <jgm@berkeley.edu> + Stability : alpha + Portability : portable + +Defines Sources object to be used as input to pandoc parsers and redefines Char +parsers so they get source position information from it. +-} + +module Text.Pandoc.Sources + ( Sources(..) + , ToSources(..) + , UpdateSourcePos(..) + , sourcesToText + , initialSourceName + , addToSources + , ensureFinalNewlines + , addToInput + , satisfy + , oneOf + , noneOf + , anyChar + , char + , string + , newline + , space + , spaces + , letter + , digit + , hexDigit + , alphaNum + ) +where +import qualified Text.Parsec as P +import Text.Parsec (Stream(..), ParsecT) +import Text.Parsec.Pos as P +import Data.Text (Text) +import qualified Data.Text as T +import Data.Char (isSpace, isLetter, isAlphaNum, isDigit, isHexDigit) +import Data.String (IsString(..)) +import qualified Data.List.NonEmpty as NonEmpty + +-- | A list of inputs labeled with source positions. It is assumed +-- that the 'Text's have @\n@ line endings. +newtype Sources = Sources { unSources :: [(SourcePos, Text)] } + deriving (Show, Semigroup, Monoid) + +instance Monad m => Stream Sources m Char where + uncons (Sources []) = return Nothing + uncons (Sources ((pos,t):rest)) = + case T.uncons t of + Nothing -> uncons (Sources rest) + Just (c,t') -> return $ Just (c, Sources ((pos,t'):rest)) + +instance IsString Sources where + fromString s = Sources [(P.initialPos "", T.pack (filter (/='\r') s))] + +class ToSources a where + toSources :: a -> Sources + +instance ToSources Text where + toSources t = Sources [(P.initialPos "", T.filter (/='\r') t)] + +instance ToSources [(FilePath, Text)] where + toSources = Sources + . map (\(fp,t) -> + (P.initialPos fp, T.snoc (T.filter (/='\r') t) '\n')) + +instance ToSources Sources where + toSources = id + +sourcesToText :: Sources -> Text +sourcesToText (Sources xs) = mconcat $ map snd xs + +addToSources :: Monad m => SourcePos -> Text -> ParsecT Sources u m () +addToSources pos t = do + curpos <- P.getPosition + Sources xs <- P.getInput + let xs' = case xs of + [] -> [] + ((_,t'):rest) -> (curpos,t'):rest + P.setInput $ Sources ((pos, T.filter (/='\r') t):xs') + +ensureFinalNewlines :: Int -- ^ number of trailing newlines + -> Sources + -> Sources +ensureFinalNewlines n (Sources xs) = + case NonEmpty.nonEmpty xs of + Nothing -> Sources [(initialPos "", T.replicate n "\n")] + Just lst -> + case NonEmpty.last lst of + (spos, t) -> + case T.length (T.takeWhileEnd (=='\n') t) of + len | len >= n -> Sources xs + | otherwise -> Sources (NonEmpty.init lst ++ + [(spos, + t <> T.replicate (n - len) "\n")]) + +class UpdateSourcePos s c where + updateSourcePos :: SourcePos -> c -> s -> SourcePos + +instance UpdateSourcePos Text Char where + updateSourcePos pos c _ = updatePosChar pos c + +instance UpdateSourcePos Sources Char where + updateSourcePos pos c sources = + case sources of + Sources [] -> updatePosChar pos c + Sources ((_,t):(pos',_):_) + | T.null t -> pos' + Sources _ -> + case c of + '\n' -> incSourceLine (setSourceColumn pos 1) 1 + '\t' -> incSourceColumn pos (4 - ((sourceColumn pos - 1) `mod` 4)) + _ -> incSourceColumn pos 1 + +-- | Get name of first source in 'Sources'. +initialSourceName :: Sources -> FilePath +initialSourceName (Sources []) = "" +initialSourceName (Sources ((pos,_):_)) = sourceName pos + +-- | Add some text to the beginning of the input sources. +-- This simplifies code that expands macros. +addToInput :: Monad m => Text -> ParsecT Sources u m () +addToInput t = do + Sources xs <- P.getInput + case xs of + [] -> P.setInput $ Sources [(initialPos "",t)] + (pos,t'):rest -> P.setInput $ Sources ((pos, t <> t'):rest) + +-- We need to redefine the parsers in Text.Parsec.Char so that they +-- update source positions properly from the Sources stream. + +satisfy :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => (Char -> Bool) -> ParsecT s u m Char +satisfy f = P.tokenPrim show updateSourcePos matcher + where + matcher c = if f c then Just c else Nothing + +oneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => [Char] -> ParsecT s u m Char +oneOf cs = satisfy (`elem` cs) + +noneOf :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => [Char] -> ParsecT s u m Char +noneOf cs = satisfy (`notElem` cs) + +anyChar :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m Char +anyChar = satisfy (const True) + +char :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => Char -> ParsecT s u m Char +char c = satisfy (== c) + +string :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => [Char] -> ParsecT s u m [Char] +string = mapM char + +newline :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m Char +newline = satisfy (== '\n') + +space :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m Char +space = satisfy isSpace + +spaces :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m () +spaces = P.skipMany space P.<?> "white space" + +letter :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m Char +letter = satisfy isLetter + +alphaNum :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m Char +alphaNum = satisfy isAlphaNum + +digit :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m Char +digit = satisfy isDigit + +hexDigit :: (Monad m, Stream s m Char, UpdateSourcePos s Char) + => ParsecT s u m Char +hexDigit = satisfy isHexDigit diff --git a/test/Tests/Readers/Markdown.hs b/test/Tests/Readers/Markdown.hs index 6e38da21a..f055ab197 100644 --- a/test/Tests/Readers/Markdown.hs +++ b/test/Tests/Readers/Markdown.hs @@ -374,8 +374,8 @@ tests = [ testGroup "inline code" , testGroup "lhs" [ test (purely $ readMarkdown def{ readerExtensions = enableExtension Ext_literate_haskell pandocExtensions }) - "inverse bird tracks and html" $ - "> a\n\n< b\n\n<div>\n" + "inverse bird tracks and html" + $ ("> a\n\n< b\n\n<div>\n" :: Text) =?> codeBlockWith ("",["haskell","literate"],[]) "a" <> codeBlockWith ("",["haskell"],[]) "b" -- cgit v1.2.3 From 05ea507bd75e0bb4bbb8f25cad5fa2f02e4f6796 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 9 May 2021 15:26:11 -0600 Subject: T.P.Parsing: improve include file functions. Remove old `insertIncludedFileF`. [API change] Give `insertIncludedFile` a more general type, allowing it to be used where `insertIncludedFileF` was. --- src/Text/Pandoc/Parsing.hs | 61 +++++++++++++++++----------------- src/Text/Pandoc/Readers/Org/Blocks.hs | 3 +- src/Text/Pandoc/Readers/Org/Parsing.hs | 3 +- 3 files changed, 34 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 11c4c7a62..cbe9993c6 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -121,7 +121,6 @@ module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, (<+?>), extractIdClass, insertIncludedFile, - insertIncludedFileF, -- * Re-exports from Text.Parsec Stream, runParser, @@ -1638,12 +1637,15 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') cls' = maybe cls T.words $ lookup "class" kvs kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs -insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st) - => ParserT a st m (mf Blocks) - -> (Text -> a) - -> [FilePath] -> FilePath - -> ParserT a st m (mf Blocks) -insertIncludedFile' blocks totoks dirs f = do +insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) + => ParserT a st m b -- ^ parser to apply + -> (Text -> a) -- ^ convert Text to stream type + -> [FilePath] -- ^ search path (directories) + -> FilePath -- ^ path of file to include + -> Maybe Int -- ^ start line (negative counts from end) + -> Maybe Int -- ^ end line (negative counts from end) + -> ParserT a st m b +insertIncludedFile parser toStream dirs f mbstartline mbendline = do oldPos <- getPosition oldInput <- getInput containers <- getIncludeFiles <$> getState @@ -1652,33 +1654,32 @@ insertIncludedFile' blocks totoks dirs f = do updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of - Just s -> return s + Just s -> return $ exciseLines mbstartline mbendline s Nothing -> do report $ CouldNotLoadIncludeFile (T.pack f) oldPos return "" - setPosition $ newPos f 1 1 - setInput $ totoks contents - bs <- blocks + setInput $ toStream contents + setPosition $ newPos f (fromMaybe 1 mbstartline) 1 + result <- parser setInput oldInput setPosition oldPos updateState dropLatestIncludeFile - return bs + return result + +exciseLines :: Maybe Int -> Maybe Int -> Text -> Text +exciseLines Nothing Nothing t = t +exciseLines mbstartline mbendline t = + T.unlines $ take (endline' - (startline' - 1)) + $ drop (startline' - 1) contentLines + where + contentLines = T.lines t + numLines = length contentLines + startline' = case mbstartline of + Nothing -> 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + endline' = case mbendline of + Nothing -> numLines + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end --- | Parse content of include file as blocks. Circular includes result in an --- @PandocParseError@. -insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) - => ParserT [a] st m Blocks - -> (Text -> [a]) - -> [FilePath] -> FilePath - -> ParserT [a] st m Blocks -insertIncludedFile blocks totoks dirs f = - runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f - --- TODO: replace this with something using addToSources. --- | Parse content of include file as future blocks. Circular includes result in --- an @PandocParseError@. -insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) - => ParserT Sources st m (Future st Blocks) - -> [FilePath] -> FilePath - -> ParserT Sources st m (Future st Blocks) -insertIncludedFileF p = insertIncludedFile' p (\t -> Sources [(initialPos "",t)]) diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 883434cdc..5e15c2c36 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -44,6 +44,7 @@ import Data.List.NonEmpty (nonEmpty) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Walk as Walk +import Text.Pandoc.Sources (ToSources(..)) -- -- parsing blocks @@ -527,7 +528,7 @@ include = try $ do _ -> nullAttr return $ pure . B.codeBlockWith attr <$> parseRaw _ -> return $ return . B.fromList . blockFilter params <$> blockList - insertIncludedFileF blocksParser ["."] filename + insertIncludedFile blocksParser toSources ["."] filename Nothing Nothing where includeTarget :: PandocMonad m => OrgParser m FilePath includeTarget = do diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index c7ea02815..701bf3398 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -63,8 +63,7 @@ module Text.Pandoc.Readers.Org.Parsing , ellipses , citeKey , gridTableWith - , insertIncludedFileF - -- * Re-exports from Text.Pandoc.Parsec + , insertIncludedFile , runParser , runParserT , getInput -- cgit v1.2.3 From 41a3ac9da99c2701fa7e6adbc85da91f191620f4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 9 May 2021 16:26:11 -0600 Subject: RST reader: use `insertIncludedFile` from T.P.Parsing... instead of reproducing much of its code. --- src/Text/Pandoc/Readers/RST.hs | 94 ++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 58 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index a3fcf028c..bb70b2620 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -27,8 +27,7 @@ import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Builder (Blocks, Inlines, fromList, setMeta, trimInlines) import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, - readFileFromDirs, getTimestamp) +import Text.Pandoc.Class.PandocMonad (PandocMonad, fetchItem, getTimestamp) import Text.Pandoc.CSV (CSVOptions (..), defaultCSVOptions, parseCSV) import Text.Pandoc.Definition import Text.Pandoc.Error @@ -453,59 +452,38 @@ encoding -} includeDirective :: PandocMonad m - => Text -> [(Text, Text)] -> Text + => Text + -> [(Text, Text)] + -> Text -> RSTParser m Blocks includeDirective top fields body = do - let f = trim top - guard $ not (T.null f) + let f = T.unpack $ trim top + guard $ not $ null f guard $ T.null (trim body) - -- options - let (startLine :: Maybe Int) = lookup "start-line" fields >>= safeRead - let (endLine :: Maybe Int) = lookup "end-line" fields >>= safeRead - oldPos <- getPosition - containers <- stateContainers <$> getState - when (f `elem` containers) $ - throwError $ PandocParseError $ "Include file loop at " <> tshow oldPos - updateState $ \s -> s{ stateContainers = f : stateContainers s } - mbContents <- readFileFromDirs ["."] $ T.unpack f - contentLines <- case mbContents of - Just s -> return $ T.lines s - Nothing -> do - logMessage $ CouldNotLoadIncludeFile f oldPos - return [] - let numLines = length contentLines - let startLine' = case startLine of - Nothing -> 1 - Just x | x >= 0 -> x - | otherwise -> numLines + x -- negative from end - let endLine' = case endLine of - Nothing -> numLines + 1 - Just x | x >= 0 -> x - | otherwise -> numLines + x -- negative from end - let contentLines' = drop (startLine' - 1) - $ take (endLine' - 1) contentLines - let contentLines'' = (case trim <$> lookup "end-before" fields of - Just patt -> takeWhile (not . (patt `T.isInfixOf`)) - Nothing -> id) . - (case trim <$> lookup "start-after" fields of - Just patt -> drop 1 . - dropWhile (not . (patt `T.isInfixOf`)) - Nothing -> id) $ contentLines' - let contents' = T.unlines contentLines'' - case lookup "code" fields of - Just lang -> do - let classes = maybe [] T.words (lookup "class" fields) - let ident = maybe "" trimr $ lookup "name" fields - codeblock ident classes fields (trimr lang) contents' False - Nothing -> case lookup "literal" fields of - Just _ -> return $ B.rawBlock "rst" contents' - Nothing -> do - addToSources (initialPos (T.unpack f)) - (contents' <> "\n") - updateState $ \s -> s{ stateContainers = - tail $ stateContainers s } - return mempty - + let startLine = lookup "start-line" fields >>= safeRead + let endLine = lookup "end-line" fields >>= safeRead + let classes = maybe [] T.words (lookup "class" fields) + let ident = maybe "" trimr $ lookup "name" fields + let parser = + case lookup "code" fields of + Just lang -> + (codeblock ident classes fields (trimr lang) False + . sourcesToText) <$> getInput + Nothing -> + case lookup "literal" fields of + Just _ -> B.rawBlock "rst" . sourcesToText <$> getInput + Nothing -> parseBlocks + let selectLines = + (case trim <$> lookup "end-before" fields of + Just patt -> takeWhile (not . (patt `T.isInfixOf`)) + Nothing -> id) . + (case trim <$> lookup "start-after" fields of + Just patt -> drop 1 . + dropWhile (not . (patt `T.isInfixOf`)) + Nothing -> id) + let toStream t = + toSources [(f, T.unlines . selectLines . T.lines $ t)] + insertIncludedFile parser toStream ["."] f startLine endLine -- -- list blocks @@ -734,8 +712,8 @@ directive' = do "" -> stateRstHighlight def lang -> Just lang }) x | x == "code" || x == "code-block" || x == "sourcecode" -> - codeblock name classes (map (second trimr) fields) - (trim top) body True + return $ codeblock name classes (map (second trimr) fields) + (trim top) True body "aafig" -> do let attribs = (name, ["aafig"], map (second trimr) fields) return $ B.codeBlockWith attribs $ stripTrailingNewlines body @@ -1021,10 +999,10 @@ toChunks = dropWhile T.null then "\\begin{aligned}\n" <> s <> "\n\\end{aligned}" else s -codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Text -> Bool - -> RSTParser m Blocks -codeblock ident classes fields lang body rmTrailingNewlines = - return $ B.codeBlockWith attribs $ stripTrailingNewlines' body +codeblock :: Text -> [Text] -> [(Text, Text)] -> Text -> Bool -> Text + -> Blocks +codeblock ident classes fields lang rmTrailingNewlines body = + B.codeBlockWith attribs $ stripTrailingNewlines' body where stripTrailingNewlines' = if rmTrailingNewlines then stripTrailingNewlines else id -- cgit v1.2.3 From b2398cd74744ce34fa10cc34f4051555f21feb2c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 9 May 2021 18:42:42 -0600 Subject: Org reader: Resolve org includes relative to ... ...the directory containing the file containing the INCLUDE directive. Closes #5501. --- src/Text/Pandoc/Readers/Org/Blocks.hs | 6 ++++-- src/Text/Pandoc/Readers/Org/Parsing.hs | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index 5e15c2c36..f18d2f9a7 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -40,7 +40,7 @@ import Data.List (foldl', intersperse) import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Text (Text) import Data.List.NonEmpty (nonEmpty) - +import System.FilePath import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Walk as Walk @@ -528,7 +528,9 @@ include = try $ do _ -> nullAttr return $ pure . B.codeBlockWith attr <$> parseRaw _ -> return $ return . B.fromList . blockFilter params <$> blockList - insertIncludedFile blocksParser toSources ["."] filename Nothing Nothing + currentDir <- takeDirectory . sourceName <$> getPosition + insertIncludedFile blocksParser toSources + [currentDir] filename Nothing Nothing where includeTarget :: PandocMonad m => OrgParser m FilePath includeTarget = do diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 701bf3398..f0949e205 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -99,6 +99,7 @@ module Text.Pandoc.Readers.Org.Parsing , getState , updateState , SourcePos + , sourceName , getPosition ) where -- cgit v1.2.3 From 2a2e08d82335d45c982f4acf14b62130365f2c8e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 9 May 2021 18:50:41 -0600 Subject: RST reader: seek include files in the directory... ...of the file containing the include directive, as RST requires. Closes #6632. --- src/Text/Pandoc/Readers/RST.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index bb70b2620..35292d949 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -38,6 +38,7 @@ import Text.Pandoc.Parsing import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Data.Time.Format +import System.FilePath (takeDirectory) -- TODO: -- [ ] .. parsed-literal @@ -483,7 +484,8 @@ includeDirective top fields body = do Nothing -> id) let toStream t = toSources [(f, T.unlines . selectLines . T.lines $ t)] - insertIncludedFile parser toStream ["."] f startLine endLine + currentDir <- takeDirectory . sourceName <$> getPosition + insertIncludedFile parser toStream [currentDir] f startLine endLine -- -- list blocks -- cgit v1.2.3 From ff7176de80726a5db4c4bc4c5a3296b2bcdab192 Mon Sep 17 00:00:00 2001 From: nuew <code@nuew.net> Date: Mon, 10 May 2021 11:26:32 -0400 Subject: epub Writer: Fix belongs-to-collection XML id choice (#7267) The epub writer previously used the same XML id for both the book identifier and the epub collection. This causes an error on epubcheck. --- src/Text/Pandoc/Writers/EPUB.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 3f10cb437..3c092a2c1 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -991,12 +991,12 @@ metadataElement version md currentTime = showDateTimeISO8601 currentTime | version == EPUB3 ] belongsToCollectionNodes = maybe [] - (\belongsToCollection -> (unode "meta" ! [("property", "belongs-to-collection"), ("id", "epub-id-1")] $ belongsToCollection ) + (\belongsToCollection -> (unode "meta" ! [("property", "belongs-to-collection"), ("id", "epub-collection-1")] $ belongsToCollection ) : - [unode "meta" ! [("refines", "#epub-id-1"), ("property", "collection-type")] $ ("series" :: Text) ]) + [unode "meta" ! [("refines", "#epub-collection-1"), ("property", "collection-type")] $ ("series" :: Text) ]) (epubBelongsToCollection md)++ maybe [] - (\groupPosition -> [unode "meta" ! [("refines", "#epub-id-1"), ("property", "group-position")] $ groupPosition ]) + (\groupPosition -> [unode "meta" ! [("refines", "#epub-collection-1"), ("property", "group-position")] $ groupPosition ]) (epubGroupPosition md) dcTag n s = unode ("dc:" <> n) s dcTag' n s = [dcTag n s] -- cgit v1.2.3 From 2bd5d0cafbe67fa723f49fd2d35d913594fdb935 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 11 May 2021 07:52:05 -0600 Subject: LaTeX writer: better handling of line breaks in simple tables. Now we also handle the case where they're embedded in other elements, e.g. spans. Closes #7272. --- src/Text/Pandoc/Writers/LaTeX/Table.hs | 4 +--- test/command/7272.md | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 test/command/7272.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 6f8386937..16f63314b 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -219,9 +219,7 @@ footRows (Ann.TableFoot _attr rows) = map headerRowCells rows -- we need to go to some lengths to get line breaks working: -- as LineBreak bs = \vtop{\hbox{\strut as}\hbox{\strut bs}}. fixLineBreaks :: Block -> Block -fixLineBreaks (Para ils) = Para $ fixLineBreaks' ils -fixLineBreaks (Plain ils) = Plain $ fixLineBreaks' ils -fixLineBreaks x = x +fixLineBreaks = walk fixLineBreaks' fixLineBreaks' :: [Inline] -> [Inline] fixLineBreaks' ils = case splitBy (== LineBreak) ils of diff --git a/test/command/7272.md b/test/command/7272.md new file mode 100644 index 000000000..d3a3b2137 --- /dev/null +++ b/test/command/7272.md @@ -0,0 +1,24 @@ +``` +% pandoc -t latex -f html +<table> + <tbody> + <tr> + <td> + <span> + text + <br /> + text2 + </span> + </td> + </tr> + </tbody> +</table> +^D +\begin{longtable}[]{@{} + >{\raggedright\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{1.00}}@{}} +\toprule +\endhead +{\vtop{\hbox{\strut text}\hbox{\strut text2 }}} \\ +\bottomrule +\end{longtable} +``` -- cgit v1.2.3 From a66e50840bdc3c92e661257a4a0c5fec34aa25bc Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 11 May 2021 09:01:36 -0600 Subject: T.P.XML.Light - add Eq, Ord instances... for Content, Element, Attr, CDataKind. [API change] --- src/Text/Pandoc/XML/Light/Types.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/XML/Light/Types.hs b/src/Text/Pandoc/XML/Light/Types.hs index 03fdc2e4d..ba602ac1f 100644 --- a/src/Text/Pandoc/XML/Light/Types.hs +++ b/src/Text/Pandoc/XML/Light/Types.hs @@ -53,7 +53,7 @@ type Line = Integer data Content = Elem Element | Text CData | CRef Text - deriving (Show, Typeable, Data) + deriving (Show, Typeable, Data, Ord, Eq) -- | XML elements data Element = Element { @@ -61,7 +61,7 @@ data Element = Element { elAttribs :: [Attr], elContent :: [Content], elLine :: Maybe Line - } deriving (Show, Typeable, Data) + } deriving (Show, Typeable, Data, Ord, Eq) -- | XML attributes data Attr = Attr { @@ -74,13 +74,13 @@ data CData = CData { cdVerbatim :: CDataKind, cdData :: Text, cdLine :: Maybe Line - } deriving (Show, Typeable, Data) + } deriving (Show, Typeable, Data, Ord, Eq) data CDataKind = CDataText -- ^ Ordinary character data; pretty printer escapes &, < etc. | CDataVerbatim -- ^ Unescaped character data; pretty printer embeds it in <![CDATA[.. | CDataRaw -- ^ As-is character data; pretty printer passes it along without any escaping or CDATA wrap-up. - deriving ( Eq, Show, Typeable, Data ) + deriving ( Eq, Ord, Show, Typeable, Data ) -- | XML qualified names data QName = QName { -- cgit v1.2.3 From 5eb7ad7d1ebbfe27a282a2d75f199bacf2052be3 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 11 May 2021 22:31:38 -0600 Subject: Improve integration of settings from reference.docx. The settings we can carry over from a reference.docx are autoHyphenation, consecutiveHyphenLimit, hyphenationZone, doNotHyphenateCap, evenAndOddHeaders, and proofState. Previously this was implemented in a buggy way, so that the reference doc's values AND the new values were included. This change allows users to create a reference.docx that sets w:proofState for spelling or grammar to "dirty," so that spell/grammar checking will be triggered on the generated docx. Closes #1209. --- src/Text/Pandoc/Writers/Docx.hs | 27 +++++++++++---------- test/docx/golden/block_quotes.docx | Bin 10067 -> 10061 bytes test/docx/golden/codeblock.docx | Bin 9916 -> 9910 bytes test/docx/golden/comments.docx | Bin 10254 -> 10248 bytes test/docx/golden/custom_style_no_reference.docx | Bin 10017 -> 10011 bytes test/docx/golden/custom_style_preserve.docx | Bin 10646 -> 10640 bytes test/docx/golden/custom_style_reference.docx | Bin 12400 -> 12397 bytes test/docx/golden/definition_list.docx | Bin 9916 -> 9910 bytes .../golden/document-properties-short-desc.docx | Bin 9922 -> 9916 bytes test/docx/golden/document-properties.docx | Bin 10399 -> 10393 bytes test/docx/golden/headers.docx | Bin 10055 -> 10049 bytes test/docx/golden/image.docx | Bin 26733 -> 26727 bytes test/docx/golden/inline_code.docx | Bin 9855 -> 9849 bytes test/docx/golden/inline_formatting.docx | Bin 10035 -> 10029 bytes test/docx/golden/inline_images.docx | Bin 26789 -> 26783 bytes test/docx/golden/link_in_notes.docx | Bin 10077 -> 10071 bytes test/docx/golden/links.docx | Bin 10248 -> 10242 bytes test/docx/golden/lists.docx | Bin 10314 -> 10308 bytes test/docx/golden/lists_continuing.docx | Bin 10110 -> 10104 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10192 -> 10186 bytes test/docx/golden/lists_restarting.docx | Bin 10108 -> 10102 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10212 -> 10206 bytes test/docx/golden/notes.docx | Bin 10024 -> 10018 bytes test/docx/golden/raw-blocks.docx | Bin 9956 -> 9950 bytes test/docx/golden/raw-bookmarks.docx | Bin 10090 -> 10084 bytes test/docx/golden/table_one_row.docx | Bin 9925 -> 9920 bytes test/docx/golden/table_with_list_cell.docx | Bin 10230 -> 10225 bytes test/docx/golden/tables.docx | Bin 10271 -> 10266 bytes test/docx/golden/track_changes_deletion.docx | Bin 9899 -> 9893 bytes test/docx/golden/track_changes_insertion.docx | Bin 9882 -> 9876 bytes test/docx/golden/track_changes_move.docx | Bin 9916 -> 9910 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 10028 -> 10022 bytes test/docx/golden/unicode.docx | Bin 9841 -> 9835 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9888 -> 9882 bytes 34 files changed, 14 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e11961bfd..75bed1595 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -469,12 +469,12 @@ writeDocx opts doc = do -- adds references to footnotes or endnotes we don't have... -- we do, however, copy some settings over from reference let settingsPath = "word/settings.xml" - settingsList = [ "w:autoHyphenation" - , "w:consecutiveHyphenLimit" - , "w:hyphenationZone" - , "w:doNotHyphenateCap" - , "w:evenAndOddHeaders" - , "w:proofState" + settingsList = [ "autoHyphenation" + , "consecutiveHyphenLimit" + , "hyphenationZone" + , "doNotHyphenateCap" + , "evenAndOddHeaders" + , "proofState" ] settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList @@ -577,16 +577,17 @@ copyChildren :: (PandocMonad m) copyChildren refArchive distArchive path timestamp elNames = do ref <- parseXml refArchive distArchive path dist <- parseXml distArchive distArchive path + let elsToCopy = + map cleanElem $ filterChildrenName (\e -> qName e `elem` elNames) ref + let elsToKeep = + [e | Elem e <- elContent dist, not (any (hasSameNameAs e) elsToCopy)] return $ toEntry path timestamp $ renderXml dist{ - elContent = elContent dist ++ copyContent ref + elContent = map Elem elsToKeep ++ map Elem elsToCopy } where - strName QName{qName=name, qPrefix=prefix} - | Just p <- prefix = p <> ":" <> name - | otherwise = name - shouldCopy = (`elem` elNames) . strName - cleanElem el@Element{elName=name} = Elem el{elName=name{qURI=Nothing}} - copyContent = map cleanElem . filterChildrenName shouldCopy + hasSameNameAs (Element {elName = n1}) (Element {elName = n2}) = + qName n1 == qName n2 + cleanElem el@Element{elName=name} = el{elName=name{qURI=Nothing}} -- this is the lowest number used for a list numId baseListId :: Int diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index af1b33ca2..b6973fcfd 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index f748f1f01..f0d35d3ad 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index ac9d56680..fb1c15dec 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index f27727edd..43d536b65 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 1da499d6a..8865010d1 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index 4d2fe245d..10f7bf661 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index f386fcea3..2a7d81a34 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index debe9a3f6..2fcd14908 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index cd17400bf..39533f42d 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index d3af8a3dd..c8d67c45b 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 1c4e738c0..8a7aeec10 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 35f43f19f..969237cec 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 8de3f70f6..cda936a39 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index e76558be9..69991e791 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index 88bae8142..40e848195 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index 455adcfc7..28237a30d 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 081d9ddba..bf075805e 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index fc9213fc5..e7d308e13 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index b636fd3f8..9763e347e 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index 252623215..b717ca619 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index a8c3f5478..873e731d2 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 43e650ebd..134cb2eaf 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index fe4f7845b..c49ed79c8 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index 45e90608f..1f2cbb214 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index e60bb303f..a0160cdb4 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index a4037cf32..6427d475c 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index bc1bc27f8..470eac2ae 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 247725aaa..3542b8f9c 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index 3863afef2..b36b4485e 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 5c848b63a..e30ab06ae 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index e0c843713..11597d578 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index 78a773bdd..c7bff82e5 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index c66a45b74..c70f6946e 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 46309319ef9eb66ea3598073df4174fb40b1b0f8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 12 May 2021 06:01:13 -0600 Subject: Fix source position reporting for YAML bibliographies. Closes #7273. --- src/Text/Pandoc/Readers/Markdown.hs | 2 -- src/Text/Pandoc/Readers/Metadata.hs | 8 ++++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 69dd51bc4..2d20ff018 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -97,12 +97,10 @@ yamlToRefs :: PandocMonad m -> m [MetaValue] yamlToRefs idpred opts mbfp bstr = do let parser = do - oldPos <- getPosition case mbfp of Nothing -> return () Just fp -> setPosition $ initialPos fp refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr - setPosition oldPos return $ runF refs defaultParserState parsed <- readWithM parser def{ stateOptions = opts } ("" :: Text) case parsed of diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index bbcfe62ea..45eddf25a 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -98,8 +98,12 @@ yamlBsToRefs pMetaValue idpred bstr = Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] -> return . return $ mempty Right _ -> Prelude.fail "expecting YAML object" - Left (_pos, err') - -> Prelude.fail err' + Left (yamlpos, err') + -> do pos <- getPosition + setPosition $ incSourceLine + (setSourceColumn pos (YE.posColumn yamlpos)) + (YE.posLine yamlpos - 1) + Prelude.fail err' nodeToKey :: YAML.Node YE.Pos -> Maybe Text -- cgit v1.2.3 From 0217ae2a4ffe33bee7cc53ba44817cd540b5f01e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 12 May 2021 11:05:55 -0700 Subject: Hande 'annote' field in bibtex/biblatex writer. Closes #7266. --- src/Text/Pandoc/Citeproc/BibTeX.hs | 2 ++ test/command/7266.md | 10 ++++++++++ 2 files changed, 12 insertions(+) create mode 100644 test/command/7266.md (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index f6833000c..b17240557 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -156,6 +156,7 @@ writeBibtexString opts variant mblang ref = , "langid" , "abstract" , "keywords" + , "annote" ] Bibtex -> [ "author" @@ -175,6 +176,7 @@ writeBibtexString opts variant mblang ref = , "address" , "type" , "note" + , "annote" ] valToInlines (TextVal t) = B.text t diff --git a/test/command/7266.md b/test/command/7266.md new file mode 100644 index 000000000..9db833636 --- /dev/null +++ b/test/command/7266.md @@ -0,0 +1,10 @@ +``` +% pandoc -f biblatex -t biblatex -s +@article{id, + annote = "annotation" +} +^D +@article{id, + annote = {annotation} +} +``` -- cgit v1.2.3 From edca1d1656a03d7de4901a1f6d356094ec9fb2df Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 12 May 2021 11:12:27 -0700 Subject: Plain writer: handle superscript unicode minus. Closes #7276. Note: currently we still get unwanted white space around the minus; this needs to be addressed with a change in texmath. --- src/Text/Pandoc/Writers/Shared.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Shared.hs b/src/Text/Pandoc/Writers/Shared.hs index a09d18571..0b7c6bee0 100644 --- a/src/Text/Pandoc/Writers/Shared.hs +++ b/src/Text/Pandoc/Writers/Shared.hs @@ -394,6 +394,7 @@ toSuperscript '2' = Just '\x00B2' toSuperscript '3' = Just '\x00B3' toSuperscript '+' = Just '\x207A' toSuperscript '-' = Just '\x207B' +toSuperscript '\x2212' = Just '\x207B' -- unicode minus toSuperscript '=' = Just '\x207C' toSuperscript '(' = Just '\x207D' toSuperscript ')' = Just '\x207E' -- cgit v1.2.3 From 3f09f53459b877f53072efbf57dec21fa37280b5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 13 May 2021 21:11:52 -0700 Subject: Implement curly-brace syntax for Markdown citation keys. The change provides a way to use citation keys that contain special characters not usable with the standard citation key syntax. Example: `@{foo_bar{x}'}` for the key `foo_bar{x}`. Closes #6026. The change requires adding a new parameter to the `citeKey` parser from Text.Pandoc.Parsing [API change]. Markdown reader: recognize @{..} syntax for citatinos. Markdown writer: use @{..} syntax for citations when needed. Update manual with curly-brace syntax for citations. Closes #6026. --- MANUAL.txt | 15 ++++++++++----- src/Text/Pandoc/Parsing.hs | 18 +++++++++++++----- src/Text/Pandoc/Readers/Markdown.hs | 6 +++--- src/Text/Pandoc/Readers/Org/Inlines.hs | 8 ++++---- src/Text/Pandoc/Writers/Markdown/Inline.hs | 9 +++++++-- test/command/6026.md | 19 +++++++++++++++++++ 6 files changed, 56 insertions(+), 19 deletions(-) create mode 100644 test/command/6026.md (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 75fe599e2..6f06d1e8a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -4949,12 +4949,15 @@ Inline and regular footnotes may be mixed freely. #### Extension: `citations` #### Markdown citations go inside square brackets and are separated -by semicolons. Each citation must have a key, composed of '@' + -the citation identifier from the database, and may optionally -have a prefix, a locator, and a suffix. The citation key must -begin with a letter, digit, or `_`, and may contain +by semicolons. Each citation must have a key and may optionally +have a prefix, a locator, and a suffix. The citation key +consists of `@` plus the citation identifier, possibly +enclosed in curly braces. If the identifier starts +with a letter, digit, or `_`, followed by zero or more alphanumerics, `_`, and internal punctuation characters -(`:.#$%&-+?<>~/`). Here are some examples: +(`:.#$%&-+?<>~/`), then the curly braces may be omitted. +Identifiers may not contain whitespace characters or unbalanced +curly braces. Here are some examples: Blah blah [see @doe99, pp. 33-35; also @smith04, chap. 1]. @@ -4962,6 +4965,8 @@ alphanumerics, `_`, and internal punctuation characters Blah blah [@smith04; @doe99]. + Blah blah [@{https://example.com/bib?name=foobar&date=2000}, p. 33]. + `pandoc` detects locator terms in the [CSL locale files]. Either abbreviated or unabbreviated forms are accepted. In the `en-US` locale, locator terms can be written in either singular or plural forms, diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index cbe9993c6..0bb794ba1 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1605,19 +1605,27 @@ nested p = do return res citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st) - => ParserT s st m (Bool, Text) -citeKey = try $ do + => Bool -- ^ If True, allow expanded @{..} syntax. + -> ParserT s st m (Bool, Text) +citeKey allowBraced = try $ do guard =<< notAfterString suppress_author <- option False (True <$ char '-') char '@' + key <- simpleCiteIdentifier + <|> if allowBraced + then charsInBalanced '{' '}' (satisfy (not . isSpace)) + else mzero + return (suppress_author, key) + +simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Text +simpleCiteIdentifier = do firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> try (oneOf ":/" <* lookAhead (char '/')) - let key = firstChar:rest - return (suppress_author, T.pack key) - + return $ T.pack $ firstChar:rest token :: (Stream s m t) => (t -> Text) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 2d20ff018..34f16ab4e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2094,7 +2094,7 @@ cite = do textualCite :: PandocMonad m => MarkdownParser m (F Inlines) textualCite = try $ do - (suppressAuthor, key) <- citeKey + (suppressAuthor, key) <- citeKey True -- If this is a reference to an earlier example list item, -- then don't parse it as a citation. If the example list -- item comes later, we'll parse it here and figure out in @@ -2174,7 +2174,7 @@ prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> lookAhead (try $ do optional (try (char ';' >> spnl)) - citeKey + citeKey True return ']')) citeList :: PandocMonad m => MarkdownParser m (F [Citation]) @@ -2183,7 +2183,7 @@ citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) citation :: PandocMonad m => MarkdownParser m (F Citation) citation = try $ do pref <- prefix - (suppress_author, key) <- citeKey + (suppress_author, key) <- citeKey True suff <- suffix noteNum <- stateNoteNumber <$> getState return $ do diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 054f2611a..6862dd71e 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -263,7 +263,7 @@ berkeleyCitationList = try $ do where citationListPart :: PandocMonad m => OrgParser m (F Inlines) citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' citeKey + notFollowedBy' $ citeKey False notFollowedBy (oneOf ";]") inline @@ -278,7 +278,7 @@ berkeleyBareTag' = try $ void (string "cite") berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey + (suppressAuthor, key) <- citeKey False returnF . return $ Citation { citationId = key , citationPrefix = mempty @@ -351,7 +351,7 @@ citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) citation :: PandocMonad m => OrgParser m (F Citation) citation = try $ do pref <- prefix - (suppress_author, key) <- citeKey + (suppress_author, key) <- citeKey False suff <- suffix return $ do x <- pref @@ -368,7 +368,7 @@ citation = try $ do } where prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) + manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False))) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) skipSpaces diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index e35e1a0b9..2062050e4 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -491,11 +491,16 @@ inlineToMarkdown opts (Cite (c:cs) lst) rest <- mapM convertOne cs let inbr = suffs <+> joincits rest br = if isEmpty inbr then empty else char '[' <> inbr <> char ']' - return $ literal ("@" <> citationId c) <+> br + return $ literal ("@" <> maybeInBraces (citationId c)) <+> br else do cits <- mapM convertOne (c:cs) return $ literal "[" <> joincits cits <> literal "]" where + maybeInBraces key = + case readWith (citeKey False >> spaces >> eof) + defaultParserState ("@" <> key) of + Left _ -> "{" <> key <> "}" + Right _ -> key joincits = hcat . intersperse (literal "; ") . filter (not . isEmpty) convertOne Citation { citationId = k , citationPrefix = pinlines @@ -504,7 +509,7 @@ inlineToMarkdown opts (Cite (c:cs) lst) = do pdoc <- inlineListToMarkdown opts pinlines sdoc <- inlineListToMarkdown opts sinlines - let k' = literal (modekey m <> "@" <> k) + let k' = literal (modekey m <> "@" <> maybeInBraces k) r = case sinlines of Str (T.uncons -> Just (y,_)):_ | y `elem` (",;]@" :: String) -> k' <> sdoc _ -> k' <+> sdoc diff --git a/test/command/6026.md b/test/command/6026.md new file mode 100644 index 000000000..5e18a5f42 --- /dev/null +++ b/test/command/6026.md @@ -0,0 +1,19 @@ +``` +% pandoc -t native +@{https://openreview.net/forum?id=HkwoSDPgg} + +@https://openreview.net/forum?id=HkwoSDPgg +^D +[Para [Cite [Citation {citationId = "https://openreview.net/forum?id=HkwoSDPgg", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0}] [Str "@https://openreview.net/forum?id=HkwoSDPgg"]] +,Para [Cite [Citation {citationId = "https://openreview.net/forum?id", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "@https://openreview.net/forum?id"],Str "=HkwoSDPgg"]] +``` +``` +% pandoc -t markdown +@{https://openreview.net/forum?id=HkwoSDPgg} + +@https://openreview.net/forum?id=HkwoSDPgg +^D +@{https://openreview.net/forum?id=HkwoSDPgg} + +@https://openreview.net/forum?id=HkwoSDPgg +``` -- cgit v1.2.3 From 875f8f36545d1c21fa3d29c52c40517a667f2574 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 14 May 2021 11:58:58 +0200 Subject: HTML reader: don't fail on unmatched closing "script" tag. Prevent the reader from crashing if the HTML input contains an unmatched closing `</script>` tag. Fixes: #7282 --- src/Text/Pandoc/Readers/HTML.hs | 16 +++++++++------- test/command/7282.md | 7 +++++++ 2 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 test/command/7282.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f5c8a2277..0a9d67e35 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -942,13 +942,15 @@ getTagName (TagClose t) = Just t getTagName _ = Nothing isInlineTag :: Tag Text -> Bool -isInlineTag t = - isCommentTag t || - case getTagName t of - Nothing -> False - Just "script" -> "math/tex" `T.isPrefixOf` fromAttrib "type" t - Just x -> x `Set.notMember` blockTags || - T.take 1 x == "?" -- processing instr. +isInlineTag t = isCommentTag t || case t of + TagOpen "script" _ -> "math/tex" `T.isPrefixOf` fromAttrib "type" t + TagClose "script" -> True + TagOpen name _ -> isInlineTagName name + TagClose name -> isInlineTagName name + _ -> False + where isInlineTagName x = + x `Set.notMember` blockTags || + T.take 1 x == "?" -- processing instr. isBlockTag :: Tag Text -> Bool isBlockTag t = isBlockTagName || isTagComment t diff --git a/test/command/7282.md b/test/command/7282.md new file mode 100644 index 000000000..4ed6b5b3e --- /dev/null +++ b/test/command/7282.md @@ -0,0 +1,7 @@ +Don't crash on unmatched closing tag. +``` +% pandoc -f html -t native +</script> +^D +[] +``` -- cgit v1.2.3 From 17d96404f5b6f5f080329e220a0784f10c364f2d Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 14 May 2021 16:19:20 +0200 Subject: Docx writer: allow multirow table headers --- src/Text/Pandoc/Writers/Docx/Table.hs | 42 +++++++++++++++++------------ test/docx/golden/table_with_list_cell.docx | Bin 10225 -> 10238 bytes test/docx/golden/tables.docx | Bin 10266 -> 10279 bytes 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index bb931bf08..ccc325fbe 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -42,11 +42,12 @@ tableToOpenXML blocksToOpenXML gridTable = do then return [] else withParaPropM (pStyleM "Table Caption") $ blocksToOpenXML captionBlocks - head' <- cellGridToOpenXML blocksToOpenXML thead - bodies <- mapM (cellGridToOpenXML blocksToOpenXML) tbodies - foot' <- cellGridToOpenXML blocksToOpenXML tfoot + head' <- cellGridToOpenXML blocksToOpenXML HeadRow thead + bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow) tbodies + foot' <- cellGridToOpenXML blocksToOpenXML FootRow tfoot let hasHeader = not . null . indices . partRowAttrs $ thead + let hasFooter = not . null . indices . partRowAttrs $ tfoot -- for compatibility with Word <= 2007, we include a val with a bitmask -- 0×0020 Apply first row conditional formatting -- 0×0040 Apply last row conditional formatting @@ -61,7 +62,7 @@ tableToOpenXML blocksToOpenXML gridTable = do ( mknode "w:tblStyle" [("w:val","Table")] () : mknode "w:tblW" tblWattr () : mknode "w:tblLook" [("w:firstRow",if hasHeader then "1" else "0") - ,("w:lastRow","0") + ,("w:lastRow",if hasFooter then "1" else "0") ,("w:firstColumn","0") ,("w:lastColumn","0") ,("w:noHBand","0") @@ -77,6 +78,9 @@ tableToOpenXML blocksToOpenXML gridTable = do modify $ \s -> s { stInTable = False } return $ captionXml ++ [Elem tbl] +-- | Parts of a table +data RowType = HeadRow | BodyRow | FootRow + alignmentToString :: Alignment -> Text alignmentToString = \case AlignLeft -> "left" @@ -104,22 +108,23 @@ tableLayout specs = , [ ("w:type", "pct"), ("w:w", tshow rowwidth) ]) cellGridToOpenXML :: PandocMonad m - => ([Block] -> WS m [Content]) - -> Part - -> WS m [Element] -cellGridToOpenXML blocksToOpenXML part@(Part _ _ rowAttrs) = + => ([Block] -> WS m [Content]) + -> RowType + -> Part + -> WS m [Element] +cellGridToOpenXML blocksToOpenXML rowType part@(Part _ _ rowAttrs) = if null (indices rowAttrs) then return mempty - else mapM (rowToOpenXML blocksToOpenXML) $ partToRows part + else mapM (rowToOpenXML blocksToOpenXML) $ partToRows rowType part data OOXMLCell = OOXMLCell Attr Alignment RowSpan ColSpan [Block] | OOXMLCellMerge ColSpan -data OOXMLRow = OOXMLRow Attr [OOXMLCell] +data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell] -partToRows :: Part -> [OOXMLRow] -partToRows part = +partToRows :: RowType -> Part -> [OOXMLRow] +partToRows rowType part = let toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell] toOOXMLCell ridx cidx = \case @@ -132,7 +137,7 @@ partToRows part = " at index " ++ show idx' _ -> mempty mkRow :: (RowIndex, Attr) -> OOXMLRow - mkRow (ridx, attr) = OOXMLRow attr + mkRow (ridx, attr) = OOXMLRow rowType attr . concatMap (uncurry $ toOOXMLCell ridx) . assocs . rowArray ridx @@ -143,12 +148,17 @@ rowToOpenXML :: PandocMonad m => ([Block] -> WS m [Content]) -> OOXMLRow -> WS m Element -rowToOpenXML blocksToOpenXML (OOXMLRow _attr cells) = do +rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do xmlcells <- mapM (ooxmlCellToOpenXML blocksToOpenXML) cells + let addTrPr = case rowType of + HeadRow -> (mknode "w:trPr" [] + [mknode "w:tblHeader" [("w:val", "true")] ()] :) + BodyRow -> id + FootRow -> id -- let align' = case align of -- AlignDefault -> colAlign -- _ -> align - return $ mknode "w:tr" [] xmlcells + return $ mknode "w:tr" [] (addTrPr xmlcells) ooxmlCellToOpenXML :: PandocMonad m => ([Block] -> WS m [Content]) @@ -161,8 +171,6 @@ ooxmlCellToOpenXML blocksToOpenXML = \case , mknode "w:vMerge" [("w:val", "continue")] () ] , mknode "w:p" [] [mknode "w:pPr" [] ()]] OOXMLCell _attr align rowspan (ColSpan colspan) contents -> do - -- we handle rowspans via 'leftpad', so we can ignore those here - compactStyle <- pStyleM "Compact" es <- withParaProp (alignmentFor align) $ blocksToOpenXML contents -- Table cells require a <w:p> element, even an empty one! diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index 6427d475c..e51910770 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index 470eac2ae..8029774a9 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ -- cgit v1.2.3 From 3ec5726c9b7b9f7cc41ca5f26a80c347e20afc9a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 14 May 2021 16:49:19 +0200 Subject: Docx writer: fix alignment for cells. This fixes a regression introduced with the in the colspan/rowspan changes that caused column alignments to be ignored. The column alignment is used only if a default alignment is specified at the cell level; otherwise the cell-level alignment takes precedence. --- src/Text/Pandoc/Writers/Docx/Table.hs | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index ccc325fbe..e7fc82a10 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -38,13 +38,14 @@ tableToOpenXML blocksToOpenXML gridTable = do gridTable let (Caption _maybeShortCaption captionBlocks) = caption let captionStr = stringify captionBlocks + let aligns = map fst $ elems colspecs captionXml <- if null captionBlocks then return [] else withParaPropM (pStyleM "Table Caption") $ blocksToOpenXML captionBlocks - head' <- cellGridToOpenXML blocksToOpenXML HeadRow thead - bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow) tbodies - foot' <- cellGridToOpenXML blocksToOpenXML FootRow tfoot + head' <- cellGridToOpenXML blocksToOpenXML HeadRow aligns thead + bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow aligns) tbodies + foot' <- cellGridToOpenXML blocksToOpenXML FootRow aligns tfoot let hasHeader = not . null . indices . partRowAttrs $ thead let hasFooter = not . null . indices . partRowAttrs $ tfoot @@ -110,12 +111,14 @@ tableLayout specs = cellGridToOpenXML :: PandocMonad m => ([Block] -> WS m [Content]) -> RowType + -> [Alignment] -> Part -> WS m [Element] -cellGridToOpenXML blocksToOpenXML rowType part@(Part _ _ rowAttrs) = +cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ _ rowAttrs) = if null (indices rowAttrs) then return mempty - else mapM (rowToOpenXML blocksToOpenXML) $ partToRows rowType part + else mapM (rowToOpenXML blocksToOpenXML) $ + partToRows rowType aligns part data OOXMLCell = OOXMLCell Attr Alignment RowSpan ColSpan [Block] @@ -123,13 +126,17 @@ data OOXMLCell data OOXMLRow = OOXMLRow RowType Attr [OOXMLCell] -partToRows :: RowType -> Part -> [OOXMLRow] -partToRows rowType part = +partToRows :: RowType -> [Alignment] -> Part -> [OOXMLRow] +partToRows rowType aligns part = let - toOOXMLCell :: RowIndex -> ColIndex -> GridCell -> [OOXMLCell] - toOOXMLCell ridx cidx = \case + toOOXMLCell :: Alignment -> RowIndex -> ColIndex -> GridCell -> [OOXMLCell] + toOOXMLCell columnAlign ridx cidx = \case ContentCell attr align rowspan colspan blocks -> - [OOXMLCell attr align rowspan colspan blocks] + -- Respect non-default, cell specific alignment. + let align' = case align of + AlignDefault -> columnAlign + _ -> align + in [OOXMLCell attr align' rowspan colspan blocks] ContinuationCell idx'@(ridx',cidx') | ridx /= ridx', cidx == cidx' -> case (partCellArray part)!idx' of (ContentCell _ _ _ colspan _) -> [OOXMLCellMerge colspan] @@ -138,7 +145,9 @@ partToRows rowType part = _ -> mempty mkRow :: (RowIndex, Attr) -> OOXMLRow mkRow (ridx, attr) = OOXMLRow rowType attr - . concatMap (uncurry $ toOOXMLCell ridx) + . mconcat + . zipWith (\align -> uncurry $ toOOXMLCell align ridx) + aligns . assocs . rowArray ridx $ partCellArray part @@ -155,9 +164,6 @@ rowToOpenXML blocksToOpenXML (OOXMLRow rowType _attr cells) = do [mknode "w:tblHeader" [("w:val", "true")] ()] :) BodyRow -> id FootRow -> id - -- let align' = case align of - -- AlignDefault -> colAlign - -- _ -> align return $ mknode "w:tr" [] (addTrPr xmlcells) ooxmlCellToOpenXML :: PandocMonad m -- cgit v1.2.3 From 76a4e7127be1116b67ae531f56aadb05963813b5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 14 May 2021 10:09:02 -0700 Subject: Beamer writer: support exampleblock and alertblock. A block will be rendered as an exampleblock if the heading has class `example` and alertblock if it has class `alert`. Closes #7278. --- MANUAL.txt | 6 +++++- src/Text/Pandoc/Writers/LaTeX.hs | 11 ++++++++--- test/command/7278.md | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) create mode 100644 test/command/7278.md (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 6f06d1e8a..cdfb0e332 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5666,7 +5666,11 @@ rules: * A heading at the slide level always starts a new slide. * Headings *below* the slide level in the hierarchy create - headings *within* a slide. + headings *within* a slide. (In beamer, a "block" will be + created. If the heading has the class `example`, an + `exampleblock` environment will be used; if it has the class + `alert`, an `alertblock` will be used; otherwise a regular + `block` will be used.) * Headings *above* the slide level in the hierarchy create "title slides," which just contain the section title diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 8b1f3df1d..d8722876e 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -290,7 +290,12 @@ blockToLaTeX :: PandocMonad m => Block -- ^ Block to convert -> LW m (Doc Text) blockToLaTeX Null = return empty -blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do +blockToLaTeX (Div attr@(identifier,"block":dclasses,_) + (Header _ _ ils : bs)) = do + let blockname + | "example" `elem` dclasses = "exampleblock" + | "alert" `elem` dclasses = "alertblock" + | otherwise = "block" ref <- toLabel identifier let anchor = if T.null identifier then empty @@ -298,8 +303,8 @@ blockToLaTeX (Div attr@(identifier,"block":_,_) (Header _ _ ils : bs)) = do braces (literal ref) <> braces empty title' <- inlineListToLaTeX ils contents <- blockListToLaTeX bs - wrapDiv attr $ ("\\begin{block}" <> braces title' <> anchor) $$ - contents $$ "\\end{block}" + wrapDiv attr $ ("\\begin" <> braces blockname <> braces title' <> anchor) $$ + contents $$ "\\end" <> braces blockname blockToLaTeX (Div (identifier,"slide":dclasses,dkvs) (Header _ (_,hclasses,hkvs) ils : bs)) = do -- note: [fragile] is required or verbatim breaks diff --git a/test/command/7278.md b/test/command/7278.md new file mode 100644 index 000000000..dcf71c29f --- /dev/null +++ b/test/command/7278.md @@ -0,0 +1,38 @@ +``` +% pandoc -t beamer +# Slide + +Some blocks: + +## example block title {.example} + +text in block + +## alert block title {.alert} + +text in block + +## block title + +text in block +^D +\begin{frame}{Slide} +\protect\hypertarget{slide}{} +Some blocks: + +\begin{exampleblock}{example block title} +\protect\hypertarget{example-block-title}{} +text in block +\end{exampleblock} + +\begin{alertblock}{alert block title} +\protect\hypertarget{alert-block-title}{} +text in block +\end{alertblock} + +\begin{block}{block title} +\protect\hypertarget{block-title}{} +text in block +\end{block} +\end{frame} +``` -- cgit v1.2.3 From 013e4a3164115bf84f3e1964f21b7cd7f020b86a Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Fri, 14 May 2021 21:31:24 +0200 Subject: HTML reader: keep h1 tags as normal headers (#7274) The tags `<title>` and `<h1 class="title">` often contain the same information, so the latter was dropped from the document. However, as this can lead to loss of information, the heading is now always retained. Use `--shift-heading-level-by=-1` to turn the `<h1>` into the document title, or a filter to restore the previous behavior. Closes: #2293 --- src/Text/Pandoc/Readers/HTML.hs | 6 +----- test/html-reader.native | 3 ++- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 0a9d67e35..b305de7b5 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -499,17 +499,13 @@ pHeader = try $ do tagOpen (`elem` ["h1","h2","h3","h4","h5","h6"]) (const True) let attr = toStringAttr attr' - let bodyTitle = TagOpen tagtype attr' ~== TagOpen ("h1" :: Text) - [("class","title")] level <- headerLevel tagtype contents <- trimInlines . mconcat <$> manyTill inline (pCloses tagtype <|> eof) let ident = fromMaybe "" $ lookup "id" attr let classes = maybe [] T.words $ lookup "class" attr let keyvals = [(k,v) | (k,v) <- attr, k /= "class", k /= "id"] attr'' <- registerHeader (ident, classes, keyvals) contents - return $ if bodyTitle - then mempty -- skip a representation of the title in the body - else B.headerWith attr'' level contents + return $ B.headerWith attr'' level contents pHrule :: PandocMonad m => TagParser m Blocks pHrule = do diff --git a/test/html-reader.native b/test/html-reader.native index 04ec55d1e..880561a93 100644 --- a/test/html-reader.native +++ b/test/html-reader.native @@ -1,5 +1,6 @@ Pandoc (Meta {unMeta = fromList [("generator",MetaInlines [Str "pandoc"]),("title",MetaInlines [Str "Pandoc",Space,Str "Test",Space,Str "Suite"])]}) -[Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] +[Header 1 ("pandoc-test-suite",["title"],[]) [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] +,Para [Str "This",Space,Str "is",Space,Str "a",Space,Str "set",Space,Str "of",Space,Str "tests",Space,Str "for",Space,Str "pandoc.",Space,Str "Most",Space,Str "of",Space,Str "them",Space,Str "are",Space,Str "adapted",Space,Str "from",Space,Str "John",Space,Str "Gruber's",Space,Str "markdown",Space,Str "test",Space,Str "suite."] ,HorizontalRule ,Header 1 ("headers",[],[]) [Str "Headers"] ,Header 2 ("level-2-with-an-embedded-link",[],[]) [Str "Level",Space,Str "2",Space,Str "with",Space,Str "an",Space,Link ("",[],[]) [Str "embedded",Space,Str "link"] ("/url","")] -- cgit v1.2.3 From 0794862aacdb5d28616dadc3d681a63c36251212 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 15 May 2021 16:36:13 +0200 Subject: HTML writer: parse `<header>` as a Div HTML5 `<header>` elements are treated like `<div>` elements. --- src/Text/Pandoc/Readers/HTML.hs | 2 ++ test/Tests/Readers/HTML.hs | 14 +++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index b305de7b5..0a9e4addf 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -205,6 +205,7 @@ block = ((do | otherwise -> pDiv "section" -> pDiv + "header" -> pDiv "main" -> pDiv "figure" -> pFigure "iframe" -> pIframe @@ -404,6 +405,7 @@ pLineBlock = try $ do isDivLike :: Text -> Bool isDivLike "div" = True isDivLike "section" = True +isDivLike "header" = True isDivLike "main" = True isDivLike _ = False diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index f23af2cb1..7f5849991 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -99,11 +99,15 @@ tests = [ testGroup "base tag" plain (codeWith ("",["sample"],[]) "Answer is 42") ] , testGroup "var" - [ - test html "inline var block" $ - "<var>result</var>" =?> - plain (codeWith ("",["variable"],[]) "result") - ] + [ test html "inline var block" $ + "<var>result</var>" =?> + plain (codeWith ("",["variable"],[]) "result") + ] + , testGroup "header" + [ test htmlNativeDivs "<header> is parsed as a div" $ + "<header id=\"title\">Title</header>" =?> + divWith ("title", mempty, mempty) (plain "Title") + ] , askOption $ \(QuickCheckTests numtests) -> testProperty "Round trip" $ withMaxSuccess (if QuickCheckTests numtests == defaultValue -- cgit v1.2.3 From 0a4c6925b6db433bdb8b9d57c94a7c36be3daea7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 15 May 2021 15:40:49 -0700 Subject: Docx writer: copy over more settings from referenc.odcx. From settings.xml in the reference-doc, we now include: `zoom`, `embedSystemFonts`, `doNotTrackMoves`, `defaultTabStop`, `drawingGridHorizontalSpacing`, `drawingGridVerticalSpacing`, `displayHorizontalDrawingGridEvery`, `displayVerticalDrawingGridEvery`, `characterSpacingControl`, `savePreviewPicture`, `mathPr`, `themeFontLang`, `decimalSymbol`, `listSeparator`, `autoHyphenation`, `compat`. Closes #7240. --- src/Text/Pandoc/Writers/Docx.hs | 17 ++++++++++++++++- test/docx/golden/block_quotes.docx | Bin 9981 -> 9981 bytes test/docx/golden/codeblock.docx | Bin 9830 -> 9830 bytes test/docx/golden/comments.docx | Bin 10168 -> 10168 bytes test/docx/golden/custom_style_no_reference.docx | Bin 9931 -> 9931 bytes test/docx/golden/custom_style_preserve.docx | Bin 10560 -> 10560 bytes test/docx/golden/custom_style_reference.docx | Bin 12317 -> 12368 bytes test/docx/golden/definition_list.docx | Bin 9830 -> 9830 bytes .../docx/golden/document-properties-short-desc.docx | Bin 9835 -> 9835 bytes test/docx/golden/document-properties.docx | Bin 10314 -> 10314 bytes test/docx/golden/headers.docx | Bin 9969 -> 9969 bytes test/docx/golden/image.docx | Bin 26647 -> 26647 bytes test/docx/golden/inline_code.docx | Bin 9769 -> 9769 bytes test/docx/golden/inline_formatting.docx | Bin 9949 -> 9949 bytes test/docx/golden/inline_images.docx | Bin 26703 -> 26703 bytes test/docx/golden/link_in_notes.docx | Bin 9991 -> 9991 bytes test/docx/golden/links.docx | Bin 10162 -> 10162 bytes test/docx/golden/lists.docx | Bin 10228 -> 10228 bytes test/docx/golden/lists_continuing.docx | Bin 10024 -> 10024 bytes test/docx/golden/lists_multiple_initial.docx | Bin 10106 -> 10106 bytes test/docx/golden/lists_restarting.docx | Bin 10022 -> 10022 bytes test/docx/golden/nested_anchors_in_header.docx | Bin 10126 -> 10126 bytes test/docx/golden/notes.docx | Bin 9938 -> 9938 bytes test/docx/golden/raw-blocks.docx | Bin 9870 -> 9870 bytes test/docx/golden/raw-bookmarks.docx | Bin 10004 -> 10004 bytes test/docx/golden/table_one_row.docx | Bin 9840 -> 9840 bytes test/docx/golden/table_with_list_cell.docx | Bin 10159 -> 10159 bytes test/docx/golden/tables.docx | Bin 10200 -> 10200 bytes test/docx/golden/track_changes_deletion.docx | Bin 9813 -> 9813 bytes test/docx/golden/track_changes_insertion.docx | Bin 9796 -> 9796 bytes test/docx/golden/track_changes_move.docx | Bin 9830 -> 9830 bytes .../golden/track_changes_scrubbed_metadata.docx | Bin 9942 -> 9942 bytes test/docx/golden/unicode.docx | Bin 9755 -> 9755 bytes test/docx/golden/verbatim_subsuper.docx | Bin 9802 -> 9802 bytes 34 files changed, 16 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index 75bed1595..e7a49ba02 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -469,12 +469,27 @@ writeDocx opts doc = do -- adds references to footnotes or endnotes we don't have... -- we do, however, copy some settings over from reference let settingsPath = "word/settings.xml" - settingsList = [ "autoHyphenation" + settingsList = [ "zoom" + , "embedSystemFonts" + , "doNotTrackMoves" + , "defaultTabStop" + , "drawingGridHorizontalSpacing" + , "drawingGridVerticalSpacing" + , "displayHorizontalDrawingGridEvery" + , "displayVerticalDrawingGridEvery" + , "characterSpacingControl" + , "savePreviewPicture" + , "mathPr" + , "themeFontLang" + , "decimalSymbol" + , "listSeparator" + , "autoHyphenation" , "consecutiveHyphenLimit" , "hyphenationZone" , "doNotHyphenateCap" , "evenAndOddHeaders" , "proofState" + , "compat" ] settingsEntry <- copyChildren refArchive distArchive settingsPath epochtime settingsList diff --git a/test/docx/golden/block_quotes.docx b/test/docx/golden/block_quotes.docx index f5e4b6428..d05020f82 100644 Binary files a/test/docx/golden/block_quotes.docx and b/test/docx/golden/block_quotes.docx differ diff --git a/test/docx/golden/codeblock.docx b/test/docx/golden/codeblock.docx index 0bbe42bd7..616c9b1d0 100644 Binary files a/test/docx/golden/codeblock.docx and b/test/docx/golden/codeblock.docx differ diff --git a/test/docx/golden/comments.docx b/test/docx/golden/comments.docx index 6b759b522..aa200c8d6 100644 Binary files a/test/docx/golden/comments.docx and b/test/docx/golden/comments.docx differ diff --git a/test/docx/golden/custom_style_no_reference.docx b/test/docx/golden/custom_style_no_reference.docx index 74e30f651..49cf42f38 100644 Binary files a/test/docx/golden/custom_style_no_reference.docx and b/test/docx/golden/custom_style_no_reference.docx differ diff --git a/test/docx/golden/custom_style_preserve.docx b/test/docx/golden/custom_style_preserve.docx index 7ee99c87b..e24940478 100644 Binary files a/test/docx/golden/custom_style_preserve.docx and b/test/docx/golden/custom_style_preserve.docx differ diff --git a/test/docx/golden/custom_style_reference.docx b/test/docx/golden/custom_style_reference.docx index dfaf16e2b..e7da8f06d 100644 Binary files a/test/docx/golden/custom_style_reference.docx and b/test/docx/golden/custom_style_reference.docx differ diff --git a/test/docx/golden/definition_list.docx b/test/docx/golden/definition_list.docx index 02992c6bd..df148cfed 100644 Binary files a/test/docx/golden/definition_list.docx and b/test/docx/golden/definition_list.docx differ diff --git a/test/docx/golden/document-properties-short-desc.docx b/test/docx/golden/document-properties-short-desc.docx index 9bfe56dca..2d9e96b15 100644 Binary files a/test/docx/golden/document-properties-short-desc.docx and b/test/docx/golden/document-properties-short-desc.docx differ diff --git a/test/docx/golden/document-properties.docx b/test/docx/golden/document-properties.docx index 2a37045f2..d8f091956 100644 Binary files a/test/docx/golden/document-properties.docx and b/test/docx/golden/document-properties.docx differ diff --git a/test/docx/golden/headers.docx b/test/docx/golden/headers.docx index ca2c7a261..8c1b3e870 100644 Binary files a/test/docx/golden/headers.docx and b/test/docx/golden/headers.docx differ diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index c4447d021..48b72e283 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ diff --git a/test/docx/golden/inline_code.docx b/test/docx/golden/inline_code.docx index 1824c0aae..048ac8f15 100644 Binary files a/test/docx/golden/inline_code.docx and b/test/docx/golden/inline_code.docx differ diff --git a/test/docx/golden/inline_formatting.docx b/test/docx/golden/inline_formatting.docx index 82565a5b1..cf1301c4b 100644 Binary files a/test/docx/golden/inline_formatting.docx and b/test/docx/golden/inline_formatting.docx differ diff --git a/test/docx/golden/inline_images.docx b/test/docx/golden/inline_images.docx index 0416bcc15..662e70556 100644 Binary files a/test/docx/golden/inline_images.docx and b/test/docx/golden/inline_images.docx differ diff --git a/test/docx/golden/link_in_notes.docx b/test/docx/golden/link_in_notes.docx index b51ec5220..d800a5fb4 100644 Binary files a/test/docx/golden/link_in_notes.docx and b/test/docx/golden/link_in_notes.docx differ diff --git a/test/docx/golden/links.docx b/test/docx/golden/links.docx index ba0d100c8..bffdbbaf8 100644 Binary files a/test/docx/golden/links.docx and b/test/docx/golden/links.docx differ diff --git a/test/docx/golden/lists.docx b/test/docx/golden/lists.docx index 9632c598b..2b201df28 100644 Binary files a/test/docx/golden/lists.docx and b/test/docx/golden/lists.docx differ diff --git a/test/docx/golden/lists_continuing.docx b/test/docx/golden/lists_continuing.docx index 3269a0528..257053a78 100644 Binary files a/test/docx/golden/lists_continuing.docx and b/test/docx/golden/lists_continuing.docx differ diff --git a/test/docx/golden/lists_multiple_initial.docx b/test/docx/golden/lists_multiple_initial.docx index 716129170..0a3bf1016 100644 Binary files a/test/docx/golden/lists_multiple_initial.docx and b/test/docx/golden/lists_multiple_initial.docx differ diff --git a/test/docx/golden/lists_restarting.docx b/test/docx/golden/lists_restarting.docx index af6edfe86..0aa69805f 100644 Binary files a/test/docx/golden/lists_restarting.docx and b/test/docx/golden/lists_restarting.docx differ diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index f141425f7..88dd21abd 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/golden/notes.docx b/test/docx/golden/notes.docx index 93b4222f6..f02d5951c 100644 Binary files a/test/docx/golden/notes.docx and b/test/docx/golden/notes.docx differ diff --git a/test/docx/golden/raw-blocks.docx b/test/docx/golden/raw-blocks.docx index 9d1aa9853..58a101b3f 100644 Binary files a/test/docx/golden/raw-blocks.docx and b/test/docx/golden/raw-blocks.docx differ diff --git a/test/docx/golden/raw-bookmarks.docx b/test/docx/golden/raw-bookmarks.docx index b57289fdd..484c363a2 100644 Binary files a/test/docx/golden/raw-bookmarks.docx and b/test/docx/golden/raw-bookmarks.docx differ diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index edb23cc72..f75e567ab 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index f9cbed156..a49f70643 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index b585c803d..f24e27516 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/golden/track_changes_deletion.docx b/test/docx/golden/track_changes_deletion.docx index 313942750..de7c44bf4 100644 Binary files a/test/docx/golden/track_changes_deletion.docx and b/test/docx/golden/track_changes_deletion.docx differ diff --git a/test/docx/golden/track_changes_insertion.docx b/test/docx/golden/track_changes_insertion.docx index f18b3f85f..958533459 100644 Binary files a/test/docx/golden/track_changes_insertion.docx and b/test/docx/golden/track_changes_insertion.docx differ diff --git a/test/docx/golden/track_changes_move.docx b/test/docx/golden/track_changes_move.docx index 50bdab767..04fa05062 100644 Binary files a/test/docx/golden/track_changes_move.docx and b/test/docx/golden/track_changes_move.docx differ diff --git a/test/docx/golden/track_changes_scrubbed_metadata.docx b/test/docx/golden/track_changes_scrubbed_metadata.docx index 230b27006..ef2dc96f8 100644 Binary files a/test/docx/golden/track_changes_scrubbed_metadata.docx and b/test/docx/golden/track_changes_scrubbed_metadata.docx differ diff --git a/test/docx/golden/unicode.docx b/test/docx/golden/unicode.docx index 627276e4b..a9de2b367 100644 Binary files a/test/docx/golden/unicode.docx and b/test/docx/golden/unicode.docx differ diff --git a/test/docx/golden/verbatim_subsuper.docx b/test/docx/golden/verbatim_subsuper.docx index 790278108..3ebadc59d 100644 Binary files a/test/docx/golden/verbatim_subsuper.docx and b/test/docx/golden/verbatim_subsuper.docx differ -- cgit v1.2.3 From 39a69c4f93d46e059c48a740b69dfff219a5d715 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 16 May 2021 11:53:19 -0700 Subject: Markdown writer: improve escaping of `@`. We need to escape literal `@` before `{` because of the new citation syntax. --- src/Text/Pandoc/Writers/Markdown/Inline.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index 2062050e4..ced5fbacb 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -56,7 +56,7 @@ escapeText opts = T.pack . go . T.unpack '@' | isEnabled Ext_citations opts -> case cs of (d:_) - | isAlphaNum d || d == '_' + | isAlphaNum d || d == '_' || d == '{' -> '\\':'@':go cs _ -> '@':go cs _ | c `elem` ['\\','`','*','_','[',']','#'] -> -- cgit v1.2.3 From 5a6399d9f62c4306fa073ae1311675158dd6a203 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 16 May 2021 12:23:34 -0700 Subject: Markdown writer: fewer unneeded escapes for `#`. See #6259. --- src/Text/Pandoc/Writers/Markdown/Inline.hs | 13 +++++++++++-- test/command/3792.md | 2 +- test/command/4164.md | 2 +- test/command/7208.md | 2 +- test/command/biblatex-sigfridsson.md | 2 +- test/writer.opml | 2 +- 6 files changed, 16 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index ced5fbacb..e6c6da5a9 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -43,7 +43,11 @@ import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), -- | Escape special characters for Markdown. escapeText :: WriterOptions -> Text -> Text escapeText opts = T.pack . go . T.unpack - where + where + startsWithSpace (' ':_) = True + startsWithSpace ('\t':_) = True + startsWithSpace [] = True + startsWithSpace _ = False go [] = [] go (c:cs) = case c of @@ -59,7 +63,10 @@ escapeText opts = T.pack . go . T.unpack | isAlphaNum d || d == '_' || d == '{' -> '\\':'@':go cs _ -> '@':go cs - _ | c `elem` ['\\','`','*','_','[',']','#'] -> + '#' | isEnabled Ext_space_in_atx_header opts + , startsWithSpace cs + -> '\\':'#':go cs + _ | c `elem` ['\\','`','*','_','[',']'] -> '\\':c:go cs '|' | isEnabled Ext_pipe_tables opts -> '\\':'|':go cs '^' | isEnabled Ext_superscript opts -> '\\':'^':go cs @@ -81,6 +88,8 @@ escapeText opts = T.pack . go . T.unpack | isEnabled Ext_intraword_underscores opts , isAlphaNum c , isAlphaNum x -> c : '_' : x : go xs + '#':xs -> c : '#' : go xs + '>':xs -> c : '>' : go xs _ -> c : go cs attrsToMarkdown :: Attr -> Doc Text diff --git a/test/command/3792.md b/test/command/3792.md index eb109b9cc..eff26d517 100644 --- a/test/command/3792.md +++ b/test/command/3792.md @@ -6,7 +6,7 @@ and properly escaped. ok ^D --- -title: \<this\> \*that\* +title: \<this> \*that\* --- ok diff --git a/test/command/4164.md b/test/command/4164.md index 4e7b7e285..68cbd0584 100644 --- a/test/command/4164.md +++ b/test/command/4164.md @@ -26,6 +26,6 @@ Here is inline html: Here is inline html: -\<div\> \<balise\> bla bla \</div\> +\<div> \<balise> bla bla \</div> ``` diff --git a/test/command/7208.md b/test/command/7208.md index e65943ade..fe02ec32e 100644 --- a/test/command/7208.md +++ b/test/command/7208.md @@ -2,5 +2,5 @@ % pandoc -t gfm \<hi\> ^D -\<hi\> +\<hi> ``` diff --git a/test/command/biblatex-sigfridsson.md b/test/command/biblatex-sigfridsson.md index f83c35622..e042a8762 100644 --- a/test/command/biblatex-sigfridsson.md +++ b/test/command/biblatex-sigfridsson.md @@ -90,7 +90,7 @@ references: - family: Ryde given: Ulf container-title: Journal of Computational Chemistry - doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1\\>3.0.CO;2-P" + doi: "10.1002/(SICI)1096-987X(199803)19:4\\<377::AID-JCC1>3.0.CO;2-P" id: sigfridsson issue: 4 issued: 1998 diff --git a/test/writer.opml b/test/writer.opml index 6bdcb882e..bfe1e5de6 100644 --- a/test/writer.opml +++ b/test/writer.opml @@ -52,7 +52,7 @@ </outline> <outline text="LaTeX" _note="- - 2 + 2 = 4 - *x* ∈ *y* - *α* ∧ *ω* - 223 - *p*-Tree - Here’s some display math: $$\\frac{d}{dx}f(x)=\\lim\_{h\\to 0}\\frac{f(x+h)-f(x)}{h}$$ - Here’s one that has a line break in it: *α* + *ω* × *x*². These shouldn’t be math: - To get the famous equation, write `$e = mc^2$`. - $22,000 is a *lot* of money. So is $34,000. (It worked if “lot” is emphasized.) - Shoes ($20) and socks ($5). - Escaped `$`: $73 *this should be emphasized* 23$. Here’s a LaTeX table: ------------------------------------------------------------------------"> </outline> -<outline text="Special Characters" _note="Here is some unicode: - I hat: Î - o umlaut: ö - section: § - set membership: ∈ - copyright: © AT&T has an ampersand in their name. AT&T is another way to write it. This & that. 4 &lt; 5. 6 &gt; 5. Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: { Right brace: } Left bracket: \[ Right bracket: \] Left paren: ( Right paren: ) Greater-than: &gt; Hash: \# Period: . Bang: ! Plus: + Minus: - ------------------------------------------------------------------------"> +<outline text="Special Characters" _note="Here is some unicode: - I hat: Î - o umlaut: ö - section: § - set membership: ∈ - copyright: © AT&T has an ampersand in their name. AT&T is another way to write it. This & that. 4 &lt; 5. 6 &gt; 5. Backslash: \\ Backtick: \` Asterisk: \* Underscore: \_ Left brace: { Right brace: } Left bracket: \[ Right bracket: \] Left paren: ( Right paren: ) Greater-than: &gt; Hash: # Period: . Bang: ! Plus: + Minus: - ------------------------------------------------------------------------"> </outline> <outline text="Links"> <outline text="Explicit" _note="Just a [URL](/url/). [URL and title](/url/ "title"). [URL and title](/url/ "title preceded by two spaces"). [URL and title](/url/ "title preceded by a tab"). [URL and title](/url/ "title with "quotes" in it") [URL and title](/url/ "title with single quotes") [with\_underscore](/url/with_underscore) [Email link](mailto:nobody@nowhere.net) [Empty]()."> -- cgit v1.2.3 From d92622ba3cae5ced69a256472d367a53fc5878a1 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sun, 16 May 2021 21:33:32 +0200 Subject: LaTeX template: define commands for zero width non-joiner character Closes: #6639 The zero-width non-joiner character is used to avoid ligatures (e.g. in German). --- data/templates/default.latex | 19 +++++++++++++++++++ src/Text/Pandoc/Writers/LaTeX.hs | 3 +-- src/Text/Pandoc/Writers/LaTeX/Types.hs | 3 ++- src/Text/Pandoc/Writers/LaTeX/Util.hs | 7 ++++--- 4 files changed, 26 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/data/templates/default.latex b/data/templates/default.latex index 142fe3e55..04784b971 100644 --- a/data/templates/default.latex +++ b/data/templates/default.latex @@ -156,6 +156,25 @@ $if(CJKmainfont)$ \fi $endif$ \fi +$if(zero-width-non-joiner)$ +%% Support for zero-width non-joiner characters. +\makeatletter +\def\zerowidthnonjoiner{% + % Prevent ligatures and adjust kerning, but still support hyphenating. + \texorpdfstring{% + \textormath{\nobreak\discretionary{-}{}{\kern.03em}% + \ifvmode\else\nobreak\hskip\z@skip\fi}{}% + }{}% +} +\makeatother +\ifPDFTeX + \DeclareUnicodeCharacter{200C}{\zerowidthnonjoiner} +\else + \catcode`^^^^200c=\active + \protected\def ^^^^200c{\zerowidthnonjoiner} +\fi +%% End of ZWNJ support +$endif$ $if(beamer)$ $if(theme)$ \usetheme[$for(themeoptions)$$themeoptions$$sep$,$endfor$]{$theme$} diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index d8722876e..bf57937bd 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -173,6 +173,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do defField "has-chapters" (stHasChapters st) $ defField "has-frontmatter" (documentClass `elem` frontmatterClasses) $ defField "listings" (writerListings options || stLHS st) $ + defField "zero-width-non-joiner" (stZwnj st) $ defField "beamer" beamer $ (if stHighlighting st then case writerHighlightStyle options of @@ -1048,5 +1049,3 @@ extractInline _ _ = [] -- Look up a key in an attribute and give a list of its values lookKey :: Text -> Attr -> [Text] lookKey key (_,_,kvs) = maybe [] T.words $ lookup key kvs - - diff --git a/src/Text/Pandoc/Writers/LaTeX/Types.hs b/src/Text/Pandoc/Writers/LaTeX/Types.hs index d598794ad..c06b7e923 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Types.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Types.hs @@ -40,7 +40,7 @@ data WriterState = , stCsquotes :: Bool -- ^ true if document uses csquotes , stHighlighting :: Bool -- ^ true if document has highlighted code , stIncremental :: Bool -- ^ true if beamer lists should be - -- displayed bit by bit + , stZwnj :: Bool -- ^ true if document has a ZWNJ character , stInternalLinks :: [Text] -- ^ list of internal link targets , stBeamer :: Bool -- ^ produce beamer , stEmptyLine :: Bool -- ^ true if no content on line @@ -74,6 +74,7 @@ startingState options = , stCsquotes = False , stHighlighting = False , stIncremental = writerIncremental options + , stZwnj = False , stInternalLinks = [] , stBeamer = False , stEmptyLine = True diff --git a/src/Text/Pandoc/Writers/LaTeX/Util.hs b/src/Text/Pandoc/Writers/LaTeX/Util.hs index 56bb792ae..c34338121 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Util.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Util.hs @@ -22,6 +22,7 @@ module Text.Pandoc.Writers.LaTeX.Util ( where import Control.Applicative ((<|>)) +import Control.Monad (when) import Text.Pandoc.Class (PandocMonad, toLang) import Text.Pandoc.Options (WriterOptions(..), isEnabled) import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..)) @@ -30,7 +31,7 @@ import Text.Pandoc.Highlighting (toListingsLanguage) import Text.DocLayout import Text.Pandoc.Definition import Text.Pandoc.ImageSize (showFl) -import Control.Monad.State.Strict (gets) +import Control.Monad.State.Strict (gets, modify) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Extensions (Extension(Ext_smart)) @@ -49,6 +50,8 @@ data StringContext = TextString stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text stringToLaTeX context zs = do opts <- gets stOptions + when ('\x200c' `elemText` zs) $ + modify (\s -> s { stZwnj = True }) return $ T.pack $ foldr (go opts context) mempty $ T.unpack $ if writerPreferAscii opts @@ -270,5 +273,3 @@ mbBraced :: Text -> Text mbBraced x = if not (T.all isAlphaNum x) then "{" <> x <> "}" else x - - -- cgit v1.2.3 From 4417dacc440e269c5a4db1d67b52da9e0fe05561 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 17 May 2021 13:00:38 +0200 Subject: ConTeXt writer: use span identifiers as reference anchors. Closes: #7246 --- src/Text/Pandoc/Writers/ConTeXt.hs | 8 ++++++-- test/Tests/Writers/ConTeXt.hs | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index b694437d8..57d752a67 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -487,7 +487,7 @@ inlineToConTeXt (Note contents) = do then literal "\\footnote{" <> nest 2 (chomp contents') <> char '}' else literal "\\startbuffer " <> nest 2 (chomp contents') <> literal "\\stopbuffer\\footnote{\\getbuffer}" -inlineToConTeXt (Span (_,_,kvs) ils) = do +inlineToConTeXt (Span (ident,_,kvs) ils) = do mblang <- fromBCP47 (lookup "lang" kvs) let wrapDir txt = case lookup "dir" kvs of Just "rtl" -> braces $ "\\righttoleft " <> txt @@ -497,7 +497,11 @@ inlineToConTeXt (Span (_,_,kvs) ils) = do Just lng -> braces ("\\language" <> brackets (literal lng) <> txt) Nothing -> txt - wrapLang . wrapDir <$> inlineListToConTeXt ils + addReference = + if T.null ident + then id + else (("\\reference" <> brackets (literal ident) <> "{}") <>) + addReference . wrapLang . wrapDir <$> inlineListToConTeXt ils -- | Craft the section header, inserting the section reference, if supplied. sectionHeader :: PandocMonad m diff --git a/test/Tests/Writers/ConTeXt.hs b/test/Tests/Writers/ConTeXt.hs index a4aa69dcd..fbbf9b948 100644 --- a/test/Tests/Writers/ConTeXt.hs +++ b/test/Tests/Writers/ConTeXt.hs @@ -43,6 +43,9 @@ tests = [ testGroup "inline code" [ "with '}'" =: code "}" =?> "\\mono{\\}}" , "without '}'" =: code "]" =?> "\\type{]}" + , "span with ID" =: + spanWith ("city", [], []) "Berlin" =?> + "\\reference[city]{}Berlin" , testProperty "code property" $ \s -> null s || '\n' `elem` s || if '{' `elem` s || '}' `elem` s then context' (code $ pack s) == "\\mono{" ++ -- cgit v1.2.3 From 25f5b927773eb730c2d5ef834bd61e1d2d5f09df Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 17 May 2021 15:37:25 +0200 Subject: HTML writer: ensure headings only have valid attribs in HTML4 Fixes: #5944 --- src/Text/Pandoc/Writers/HTML.hs | 16 +++++- test/Tests/Writers/HTML.hs | 109 +++++++++++++++++++++------------------- 2 files changed, 71 insertions(+), 54 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 332de1545..f7a387927 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -882,7 +882,7 @@ blockToHtml opts (BlockQuote blocks) = do else do contents <- blockListToHtml opts blocks return $ H.blockquote $ nl opts >> contents >> nl opts -blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do +blockToHtml opts (Header level (ident,classes,kvs) lst) = do contents <- inlineListToHtml opts lst let secnum = fromMaybe mempty $ lookup "number" kvs let contents' = if writerNumberSections opts && not (T.null secnum) @@ -890,7 +890,13 @@ blockToHtml opts (Header level attr@(_,classes,kvs) lst) = do then (H.span ! A.class_ "header-section-number" $ toHtml secnum) >> strToHtml " " >> contents else contents - addAttrs opts attr + html5 <- gets stHtml5 + let kvs' = if html5 + then kvs + else [ (k, v) | (k, v) <- kvs + , k `elem` (["lang", "dir", "title", "style" + , "align"] ++ intrinsicEventsHTML4)] + addAttrs opts (ident,classes,kvs') $ case level of 1 -> H.h1 contents' 2 -> H.h2 contents' @@ -1526,6 +1532,12 @@ allowsMathEnvironments MathML = True allowsMathEnvironments (WebTeX _) = True allowsMathEnvironments _ = False +-- | List of intrinsic event attributes allowed on all elements in HTML4. +intrinsicEventsHTML4 :: [Text] +intrinsicEventsHTML4 = + [ "onclick", "ondblclick", "onmousedown", "onmouseup", "onmouseover" + , "onmouseout", "onmouseout", "onkeypress", "onkeydown", "onkeyup"] + isRawHtml :: PandocMonad m => Format -> StateT WriterState m Bool isRawHtml f = do html5 <- gets stHtml5 diff --git a/test/Tests/Writers/HTML.hs b/test/Tests/Writers/HTML.hs index 328801e31..404f6da98 100644 --- a/test/Tests/Writers/HTML.hs +++ b/test/Tests/Writers/HTML.hs @@ -34,55 +34,60 @@ infix 4 =: (=:) = test html tests :: [TestTree] -tests = [ testGroup "inline code" - [ "basic" =: code "@&" =?> "<code>@&</code>" - , "haskell" =: codeWith ("",["haskell"],[]) ">>=" - =?> "<code class=\"sourceCode haskell\"><span class=\"op\">>>=</span></code>" - , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>=" - =?> "<code class=\"nolanguage\">>>=</code>" - ] - , testGroup "images" - [ "alt with formatting" =: - image "/url" "title" ("my " <> emph "image") - =?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />" - ] - , testGroup "blocks" - [ "definition list with empty <dt>" =: - definitionList [(mempty, [para $ text "foo bar"])] - =?> "<dl><dt></dt><dd><p>foo bar</p></dd></dl>" - ] - , testGroup "quotes" - [ "quote with cite attribute (without q-tags)" =: - doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) - =?> "“<span cite=\"http://example.org\">examples</span>”" - , tQ "quote with cite attribute (with q-tags)" $ - doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) - =?> "<q cite=\"http://example.org\">examples</q>" - ] - , testGroup "sample" - [ "sample should be rendered correctly" =: - plain (codeWith ("",["sample"],[]) "Answer is 42") =?> - "<samp>Answer is 42</samp>" - ] - , testGroup "variable" - [ "variable should be rendered correctly" =: - plain (codeWith ("",["variable"],[]) "result") =?> - "<var>result</var>" - ] - , testGroup "sample with style" - [ "samp should wrap highlighted code" =: - codeWith ("",["sample","haskell"],[]) ">>=" - =?> ("<samp><code class=\"sourceCode haskell\">" ++ - "<span class=\"op\">>>=</span></code></samp>") - ] - , testGroup "variable with style" - [ "var should wrap highlighted code" =: - codeWith ("",["haskell","variable"],[]) ">>=" - =?> ("<var><code class=\"sourceCode haskell\">" ++ - "<span class=\"op\">>>=</span></code></var>") - ] - ] - where - tQ :: (ToString a, ToPandoc a) - => String -> (a, String) -> TestTree - tQ = test htmlQTags +tests = + [ testGroup "inline code" + [ "basic" =: code "@&" =?> "<code>@&</code>" + , "haskell" =: codeWith ("",["haskell"],[]) ">>=" + =?> "<code class=\"sourceCode haskell\"><span class=\"op\">>>=</span></code>" + , "nolanguage" =: codeWith ("",["nolanguage"],[]) ">>=" + =?> "<code class=\"nolanguage\">>>=</code>" + ] + , testGroup "images" + [ "alt with formatting" =: + image "/url" "title" ("my " <> emph "image") + =?> "<img src=\"/url\" title=\"title\" alt=\"my image\" />" + ] + , testGroup "blocks" + [ "definition list with empty <dt>" =: + definitionList [(mempty, [para $ text "foo bar"])] + =?> "<dl><dt></dt><dd><p>foo bar</p></dd></dl>" + , "heading with disallowed attributes" =: + headerWith ("", [], [("invalid","1"), ("lang", "en")]) 1 "test" + =?> + "<h1 lang=\"en\">test</h1>" + ] + , testGroup "quotes" + [ "quote with cite attribute (without q-tags)" =: + doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) + =?> "“<span cite=\"http://example.org\">examples</span>”" + , tQ "quote with cite attribute (with q-tags)" $ + doubleQuoted (spanWith ("", [], [("cite", "http://example.org")]) (str "examples")) + =?> "<q cite=\"http://example.org\">examples</q>" + ] + , testGroup "sample" + [ "sample should be rendered correctly" =: + plain (codeWith ("",["sample"],[]) "Answer is 42") =?> + "<samp>Answer is 42</samp>" + ] + , testGroup "variable" + [ "variable should be rendered correctly" =: + plain (codeWith ("",["variable"],[]) "result") =?> + "<var>result</var>" + ] + , testGroup "sample with style" + [ "samp should wrap highlighted code" =: + codeWith ("",["sample","haskell"],[]) ">>=" + =?> ("<samp><code class=\"sourceCode haskell\">" ++ + "<span class=\"op\">>>=</span></code></samp>") + ] + , testGroup "variable with style" + [ "var should wrap highlighted code" =: + codeWith ("",["haskell","variable"],[]) ">>=" + =?> ("<var><code class=\"sourceCode haskell\">" ++ + "<span class=\"op\">>>=</span></code></var>") + ] + ] + where + tQ :: (ToString a, ToPandoc a) + => String -> (a, String) -> TestTree + tQ = test htmlQTags -- cgit v1.2.3 From 1843a8793a9043a45c8c427b06f100461889d7ef Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 17 May 2021 18:08:02 +0200 Subject: HTML writer: keep attributes from code nested below pre tag. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a code block is defined with `<pre><code class="language-x">…</code></pre>`, where the `<pre>` element has no attributes, then the attributes from the `<code>` element are used instead. Any leading `language-` prefix is dropped in the code's *class* attribute are dropped to improve syntax highlighting. Closes: #7221 --- src/Text/Pandoc/Readers/HTML.hs | 13 ++++++++++++- test/Tests/Readers/HTML.hs | 11 +++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 0a9e4addf..fc4575f2d 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -557,7 +557,18 @@ pFigure = try $ do pCodeBlock :: PandocMonad m => TagParser m Blocks pCodeBlock = try $ do TagOpen _ attr' <- pSatisfy (matchTagOpen "pre" []) - let attr = toAttr attr' + -- if the `pre` has no attributes, try if it is followed by a `code` + -- element and use those attributes if possible. + attr <- case attr' of + _:_ -> pure (toAttr attr') + [] -> option nullAttr $ do + TagOpen _ codeAttr <- pSatisfy (matchTagOpen "code" []) + pure $ toAttr + [ (k, v') | (k, v) <- codeAttr + -- strip language from class + , let v' = if k == "class" + then fromMaybe v (T.stripPrefix "language-" v) + else v ] contents <- manyTill pAny (pCloses "pre" <|> eof) let rawText = T.concat $ map tagToText contents -- drop leading newline if any diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 7f5849991..9bf567194 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -108,6 +108,17 @@ tests = [ testGroup "base tag" "<header id=\"title\">Title</header>" =?> divWith ("title", mempty, mempty) (plain "Title") ] + , testGroup "code block" + [ test html "attributes in pre > code element" $ + "<pre><code id=\"a\" class=\"python\">\nprint('hi')\n</code></pre>" + =?> + codeBlockWith ("a", ["python"], []) "print('hi')" + + , test html "attributes in pre take precendence" $ + "<pre id=\"c\"><code id=\"d\">\nprint('hi mom!')\n</code></pre>" + =?> + codeBlockWith ("c", [], []) "print('hi mom!')" + ] , askOption $ \(QuickCheckTests numtests) -> testProperty "Round trip" $ withMaxSuccess (if QuickCheckTests numtests == defaultValue -- cgit v1.2.3 From 56fb4dae1ba2f6c28f561964249b89385d482f53 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 17 May 2021 20:42:43 -0700 Subject: Citeproc: ensure that CSL-related attributes are passed on... ...to a Div with id 'refs'. Previously we just left the attributes of such a Div alone, which meant that style options like entry-spacing had no effect there. --- src/Text/Pandoc/Citeproc.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 2f4936fa6..de63aed1f 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -499,7 +499,7 @@ insertRefs refkvs refclasses meta refs bs = put True -- refHeader isn't used if you have an explicit references div let cs' = ordNub $ cs ++ refclasses - return $ Div ("refs",cs',kvs) (xs ++ refs) + return $ Div ("refs",cs' ++ refclasses,kvs ++ refkvs) (xs ++ refs) go x = return x refTitle :: Meta -> Maybe [Inline] -- cgit v1.2.3 From eb3dff148e67e84362632e63848d40ba808940f4 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 18 May 2021 22:55:47 +0200 Subject: LaTeX writer: separate successive quote chars with thin space Successive quote characters are separated with a thin space to improve readability and to prevent unwanted ligatures. Detection of these quotes sometimes had failed if the second quote was nested in a span element. Closes: #6958 --- src/Text/Pandoc/Writers/LaTeX.hs | 5 +++-- test/command/6958.md | 10 ++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 test/command/6958.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index bf57937bd..978f94ea0 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -894,8 +894,9 @@ inlineToLaTeX (Quoted qt lst) = do then char '`' <> inner <> char '\'' else char '\x2018' <> inner <> char '\x2019' where - isQuoted (Quoted _ _) = True - isQuoted _ = False + isQuoted (Span _ (x:_)) = isQuoted x + isQuoted (Quoted _ _) = True + isQuoted _ = False inlineToLaTeX (Str str) = do setEmptyLine False liftM literal $ stringToLaTeX TextString str diff --git a/test/command/6958.md b/test/command/6958.md new file mode 100644 index 000000000..230371d7d --- /dev/null +++ b/test/command/6958.md @@ -0,0 +1,10 @@ +Add thin space between single and double quotes. +``` +% pandoc -t latex+smart +--- +lang: en-GB +--- +'["On the Outside"]{}: Constructing Cycling Citizenship.' +^D +`\,{``On the Outside''}: Constructing Cycling Citizenship.' +``` -- cgit v1.2.3 From ddbd984a0d8ea7e75f78ad6632fe3568e2390deb Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 17 May 2021 09:31:52 -0700 Subject: Text.Pandoc.MediaBag: change type to use a Text key... instead of `[FilePath]`. We normalize the path and use `/` separators for consistency. --- src/Text/Pandoc/Class/PandocMonad.hs | 1 + src/Text/Pandoc/MediaBag.hs | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 7559cd7cd..226194503 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 3249bcdeb..4a9b4efa1 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- | @@ -27,26 +28,31 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) import System.FilePath -import qualified System.FilePath.Posix as Posix import Text.Pandoc.MIME (MimeType, getMimeTypeDef) +import Data.Text (Text) +import qualified Data.Text as T -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map [FilePath] (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString)) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where show bag = "MediaBag " ++ show (mediaDirectory bag) +-- | We represent paths with /, in normalized form. +canonicalize :: FilePath -> Text +canonicalize = T.replace "\\" "/" . T.pack . normalise + -- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds -- to the given path. deleteMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag deleteMedia fp (MediaBag mediamap) = - MediaBag $ M.delete (splitDirectories fp) mediamap + MediaBag $ M.delete (canonicalize fp) mediamap -- | Insert a media item into a 'MediaBag', replacing any existing -- value with the same name. @@ -56,7 +62,7 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (splitDirectories fp) (mime, contents) mediamap) + MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap) where mime = fromMaybe fallback mbMime fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp @@ -66,16 +72,16 @@ insertMedia fp mbMime contents (MediaBag mediamap) = lookupMedia :: FilePath -> MediaBag -> Maybe (MimeType, BL.ByteString) -lookupMedia fp (MediaBag mediamap) = M.lookup (splitDirectories fp) mediamap +lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> - ((Posix.joinPath fp, mime, fromIntegral $ BL.length contents):)) [] mediamap + ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = M.foldrWithKey (\fp (mime,contents) -> - ((Posix.joinPath fp, mime, contents):)) [] mediamap + ((T.unpack fp, mime, contents):)) [] mediamap -- cgit v1.2.3 From 9b5798bd9abe267f783dab7fe5295c9b61e2fdce Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 17 May 2021 21:22:46 -0700 Subject: Use fetchItem instead of downloadOrRead in fetchMediaResource. --- src/Text/Pandoc/Class/PandocMonad.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 226194503..b12850de5 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -635,7 +635,7 @@ withPaths (p:ps) action fp = fetchMediaResource :: PandocMonad m => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) fetchMediaResource src = do - (bs, mt) <- downloadOrRead src + (bs, mt) <- fetchItem src let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) (mt >>= extensionFromMimeType) let bs' = BL.fromChunks [bs] -- cgit v1.2.3 From 640dbf8b8f5e652661df42c631b4343570d7448e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 19 May 2021 09:51:50 -0700 Subject: Remove unused pragma. --- src/Text/Pandoc/Class/PandocMonad.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index b12850de5..dd6499a73 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -1,5 +1,4 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE LambdaCase #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} -- cgit v1.2.3 From 5736b331d8ecaa12cc3e2712211ada37c665a93a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 19 May 2021 16:14:49 -0700 Subject: LaTeX reader: better support for `\xspace`. Previously we only supported it in inline contexts; now we support it in all contexts, including math. Partially addresses #7299. --- src/Text/Pandoc/Readers/LaTeX.hs | 12 ------------ src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 21 +++++++++++++++++++-- test/command/4442.md | 2 +- test/command/7299.md | 23 +++++++++++++++++++++++ 4 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 test/command/7299.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index f90d562ae..2ace18d1b 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -232,16 +232,6 @@ mkImage options (T.unpack -> src) = do _ -> return src return $ imageWith attr (T.pack src') "" alt -doxspace :: PandocMonad m => LP m Inlines -doxspace = - (space <$ lookAhead (satisfyTok startsWithLetter)) <|> return mempty - where startsWithLetter (Tok _ Word t) = - case T.uncons t of - Just (c, _) | isLetter c -> True - _ -> False - startsWithLetter _ = False - - removeDoubleQuotes :: Text -> Text removeDoubleQuotes t = Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" @@ -417,8 +407,6 @@ inlineCommands = M.unions -- LaTeX colors , ("textcolor", coloredInline "color") , ("colorbox", coloredInline "background-color") - -- xspace - , ("xspace", doxspace) -- etoolbox , ("ifstrequal", ifstrequal) , ("newtoggle", braced >>= newToggle) diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 35ce3509d..b6804a825 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -464,7 +464,7 @@ satisfyTok f = do doMacros :: PandocMonad m => LP m () doMacros = do st <- getState - unless (sVerbatimMode st || M.null (sMacros st)) $ do + unless (sVerbatimMode st) $ getInput >>= doMacros' 1 >>= setInput doMacros' :: PandocMonad m => Int -> [Tok] -> LP m [Tok] @@ -526,7 +526,7 @@ doMacros' n inp = $ throwError $ PandocMacroLoop name macros <- sMacros <$> getState case M.lookup name macros of - Nothing -> mzero + Nothing -> trySpecialMacro name ts Just (Macro expansionPoint argspecs optarg newtoks) -> do let getargs' = do args <- @@ -554,6 +554,23 @@ doMacros' n inp = ExpandWhenUsed -> doMacros' (n' + 1) result ExpandWhenDefined -> return result +-- | Certain macros do low-level tex manipulations that can't +-- be represented in our Macro type, so we handle them here. +trySpecialMacro :: PandocMonad m => Text -> [Tok] -> LP m [Tok] +trySpecialMacro "xspace" ts = do + ts' <- doMacros' 1 ts + case ts' of + Tok pos Word t : _ + | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts' + _ -> return ts' +trySpecialMacro _ _ = mzero + +startsWithAlphaNum :: Text -> Bool +startsWithAlphaNum t = + case T.uncons t of + Just (c, _) | isAlphaNum c -> True + _ -> False + setpos :: SourcePos -> Tok -> Tok setpos spos (Tok _ tt txt) = Tok spos tt txt diff --git a/test/command/4442.md b/test/command/4442.md index 8574fe759..447073406 100644 --- a/test/command/4442.md +++ b/test/command/4442.md @@ -5,5 +5,5 @@ ^D \newcommand{\myFruit}{Mango\xspace} -Mango\xspace is the king of fruits. +Mango is the king of fruits. ``` diff --git a/test/command/7299.md b/test/command/7299.md new file mode 100644 index 000000000..0847c40ce --- /dev/null +++ b/test/command/7299.md @@ -0,0 +1,23 @@ +``` +% pandoc -f latex -t plain +$1-{\ensuremath{r}\xspace}$ +^D +1 − r +``` + +``` +% pandoc -f latex -t plain +\newcommand{\foo}{Foo\xspace} + +$\text{\foo bar}$ +^D +Foo bar +``` + +``` +% pandoc -f latex -t plain +a\xspace b +^D +a b +``` + -- cgit v1.2.3 From b6239f41509c368b5befd316c290b5b6cc6f00e3 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Thu, 20 May 2021 10:48:28 +0200 Subject: ZimWiki writer: allow links and emphasis in headers The latest version of ZimWiki supports this. Closes: #6605 --- src/Text/Pandoc/Writers/ZimWiki.hs | 2 +- test/writer.zimwiki | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/ZimWiki.hs b/src/Text/Pandoc/Writers/ZimWiki.hs index fcf9e000d..df914f590 100644 --- a/src/Text/Pandoc/Writers/ZimWiki.hs +++ b/src/Text/Pandoc/Writers/ZimWiki.hs @@ -116,7 +116,7 @@ blockToZimWiki opts b@(RawBlock f str) blockToZimWiki _ HorizontalRule = return "\n----\n" blockToZimWiki opts (Header level _ inlines) = do - contents <- inlineListToZimWiki opts $ removeFormatting inlines -- emphasis, links etc. not allowed in headers + contents <- inlineListToZimWiki opts inlines let eqs = T.replicate ( 7 - level ) "=" return $ eqs <> " " <> contents <> " " <> eqs <> "\n" diff --git a/test/writer.zimwiki b/test/writer.zimwiki index 4b384fb20..f793e5760 100644 --- a/test/writer.zimwiki +++ b/test/writer.zimwiki @@ -8,9 +8,9 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s ====== Headers ====== -===== Level 2 with an embedded link ===== +===== Level 2 with an [[url|embedded link]] ===== -==== Level 3 with emphasis ==== +==== Level 3 with //emphasis// ==== === Level 4 === @@ -18,7 +18,7 @@ This is a set of tests for pandoc. Most of them are adapted from John Gruber’s ====== Level 1 ====== -===== Level 2 with emphasis ===== +===== Level 2 with //emphasis// ===== ==== Level 3 ==== -- cgit v1.2.3 From 8437a4a002210a33ee721c58f5f95605898a8e1b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 08:15:48 -0700 Subject: LaTeX reader: support `\pm` in `SI{..}`. Closes #6620. --- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 4 +++- test/command/6620.md | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index 1952f4e1a..c4fb06700 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -45,7 +45,9 @@ doSI tok = do unit] doSInum :: PandocMonad m => LP m Inlines -doSInum = skipopts *> (tonum . untokenize <$> braced) +doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced) + where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0" + convertPM t = t tonum :: Text -> Inlines tonum value = diff --git a/test/command/6620.md b/test/command/6620.md index 314200d30..00643e57c 100644 --- a/test/command/6620.md +++ b/test/command/6620.md @@ -7,9 +7,12 @@ \SI{0.135(21)}{\m} \SI{12.3(60)}{\m} + +\SI{10.0 \pm 3.3}{\ms} ^D <p>23 ± 2 m</p> <p>125 ± 12 m</p> <p>0.135 ± 0.021 m</p> <p>12.3 ± 6 m</p> +<p>10.0 ± 3.3 ms</p> ``` -- cgit v1.2.3 From a366bd6abccd18a49f0033de850ddd53830cc5ed Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 09:03:29 -0700 Subject: LaTeX reader: Fix parsing of `+-` in siunitx numbers. See #6658. --- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 11 +++++++---- test/command/6620.md | 5 ++++- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index c4fb06700..5e140ef7a 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -45,9 +45,7 @@ doSI tok = do unit] doSInum :: PandocMonad m => LP m Inlines -doSInum = skipopts *> (tonum . untokenize . map convertPM <$> braced) - where convertPM (Tok pos (CtrlSeq "pm") _) = Tok pos Word "\xb1\xa0" - convertPM t = t +doSInum = skipopts *> (tonum . untokenize <$> braced) tonum :: Text -> Inlines tonum value = @@ -74,12 +72,16 @@ parseNumPart = parseDecimalNum <|> parseComma <|> parsePlusMinus <|> + parsePM <|> parseI <|> parseExp <|> parseX <|> parseSpace where - parseDecimalNum = do + parseDecimalNum, parsePlusMinus, parsePM, + parseComma, parseI, parseX, + parseExp, parseSpace :: Parser Text () Inlines + parseDecimalNum = try $ do pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-') basenum <- (pref <>) . T.pack <$> many1 (satisfy (\c -> isDigit c || c == '.')) @@ -100,6 +102,7 @@ parseNumPart = | otherwise -> "." <> t parseComma = str "." <$ char ',' parsePlusMinus = str "\xa0\xb1\xa0" <$ try (string "+-") + parsePM = str "\xa0\xb1\xa0" <$ try (string "\\pm") parseParens = char '(' *> many1 (satisfy (\c -> isDigit c || c == '.')) <* char ')' parseI = str "i" <$ char 'i' diff --git a/test/command/6620.md b/test/command/6620.md index 00643e57c..b19cd1972 100644 --- a/test/command/6620.md +++ b/test/command/6620.md @@ -9,10 +9,13 @@ \SI{12.3(60)}{\m} \SI{10.0 \pm 3.3}{\ms} + +\SI{10.0 +- 3.3}{\ms} ^D <p>23 ± 2 m</p> <p>125 ± 12 m</p> <p>0.135 ± 0.021 m</p> <p>12.3 ± 6 m</p> -<p>10.0 ± 3.3 ms</p> +<p>10.0 ± 3.3 ms</p> +<p>10.0 ± 3.3 ms</p> ``` -- cgit v1.2.3 From 183ce584779b344ad6a6a3e085ddfdb00faf62aa Mon Sep 17 00:00:00 2001 From: Denis Maier <denis.maier@ub.unibe.ch> Date: Thu, 20 May 2021 18:59:53 +0200 Subject: ConTeXt reader: improve ordered lists (#7304) Closes #5016 - change ordered list from itemize to enumerate - adds new itemgroup for ordered lists - add fontfeature for table figures - remove width from itemize in context writer --- data/templates/default.context | 4 ++ src/Text/Pandoc/Writers/ConTeXt.hs | 14 ++----- test/writer.context | 80 ++++++++++++++++++++------------------ test/writers-lang-and-dir.context | 4 ++ 4 files changed, 53 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/data/templates/default.context b/data/templates/default.context index 64de448ca..df39130b2 100644 --- a/data/templates/default.context +++ b/data/templates/default.context @@ -51,6 +51,7 @@ $endif$ % use microtypography \definefontfeature[default][default][script=latn, protrusion=quality, expansion=quality, itlc=yes, textitalics=yes, onum=yes, pnum=yes] +\definefontfeature[default:tnum][default][tnum=yes, pnum=no] \definefontfeature[smallcaps][script=latn, protrusion=quality, expansion=quality, smcp=yes, onum=yes, pnum=yes] \setupalign[hz,hanging] \setupitaliccorrection[global, always] @@ -97,6 +98,9 @@ $endif$ \setupitemize[autointro] % prevent orphan list intro \setupitemize[indentnext=no] +\defineitemgroup[enumerate] +\setupenumerate[each][fit][itemalign=left,distance=.5em,style={\feature[+][default:tnum]}] + \setupfloat[figure][default={here,nonumber}] \setupfloat[table][default={here,nonumber}] diff --git a/src/Text/Pandoc/Writers/ConTeXt.hs b/src/Text/Pandoc/Writers/ConTeXt.hs index 57d752a67..3cafcefba 100644 --- a/src/Text/Pandoc/Writers/ConTeXt.hs +++ b/src/Text/Pandoc/Writers/ConTeXt.hs @@ -16,7 +16,6 @@ module Text.Pandoc.Writers.ConTeXt ( writeConTeXt ) where import Control.Monad.State.Strict import Data.Char (ord, isDigit) import Data.List (intersperse) -import Data.List.NonEmpty (nonEmpty) import Data.Maybe (mapMaybe) import Data.Text (Text) import qualified Data.Text as T @@ -233,14 +232,7 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do Period -> "stopper=." OneParen -> "stopper=)" TwoParens -> "left=(,stopper=)" - let width = maybe 0 maximum $ nonEmpty $ map T.length $ - take (length contents) - (orderedListMarkers (start, style', delim)) - let width' = (toEnum width + 1) / 2 - let width'' = if width' > (1.5 :: Double) - then "width=" <> tshow width' <> "em" - else "" - let specs2Items = filter (not . T.null) [start', delim', width''] + let specs2Items = filter (not . T.null) [start', delim'] let specs2 = if null specs2Items then "" else "[" <> T.intercalate "," specs2Items <> "]" @@ -254,8 +246,8 @@ blockToConTeXt (OrderedList (start, style', delim) lst) = do UpperAlpha -> 'A') : if isTightList lst then ",packed]" else "]" let specs = T.pack style'' <> specs2 - return $ "\\startitemize" <> literal specs $$ vcat contents $$ - "\\stopitemize" <> blankline + return $ "\\startenumerate" <> literal specs $$ vcat contents $$ + "\\stopenumerate" <> blankline blockToConTeXt (DefinitionList lst) = liftM vcat $ mapM defListItemToConTeXt lst blockToConTeXt HorizontalRule = return $ "\\thinrule" <> blankline diff --git a/test/writer.context b/test/writer.context index 77570c249..78667813b 100644 --- a/test/writer.context +++ b/test/writer.context @@ -17,6 +17,7 @@ % use microtypography \definefontfeature[default][default][script=latn, protrusion=quality, expansion=quality, itlc=yes, textitalics=yes, onum=yes, pnum=yes] +\definefontfeature[default:tnum][default][tnum=yes, pnum=no] \definefontfeature[smallcaps][script=latn, protrusion=quality, expansion=quality, smcp=yes, onum=yes, pnum=yes] \setupalign[hz,hanging] \setupitaliccorrection[global, always] @@ -48,6 +49,9 @@ \setupitemize[autointro] % prevent orphan list intro \setupitemize[indentnext=no] +\defineitemgroup[enumerate] +\setupenumerate[each][fit][itemalign=left,distance=.5em,style={\feature[+][default:tnum]}] + \setupfloat[figure][default={here,nonumber}] \setupfloat[table][default={here,nonumber}] @@ -133,12 +137,12 @@ sub status { A list: -\startitemize[n,packed][stopper=.] +\startenumerate[n,packed][stopper=.] \item item one \item item two -\stopitemize +\stopenumerate Nested block quotes: @@ -255,51 +259,51 @@ Minuses loose: Tight: -\startitemize[n,packed][stopper=.] +\startenumerate[n,packed][stopper=.] \item First \item Second \item Third -\stopitemize +\stopenumerate and: -\startitemize[n,packed][stopper=.] +\startenumerate[n,packed][stopper=.] \item One \item Two \item Three -\stopitemize +\stopenumerate Loose using tabs: -\startitemize[n][stopper=.] +\startenumerate[n][stopper=.] \item First \item Second \item Third -\stopitemize +\stopenumerate and using spaces: -\startitemize[n][stopper=.] +\startenumerate[n][stopper=.] \item One \item Two \item Three -\stopitemize +\stopenumerate Multiple paragraphs: -\startitemize[n][stopper=.] +\startenumerate[n][stopper=.] \item Item 1, graf one. @@ -308,7 +312,7 @@ Multiple paragraphs: Item 2. \item Item 3. -\stopitemize +\stopenumerate \subsection[title={Nested},reference={nested}] @@ -327,7 +331,7 @@ Multiple paragraphs: Here's another: -\startitemize[n,packed][stopper=.] +\startenumerate[n,packed][stopper=.] \item First \item @@ -342,11 +346,11 @@ Here's another: \stopitemize \item Third -\stopitemize +\stopenumerate Same thing but with paragraphs: -\startitemize[n][stopper=.] +\startenumerate[n][stopper=.] \item First \item @@ -362,7 +366,7 @@ Same thing but with paragraphs: \stopitemize \item Third -\stopitemize +\stopenumerate \subsection[title={Tabs and spaces},reference={tabs-and-spaces}] @@ -382,7 +386,7 @@ Same thing but with paragraphs: \subsection[title={Fancy list markers},reference={fancy-list-markers}] -\startitemize[n][start=2,left=(,stopper=),width=2.0em] +\startenumerate[n][start=2,left=(,stopper=)] \item begins with 2 \item @@ -390,51 +394,51 @@ Same thing but with paragraphs: with a continuation - \startitemize[r,packed][start=4,stopper=.,width=2.0em] + \startenumerate[r,packed][start=4,stopper=.] \item sublist with roman numerals, starting with 4 \item more items - \startitemize[A,packed][left=(,stopper=),width=2.0em] + \startenumerate[A,packed][left=(,stopper=)] \item a subsublist \item a subsublist - \stopitemize - \stopitemize -\stopitemize + \stopenumerate + \stopenumerate +\stopenumerate Nesting: -\startitemize[A,packed][stopper=.] +\startenumerate[A,packed][stopper=.] \item Upper Alpha - \startitemize[R,packed][stopper=.] + \startenumerate[R,packed][stopper=.] \item Upper Roman. - \startitemize[n,packed][start=6,left=(,stopper=),width=2.0em] + \startenumerate[n,packed][start=6,left=(,stopper=)] \item Decimal start with 6 - \startitemize[a,packed][start=3,stopper=)] + \startenumerate[a,packed][start=3,stopper=)] \item Lower alpha with paren - \stopitemize - \stopitemize - \stopitemize -\stopitemize + \stopenumerate + \stopenumerate + \stopenumerate +\stopenumerate Autonumbering: -\startitemize[n,packed] +\startenumerate[n,packed] \item Autonumber. \item More. - \startitemize[a,packed] + \startenumerate[a,packed] \item Nested. - \stopitemize -\stopitemize + \stopenumerate +\stopenumerate Should not be a list item: @@ -547,12 +551,12 @@ Blank line after term, indented marker, alternate markers: \startdescription{orange} orange fruit - \startitemize[n,packed][stopper=.] + \startenumerate[n,packed][stopper=.] \item sublist \item sublist - \stopitemize + \stopenumerate \stopdescription \section[title={HTML Blocks},reference={html-blocks}] @@ -897,10 +901,10 @@ note{]} Here is an inline note.\footnote{This is {\em easier} to type. Inline Notes can go in quotes.\footnote{In quote.} \stopblockquote -\startitemize[n,packed][stopper=.] +\startenumerate[n,packed][stopper=.] \item And in list items.\footnote{In list.} -\stopitemize +\stopenumerate This paragraph should not be part of the note, as it is not indented. diff --git a/test/writers-lang-and-dir.context b/test/writers-lang-and-dir.context index c4bff1a01..c01ecab58 100644 --- a/test/writers-lang-and-dir.context +++ b/test/writers-lang-and-dir.context @@ -15,6 +15,7 @@ % use microtypography \definefontfeature[default][default][script=latn, protrusion=quality, expansion=quality, itlc=yes, textitalics=yes, onum=yes, pnum=yes] +\definefontfeature[default:tnum][default][tnum=yes, pnum=no] \definefontfeature[smallcaps][script=latn, protrusion=quality, expansion=quality, smcp=yes, onum=yes, pnum=yes] \setupalign[hz,hanging] \setupitaliccorrection[global, always] @@ -46,6 +47,9 @@ \setupitemize[autointro] % prevent orphan list intro \setupitemize[indentnext=no] +\defineitemgroup[enumerate] +\setupenumerate[each][fit][itemalign=left,distance=.5em,style={\feature[+][default:tnum]}] + \setupfloat[figure][default={here,nonumber}] \setupfloat[table][default={here,nonumber}] -- cgit v1.2.3 From 5dc917da3ed997c6e48e22bde242f0f8e1ae5333 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 09:11:26 -0700 Subject: LaTeX reader siunitx: add leading 0 to numbers starting with . --- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 7 +++++-- test/command/6620.md | 3 --- test/command/6658.md | 9 +++++++++ 3 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 test/command/6658.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index 5e140ef7a..1474329d4 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -83,8 +83,11 @@ parseNumPart = parseExp, parseSpace :: Parser Text () Inlines parseDecimalNum = try $ do pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-') - basenum <- (pref <>) . T.pack - <$> many1 (satisfy (\c -> isDigit c || c == '.')) + basenum' <- many1 (satisfy (\c -> isDigit c || c == '.')) + let basenum = pref <> T.pack + (case basenum' of + '.':_ -> '0':basenum' + _ -> basenum') uncertainty <- option mempty $ T.pack <$> parseParens if T.null uncertainty then return $ str basenum diff --git a/test/command/6620.md b/test/command/6620.md index b19cd1972..e448ca6b5 100644 --- a/test/command/6620.md +++ b/test/command/6620.md @@ -9,13 +9,10 @@ \SI{12.3(60)}{\m} \SI{10.0 \pm 3.3}{\ms} - -\SI{10.0 +- 3.3}{\ms} ^D <p>23 ± 2 m</p> <p>125 ± 12 m</p> <p>0.135 ± 0.021 m</p> <p>12.3 ± 6 m</p> <p>10.0 ± 3.3 ms</p> -<p>10.0 ± 3.3 ms</p> ``` diff --git a/test/command/6658.md b/test/command/6658.md new file mode 100644 index 000000000..bcd174465 --- /dev/null +++ b/test/command/6658.md @@ -0,0 +1,9 @@ +``` +pandoc -f latex +\SI{10.0 +- 3.3}{\ms} + +\num{.3e45} +^D +<p>10.0 ± 3.3 ms</p> +<p>0.3 × 10<sup>45</sup></p> +``` -- cgit v1.2.3 From bc5058234feab7646f58dc01379b4eadf95bf411 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 09:18:23 -0700 Subject: LaTeX reader sinuitx: fix + sign on ang. --- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 9 ++++++--- test/command/6658.md | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index 1474329d4..72f81dcde 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -118,11 +118,14 @@ doSIang :: PandocMonad m => LP m Inlines doSIang = do skipopts ps <- T.splitOn ";" . untokenize <$> braced + let dropPlus t = case T.uncons t of + Just ('+',t') -> t' + _ -> t case ps ++ repeat "" of (d:m:s:_) -> return $ - (if T.null d then mempty else str d <> str "\xb0") <> - (if T.null m then mempty else str m <> str "\x2032") <> - (if T.null s then mempty else str s <> str "\x2033") + (if T.null d then mempty else str (dropPlus d) <> str "\xb0") <> + (if T.null m then mempty else str (dropPlus m) <> str "\x2032") <> + (if T.null s then mempty else str (dropPlus s) <> str "\x2033") _ -> return mempty -- converts e.g. \SIrange{100}{200}{\ms} to "100 ms--200 ms" diff --git a/test/command/6658.md b/test/command/6658.md index bcd174465..0a8512f85 100644 --- a/test/command/6658.md +++ b/test/command/6658.md @@ -3,7 +3,10 @@ pandoc -f latex \SI{10.0 +- 3.3}{\ms} \num{.3e45} + +\ang{+10;+3;} ^D <p>10.0 ± 3.3 ms</p> <p>0.3 × 10<sup>45</sup></p> +<p>10°3′</p> ``` -- cgit v1.2.3 From 4e990a8cf9207f2315d6a55a45c93c2857663316 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 10:12:44 -0700 Subject: LaTeX/siunitx: fix parsing of `\cubic` etc. See #6658. --- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 85 +++++++++++++++++++------------- test/command/6658.md | 3 ++ 2 files changed, 53 insertions(+), 35 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index 72f81dcde..63ab7267d 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Readers.LaTeX.SIunitx ( siunitxCommands ) @@ -154,40 +155,55 @@ doSIrange includeUnits tok = do emptyOr160 :: Inlines -> Inlines emptyOr160 x = if x == mempty then x else str "\160" -siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines -siUnit tok = try (do - Tok _ (CtrlSeq name) _ <- anyControlSeq - case name of - "square" -> do - unit <- siUnit tok - return $ unit <> superscript "2" - "cubic" -> do - unit <- siUnit tok - return $ unit <> superscript "3" - "raisetothe" -> do - n <- tok - unit <- siUnit tok - return $ unit <> superscript n - _ -> - case M.lookup name siUnitMap of - Just il -> - option il $ - choice - [ (il <> superscript "2") <$ controlSeq "squared" - , (il <> superscript "3") <$ controlSeq "cubed" - , (\n -> il <> superscript n) <$> (controlSeq "tothe" *> tok) - ] - Nothing -> fail "not an siunit unit command") - <|> (lookAhead anyControlSeq >> tok) - <|> (do Tok _ Word t <- satisfyTok isWordTok - return $ str t) - <|> (symbol '^' *> (superscript <$> tok)) - <|> (symbol '_' *> (subscript <$> tok)) - <|> ("\xa0" <$ symbol '.') - <|> ("\xa0" <$ symbol '~') - <|> tok - <|> (do Tok _ _ t <- anyTok - return (str t)) +siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines +siUnit tok = mconcat <$> many1 siUnitPart + where + siUnitPart :: LP m Inlines + siUnitPart = + (siPrefix <*> siUnitPart) + <|> (do u <- siBase <|> tok + option u $ siSuffix <*> pure u) + siPrefix :: LP m (Inlines -> Inlines) + siPrefix = + (do _ <- controlSeq "per" + skipopts -- TODO handle option + return (str "/" <>)) + <|> (do _ <- controlSeq "square" + skipopts + return (<> superscript "2")) + <|> (do _ <- controlSeq "cubic" + skipopts + return (<> superscript "3")) + <|> (do _ <- controlSeq "raisetothe" + skipopts + n <- tok + return (<> superscript n)) + siSuffix :: LP m (Inlines -> Inlines) + siSuffix = + (do _ <- controlSeq "squared" + skipopts + return (<> superscript "2")) + <|> (do _ <- controlSeq "cubed" + skipopts + return (<> superscript "3")) + <|> (do _ <- controlSeq "tothe" + skipopts + n <- tok + return (<> superscript n)) + siBase :: LP m Inlines + siBase = mconcat <$> many1 + ((try + (do Tok _ (CtrlSeq name) _ <- anyControlSeq + case M.lookup name siUnitMap of + Just il -> pure il + Nothing -> fail "not a unit command")) + <|> (do Tok _ Word t <- satisfyTok isWordTok + return $ str t) + <|> (symbol '^' *> (superscript <$> tok)) + <|> (symbol '_' *> (subscript <$> tok)) + <|> (str "\xa0" <$ symbol '.') + <|> (str "\xa0" <$ symbol '~') + ) siUnitMap :: M.Map Text Inlines siUnitMap = M.fromList @@ -347,7 +363,6 @@ siUnitMap = M.fromList , ("Pa", str "Pa") , ("pascal", str "Pa") , ("percent", str "%") - , ("per", str "/") , ("peta", str "P") , ("pico", str "p") , ("planckbar", emph (str "\x210f")) diff --git a/test/command/6658.md b/test/command/6658.md index 0a8512f85..549610992 100644 --- a/test/command/6658.md +++ b/test/command/6658.md @@ -5,8 +5,11 @@ pandoc -f latex \num{.3e45} \ang{+10;+3;} + +\si{\gram\per\cubic\centi\metre} ^D <p>10.0 ± 3.3 ms</p> <p>0.3 × 10<sup>45</sup></p> <p>10°3′</p> +<p>g/cm<sup>3</sup></p> ``` -- cgit v1.2.3 From bb11f5fb86993559f9999d4795355b02ae78cc3d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 12:06:15 -0700 Subject: LaTeX reader: More siunitx improvements. Closes #6658. There's still one slight divergence from the siunitx behavior: we get 'kg m/A/s' instead of 'kg m/(A s)'. At the moment I'm not going to worry about that. --- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 3 +- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 138 +++++++++++++++++++++---------- test/command/6658.md | 72 ++++++++++++++-- 3 files changed, 161 insertions(+), 52 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index b6804a825..1c77eb299 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -806,7 +806,8 @@ withRaw parser = do keyval :: PandocMonad m => LP m (Text, Text) keyval = try $ do - Tok _ Word key <- satisfyTok isWordTok + key <- untokenize <$> many1 (notFollowedBy (symbol '=') >> + (symbol '-' <|> symbol '_' <|> satisfyTok isWordTok)) sp val <- option mempty $ do symbol '=' diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs index 63ab7267d..b8bf0ce7f 100644 --- a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -10,27 +10,32 @@ import Text.Pandoc.Class import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, optional, space, spaces, withRaw, (<|>)) import Control.Applicative ((<|>)) +import Control.Monad (void) import qualified Data.Map as M import Data.Char (isDigit) import Data.Text (Text) import qualified Data.Text as T import Data.List (intersperse) - +import qualified Data.Sequence as Seq +import Text.Pandoc.Walk (walk) siunitxCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) siunitxCommands tok = M.fromList - [ ("si", skipopts *> dosi tok) + [ ("si", dosi tok) , ("SI", doSI tok) , ("SIrange", doSIrange True tok) , ("numrange", doSIrange False tok) , ("numlist", doSInumlist) + , ("SIlist", doSIlist tok) , ("num", doSInum) , ("ang", doSIang) ] dosi :: PandocMonad m => LP m Inlines -> LP m Inlines -dosi tok = grouped (siUnit tok) <|> siUnit tok +dosi tok = do + options <- option [] keyvals + grouped (siUnit options tok) <|> siUnit options tok -- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" doSI :: PandocMonad m => LP m Inlines -> LP m Inlines @@ -65,9 +70,29 @@ doSInumlist = do mconcat (intersperse (str "," <> space) (init xs)) <> text ", & " <> last xs +doSIlist :: PandocMonad m => LP m Inlines -> LP m Inlines +doSIlist tok = do + options <- option [] keyvals + nums <- map tonum . T.splitOn ";" . untokenize <$> braced + unit <- grouped (siUnit options tok) <|> siUnit options tok + let xs = map (<> (str "\xa0" <> unit)) nums + case xs of + [] -> return mempty + [x] -> return x + _ -> return $ + mconcat (intersperse (str "," <> space) (init xs)) <> + text ", & " <> last xs + parseNum :: Parser Text () Inlines parseNum = (mconcat <$> many parseNumPart) <* eof +minus :: Text +minus = "\x2212" + +hyphenToMinus :: Inline -> Inline +hyphenToMinus (Str t) = Str (T.replace "-" minus t) +hyphenToMinus x = x + parseNumPart :: Parser Text () Inlines parseNumPart = parseDecimalNum <|> @@ -83,7 +108,7 @@ parseNumPart = parseComma, parseI, parseX, parseExp, parseSpace :: Parser Text () Inlines parseDecimalNum = try $ do - pref <- option mempty $ (mempty <$ char '+') <|> ("\x2212" <$ char '-') + pref <- option mempty $ (mempty <$ char '+') <|> (minus <$ char '-') basenum' <- many1 (satisfy (\c -> isDigit c || c == '.')) let basenum = pref <> T.pack (case basenum' of @@ -155,20 +180,30 @@ doSIrange includeUnits tok = do emptyOr160 :: Inlines -> Inlines emptyOr160 x = if x == mempty then x else str "\160" -siUnit :: forall m. PandocMonad m => LP m Inlines -> LP m Inlines -siUnit tok = mconcat <$> many1 siUnitPart +siUnit :: forall m. PandocMonad m => [(Text,Text)] -> LP m Inlines -> LP m Inlines +siUnit options tok = mconcat . intersperse (str "\xa0") <$> many1 siUnitPart where siUnitPart :: LP m Inlines - siUnitPart = - (siPrefix <*> siUnitPart) - <|> (do u <- siBase <|> tok - option u $ siSuffix <*> pure u) + siUnitPart = try $ do + skipMany (void (symbol '.') <|> void (symbol '~') <|> spaces1) + x <- ((siPrefix <*> siBase) + <|> (do u <- siBase <|> tok + option u $ siSuffix <*> pure u)) + option x (siInfix x) + siInfix :: Inlines -> LP m Inlines + siInfix u1 = try $ + (do _ <- controlSeq "per" + u2 <- siUnitPart + let useSlash = lookup "per-mode" options == Just "symbol" + if useSlash + then return (u1 <> str "/" <> u2) + else return (u1 <> str "\xa0" <> negateExponent u2)) + <|> (do _ <- symbol '/' + u2 <- siUnitPart + return (u1 <> str "/" <> u2)) siPrefix :: LP m (Inlines -> Inlines) siPrefix = - (do _ <- controlSeq "per" - skipopts -- TODO handle option - return (str "/" <>)) - <|> (do _ <- controlSeq "square" + (do _ <- controlSeq "square" skipopts return (<> superscript "2")) <|> (do _ <- controlSeq "cubic" @@ -176,7 +211,7 @@ siUnit tok = mconcat <$> many1 siUnitPart return (<> superscript "3")) <|> (do _ <- controlSeq "raisetothe" skipopts - n <- tok + n <- walk hyphenToMinus <$> tok return (<> superscript n)) siSuffix :: LP m (Inlines -> Inlines) siSuffix = @@ -188,23 +223,57 @@ siUnit tok = mconcat <$> many1 siUnitPart return (<> superscript "3")) <|> (do _ <- controlSeq "tothe" skipopts - n <- tok + n <- walk hyphenToMinus <$> tok return (<> superscript n)) + <|> (symbol '^' *> (do n <- walk hyphenToMinus <$> tok + return (<> superscript n))) + <|> (symbol '_' *> (do n <- walk hyphenToMinus <$> tok + return (<> subscript n))) + negateExponent :: Inlines -> Inlines + negateExponent ils = + case Seq.viewr (unMany ils) of + xs Seq.:> Superscript ss -> (Many xs) <> + superscript (str minus <> fromList ss) + _ -> ils <> superscript (str (minus <> "1")) siBase :: LP m Inlines - siBase = mconcat <$> many1 + siBase = ((try (do Tok _ (CtrlSeq name) _ <- anyControlSeq - case M.lookup name siUnitMap of - Just il -> pure il - Nothing -> fail "not a unit command")) + case M.lookup name siUnitModifierMap of + Just il -> (il <>) <$> siBase + Nothing -> + case M.lookup name siUnitMap of + Just il -> pure il + Nothing -> fail "not a unit command")) <|> (do Tok _ Word t <- satisfyTok isWordTok return $ str t) - <|> (symbol '^' *> (superscript <$> tok)) - <|> (symbol '_' *> (subscript <$> tok)) - <|> (str "\xa0" <$ symbol '.') - <|> (str "\xa0" <$ symbol '~') ) +siUnitModifierMap :: M.Map Text Inlines +siUnitModifierMap = M.fromList + [ ("atto", str "a") + , ("centi", str "c") + , ("deca", str "d") + , ("deci", str "d") + , ("deka", str "d") + , ("exa", str "E") + , ("femto", str "f") + , ("giga", str "G") + , ("hecto", str "h") + , ("kilo", str "k") + , ("mega", str "M") + , ("micro", str "μ") + , ("milli", str "m") + , ("nano", str "n") + , ("peta", str "P") + , ("pico", str "p") + , ("tera", str "T") + , ("yocto", str "y") + , ("yotta", str "Y") + , ("zepto", str "z") + , ("zetta", str "Z") + ] + siUnitMap :: M.Map Text Inlines siUnitMap = M.fromList [ ("fg", str "fg") @@ -303,7 +372,6 @@ siUnitMap = M.fromList , ("arcsecond", str "″") , ("astronomicalunit", str "ua") , ("atomicmassunit", str "u") - , ("atto", str "a") , ("bar", str "bar") , ("barn", str "b") , ("becquerel", str "Bq") @@ -311,51 +379,38 @@ siUnitMap = M.fromList , ("bohr", emph (str "a") <> subscript (str "0")) , ("candela", str "cd") , ("celsius", str "°C") - , ("centi", str "c") , ("clight", emph (str "c") <> subscript (str "0")) , ("coulomb", str "C") , ("dalton", str "Da") , ("day", str "d") - , ("deca", str "d") - , ("deci", str "d") , ("decibel", str "db") , ("degreeCelsius",str "°C") , ("degree", str "°") - , ("deka", str "d") , ("electronmass", emph (str "m") <> subscript (str "e")) , ("electronvolt", str "eV") , ("elementarycharge", emph (str "e")) - , ("exa", str "E") , ("farad", str "F") - , ("femto", str "f") - , ("giga", str "G") , ("gram", str "g") , ("gray", str "Gy") , ("hartree", emph (str "E") <> subscript (str "h")) , ("hectare", str "ha") - , ("hecto", str "h") , ("henry", str "H") , ("hertz", str "Hz") , ("hour", str "h") , ("joule", str "J") , ("katal", str "kat") , ("kelvin", str "K") - , ("kilo", str "k") , ("kilogram", str "kg") , ("knot", str "kn") , ("liter", str "L") , ("litre", str "l") , ("lumen", str "lm") , ("lux", str "lx") - , ("mega", str "M") , ("meter", str "m") , ("metre", str "m") - , ("micro", str "μ") - , ("milli", str "m") , ("minute", str "min") , ("mmHg", str "mmHg") , ("mole", str "mol") - , ("nano", str "n") , ("nauticalmile", str "M") , ("neper", str "Np") , ("newton", str "N") @@ -363,24 +418,17 @@ siUnitMap = M.fromList , ("Pa", str "Pa") , ("pascal", str "Pa") , ("percent", str "%") - , ("peta", str "P") - , ("pico", str "p") , ("planckbar", emph (str "\x210f")) , ("radian", str "rad") , ("second", str "s") , ("siemens", str "S") , ("sievert", str "Sv") , ("steradian", str "sr") - , ("tera", str "T") , ("tesla", str "T") , ("tonne", str "t") , ("volt", str "V") , ("watt", str "W") , ("weber", str "Wb") - , ("yocto", str "y") - , ("yotta", str "Y") - , ("zepto", str "z") - , ("zetta", str "Z") ] diff --git a/test/command/6658.md b/test/command/6658.md index 549610992..96700c8fe 100644 --- a/test/command/6658.md +++ b/test/command/6658.md @@ -1,15 +1,75 @@ ``` -pandoc -f latex -\SI{10.0 +- 3.3}{\ms} +pandoc -f latex -t html +\num{12345,67890} + +\num{1+-2i} \num{.3e45} -\ang{+10;+3;} +\num{1.654 x 2.34 x 3.430} + +\si{kg.m.s^{-1}} + +\si{\kilogram\metre\per\second} + +\si[per-mode=symbol]{\kilogram\metre\per\second} + +\si[per-mode=symbol]{\kilogram\metre\per\ampere\per\second} + +\numlist{10;20;30} + +\SIlist{0.13;0.67;0.80}{\milli\metre} + +\numrange{10}{20} + +\SIrange{0.13}{0.67}{\milli\metre} + +\ang{10} + +\ang{1;2;3} + +\ang{;;1} + +\ang{+10;;} + +\ang{-0;1;} + +\si{kg.m/s^2} + +\si{g_{polymer}~mol_{cat}.s^{-1}} + +\si{\kilo\gram\metre\per\square\second} \si{\gram\per\cubic\centi\metre} + +\si{\square\volt\cubic\lumen\per\farad} + +\si{\metre\squared\per\gray\cubic\lux} + +\si{\henry\second} ^D -<p>10.0 ± 3.3 ms</p> +<p>12345.67890</p> +<p>1 ± 2i</p> <p>0.3 × 10<sup>45</sup></p> -<p>10°3′</p> -<p>g/cm<sup>3</sup></p> +<p>1.654 × 2.34 × 3.430</p> +<p>kg m s<sup>−1</sup></p> +<p>kg m s<sup>−1</sup></p> +<p>kg m/s</p> +<p>kg m/A/s</p> +<p>10, 20, & 30</p> +<p>0.13 mm, 0.67 mm, & 0.80 mm</p> +<p>10–20</p> +<p>0.13 mm–0.67 mm</p> +<p>10°</p> +<p>1°2′3″</p> +<p>1″</p> +<p>10°</p> +<p>-0°1′</p> +<p>kg m/s<sup>2</sup></p> +<p>g<sub>polymer</sub> mol<sub>cat</sub> s<sup>−1</sup></p> +<p>kg m s<sup>−2</sup></p> +<p>g cm<sup>−3</sup></p> +<p>V<sup>2</sup> lm<sup>3</sup> F<sup>−1</sup></p> +<p>m<sup>2</sup> Gy<sup>−1</sup> lx<sup>3</sup></p> +<p>H s</p> ``` -- cgit v1.2.3 From d7b5def287aefe91f881daeecc5f72121c843b66 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 17:12:00 -0700 Subject: Ms writer: handle tables with multiple paragraphs. Previously they overflowed the table cell width. We now set line lengths per-cell and restore them after the table has been written. Closes #7288. --- src/Text/Pandoc/Writers/Ms.hs | 28 ++++++++++++++++++++++------ test/command/7288.md | 40 ++++++++++++++++++++++++++++++++++++++++ test/tables.ms | 30 ++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 6 deletions(-) create mode 100644 test/command/7288.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 0ed7a8a64..97c23f24d 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -245,13 +245,17 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) = aligncode AlignDefault = "l" in do caption' <- inlineListToMs' opts caption - let iwidths = if all (== 0) widths - then repeat "" - else map (T.pack . printf "w(%0.1fn)" . (70 *)) widths + let isSimple = all (== 0) widths + let totalWidth = 70 -- 78n default width - 8n indent = 70n let coldescriptions = literal $ T.unwords - (zipWith (\align width -> aligncode align <> width) - alignments iwidths) <> "." + (zipWith (\align width -> aligncode align <> + if width == 0 + then "" + else T.pack $ + printf "w(%0.1fn)" + (totalWidth * width)) + alignments widths) <> "." colheadings <- mapM (blockListToMs opts) headers let makeRow cols = literal "T{" $$ vcat (intersperse (literal "T}\tT{") cols) $$ @@ -260,13 +264,25 @@ blockToMs opts (Table _ blkCapt specs thead tbody tfoot) = then empty else makeRow colheadings $$ char '_' body <- mapM (\row -> do - cols <- mapM (blockListToMs opts) row + cols <- mapM (\(cell, w) -> + (if isSimple + then id + else (literal (".nr LL " <> + T.pack (printf "%0.1fn" + (w * totalWidth))) $$)) <$> + blockListToMs opts cell) (zip row widths) return $ makeRow cols) rows setFirstPara return $ literal ".PP" $$ caption' $$ literal ".na" $$ -- we don't want justification in table cells + (if isSimple + then "" + else ".nr LLold \\n[LL]") $$ literal ".TS" $$ literal "delim(@@) tab(\t);" $$ coldescriptions $$ colheadings' $$ vcat body $$ literal ".TE" $$ + (if isSimple + then "" + else ".nr LL \\n[LLold]") $$ literal ".ad" blockToMs opts (BulletList items) = do diff --git a/test/command/7288.md b/test/command/7288.md new file mode 100644 index 000000000..e94aeeeb3 --- /dev/null +++ b/test/command/7288.md @@ -0,0 +1,40 @@ +``` +% pandoc -f rst -t ms +.. list-table:: + :widths: 50 50 + :header-rows: 1 + + * - Left + - Right + * - Long text that should be easy to break up into multiple lines + - Another long text that should be easy to break up into multiple lines + + Bar +^D +.PP +.na +.nr LLold \n[LL] +.TS +delim(@@) tab( ); +lw(35.0n) lw(35.0n). +T{ +Left +T} T{ +Right +T} +_ +T{ +.nr LL 35.0n +.LP +Long text that should be easy to break up into multiple lines +T} T{ +.nr LL 35.0n +.PP +Another long text that should be easy to break up into multiple lines +.PP +Bar +T} +.TE +.nr LL \n[LLold] +.ad +``` diff --git a/test/tables.ms b/test/tables.ms index 7337db25f..21183a5eb 100644 --- a/test/tables.ms +++ b/test/tables.ms @@ -143,6 +143,7 @@ Multiline table with caption: .PP Here\[cq]s the caption. It may span multiple lines. .na +.nr LLold \n[LL] .TS delim(@@) tab( ); cw(10.5n) lw(9.6n) rw(11.4n) lw(24.5n). @@ -157,30 +158,40 @@ Default aligned T} _ T{ +.nr LL 10.5n First T} T{ +.nr LL 9.6n row T} T{ +.nr LL 11.4n 12.0 T} T{ +.nr LL 24.5n Example of a row that spans multiple lines. T} T{ +.nr LL 10.5n Second T} T{ +.nr LL 9.6n row T} T{ +.nr LL 11.4n 5.0 T} T{ +.nr LL 24.5n Here\[cq]s another one. Note the blank line between rows. T} .TE +.nr LL \n[LLold] .ad .LP Multiline table without caption: .PP .na +.nr LLold \n[LL] .TS delim(@@) tab( ); cw(10.5n) lw(9.6n) rw(11.4n) lw(24.5n). @@ -195,25 +206,34 @@ Default aligned T} _ T{ +.nr LL 10.5n First T} T{ +.nr LL 9.6n row T} T{ +.nr LL 11.4n 12.0 T} T{ +.nr LL 24.5n Example of a row that spans multiple lines. T} T{ +.nr LL 10.5n Second T} T{ +.nr LL 9.6n row T} T{ +.nr LL 11.4n 5.0 T} T{ +.nr LL 24.5n Here\[cq]s another one. Note the blank line between rows. T} .TE +.nr LL \n[LLold] .ad .LP Table without column headers: @@ -255,27 +275,37 @@ T} Multiline table without column headers: .PP .na +.nr LLold \n[LL] .TS delim(@@) tab( ); cw(10.5n) lw(9.6n) rw(11.4n) lw(24.5n). T{ +.nr LL 10.5n First T} T{ +.nr LL 9.6n row T} T{ +.nr LL 11.4n 12.0 T} T{ +.nr LL 24.5n Example of a row that spans multiple lines. T} T{ +.nr LL 10.5n Second T} T{ +.nr LL 9.6n row T} T{ +.nr LL 11.4n 5.0 T} T{ +.nr LL 24.5n Here\[cq]s another one. Note the blank line between rows. T} .TE +.nr LL \n[LLold] .ad -- cgit v1.2.3 From 07d299d353761a7c29aa7e7a51371ad7842ec767 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 20 May 2021 18:45:39 -0700 Subject: DocBook reader: ensure that first and last names are separated. Closes #6541. --- src/Text/Pandoc/Readers/DocBook.hs | 20 ++++++++++++++------ test/command/6541.md | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) create mode 100644 test/command/6541.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 3db459cfd..b01ad3252 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -600,16 +600,24 @@ addMetadataFromElement e = do Nothing -> return () Just z -> addMetaField "author" z addMetaField "subtitle" e - addMetaField "author" e + addAuthor e addMetaField "date" e addMetaField "release" e addMetaField "releaseinfo" e return mempty - where addMetaField fieldname elt = - case filterChildren (named fieldname) elt of - [] -> return () - [z] -> getInlines z >>= addMeta fieldname - zs -> mapM getInlines zs >>= addMeta fieldname + where + addAuthor elt = + case filterChildren (named "author") elt of + [] -> return () + [z] -> fromAuthor z >>= addMeta "author" + zs -> mapM fromAuthor zs >>= addMeta "author" + fromAuthor elt = + mconcat . intersperse space <$> mapM getInlines (elChildren elt) + addMetaField fieldname elt = + case filterChildren (named fieldname) elt of + [] -> return () + [z] -> getInlines z >>= addMeta fieldname + zs -> mapM getInlines zs >>= addMeta fieldname addMeta :: PandocMonad m => ToMetaValue a => Text -> a -> DB m () addMeta field val = modify (setMeta field val) diff --git a/test/command/6541.md b/test/command/6541.md new file mode 100644 index 000000000..956340d4c --- /dev/null +++ b/test/command/6541.md @@ -0,0 +1,27 @@ +``` +% pandoc -f docbook -t markdown -s +<?xml version="1.0"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN" + "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd"> +<book> + +<bookinfo> +<title>Title</title> +<author> +<firstname>Firstname</firstname><surname>Lastname</surname> +</author> +<releaseinfo>1.17</releaseinfo> +</bookinfo> + +<para>Text.</para> + +</book> +^D +--- +author: Firstname Lastname +releaseinfo: 1.17 +title: Title +--- + +Text. +``` -- cgit v1.2.3 From f76fe2ab56606528d4710cc6c40bceb5788c3906 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Sat, 22 May 2021 13:29:13 +0200 Subject: HTML reader: simplify col width parsing --- src/Text/Pandoc/Readers/HTML/Table.hs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index ad0b51253..6537bbce9 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -1,6 +1,5 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.HTML.Table Copyright : © 2006-2021 John MacFarlane, @@ -42,18 +41,15 @@ pCol = try $ do skipMany pBlank optional $ pSatisfy (matchTagClose "col") skipMany pBlank - let width = case lookup "width" attribs of - Nothing -> case lookup "style" attribs of - Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> - fromMaybe 0.0 $ safeRead (T.filter - (`notElem` (" \t\r\n%'\";" :: [Char])) xs) - _ -> 0.0 - Just (T.unsnoc -> Just (xs, '%')) -> - fromMaybe 0.0 $ safeRead xs - _ -> 0.0 - if width > 0.0 - then return $ ColWidth $ width / 100.0 - else return ColWidthDefault + let toColWidth = maybe ColWidthDefault (ColWidth . (/100.0)) . safeRead + return $ fromMaybe ColWidthDefault $ + (case lookup "width" attribs >>= T.unsnoc of + Just (xs, '%') -> Just (toColWidth xs) + _ -> Nothing) <|> + (case lookup "style" attribs >>= T.stripPrefix "width" of + Just xs | T.any (== '%') xs -> Just . toColWidth $ + T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs + _ -> Nothing) pColgroup :: PandocMonad m => TagParser m [ColWidth] pColgroup = try $ do -- cgit v1.2.3 From 80b4b3fe82a19a4ea1e76fc4a81c9c88676c7ce0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 22 May 2021 22:03:51 -0700 Subject: Revert "HTML reader: simplify col width parsing" This reverts commit f76fe2ab56606528d4710cc6c40bceb5788c3906. --- src/Text/Pandoc/Readers/HTML/Table.hs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 6537bbce9..ad0b51253 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -1,5 +1,6 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} {- | Module : Text.Pandoc.Readers.HTML.Table Copyright : © 2006-2021 John MacFarlane, @@ -41,15 +42,18 @@ pCol = try $ do skipMany pBlank optional $ pSatisfy (matchTagClose "col") skipMany pBlank - let toColWidth = maybe ColWidthDefault (ColWidth . (/100.0)) . safeRead - return $ fromMaybe ColWidthDefault $ - (case lookup "width" attribs >>= T.unsnoc of - Just (xs, '%') -> Just (toColWidth xs) - _ -> Nothing) <|> - (case lookup "style" attribs >>= T.stripPrefix "width" of - Just xs | T.any (== '%') xs -> Just . toColWidth $ - T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs - _ -> Nothing) + let width = case lookup "width" attribs of + Nothing -> case lookup "style" attribs of + Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> + fromMaybe 0.0 $ safeRead (T.filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) + _ -> 0.0 + Just (T.unsnoc -> Just (xs, '%')) -> + fromMaybe 0.0 $ safeRead xs + _ -> 0.0 + if width > 0.0 + then return $ ColWidth $ width / 100.0 + else return ColWidthDefault pColgroup :: PandocMonad m => TagParser m [ColWidth] pColgroup = try $ do -- cgit v1.2.3 From 1af2cfb2873c5bb6ddd9fc00d076088b2e62af30 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 22 May 2021 21:56:10 -0700 Subject: Handle relative lengths (e.g. `2*`) in HTML column widths. See <https://www.w3.org/TR/html4/types.html#h-6.6>. "A relative length has the form "i*", where "i" is an integer. When allotting space among elements competing for that space, user agents allot pixel and percentage lengths first, then divide up remaining available space among relative lengths. Each relative length receives a portion of the available space that is proportional to the integer preceding the "*". The value "*" is equivalent to "1*". Thus, if 60 pixels of space are available after the user agent allots pixel and percentage space, and the competing relative lengths are 1*, 2*, and 3*, the 1* will be alloted 10 pixels, the 2* will be alloted 20 pixels, and the 3* will be alloted 30 pixels." Closes #4063. --- src/Text/Pandoc/Readers/HTML/Table.hs | 47 ++++++++++++++++++++++++----------- test/command/4063.md | 29 +++++++++++++++++++++ 2 files changed, 62 insertions(+), 14 deletions(-) create mode 100644 test/command/4063.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index ad0b51253..3a569dd0a 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -17,6 +17,7 @@ module Text.Pandoc.Readers.HTML.Table (pTable) where import Control.Applicative ((<|>)) import Data.Maybe (fromMaybe) +import Data.Either (lefts, rights) import Data.List.NonEmpty (nonEmpty) import Data.Text (Text) import Text.HTML.TagSoup @@ -33,34 +34,51 @@ import Text.Pandoc.Shared (onlySimpleTableCells, safeRead) import qualified Data.Text as T import qualified Text.Pandoc.Builder as B --- | Parses a @<col>@ element, returning the column's width. Defaults to --- @'ColWidthDefault'@ if the width is not set or cannot be determined. -pCol :: PandocMonad m => TagParser m ColWidth +-- | Parses a @<col>@ element, returning the column's width. +-- An Either value is used: Left i means a "relative length" with +-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6); +-- Right w means a regular width. Defaults to @'Right ColWidthDefault'@ +-- if the width is not set or cannot be determined. +pCol :: PandocMonad m => TagParser m (Either Int ColWidth) pCol = try $ do TagOpen _ attribs' <- pSatisfy (matchTagOpen "col" []) let attribs = toStringAttr attribs' skipMany pBlank optional $ pSatisfy (matchTagClose "col") skipMany pBlank - let width = case lookup "width" attribs of + return $ case lookup "width" attribs of Nothing -> case lookup "style" attribs of Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> - fromMaybe 0.0 $ safeRead (T.filter - (`notElem` (" \t\r\n%'\";" :: [Char])) xs) - _ -> 0.0 + maybe (Right ColWidthDefault) (Right . ColWidth) + $ safeRead (T.filter + (`notElem` (" \t\r\n%'\";" :: [Char])) xs) + _ -> Right ColWidthDefault + Just (T.unsnoc -> Just (xs, '*')) -> + maybe (Left 1) Left $ safeRead xs Just (T.unsnoc -> Just (xs, '%')) -> - fromMaybe 0.0 $ safeRead xs - _ -> 0.0 - if width > 0.0 - then return $ ColWidth $ width / 100.0 - else return ColWidthDefault + maybe (Right ColWidthDefault) + (Right . ColWidth . (/ 100.0)) $ safeRead xs + _ -> Right ColWidthDefault -pColgroup :: PandocMonad m => TagParser m [ColWidth] +pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth] pColgroup = try $ do pSatisfy (matchTagOpen "colgroup" []) skipMany pBlank manyTill pCol (pCloses "colgroup" <|> eof) <* skipMany pBlank +resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth] +resolveRelativeLengths ws = + let remaining = 1 - sum (map getColWidth $ rights ws) + relatives = sum $ lefts ws + relUnit = remaining / fromIntegral relatives + toColWidth (Right x) = x + toColWidth (Left i) = ColWidth (fromIntegral i * relUnit) + in map toColWidth ws + +getColWidth :: ColWidth -> Double +getColWidth ColWidthDefault = 0 +getColWidth (ColWidth w) = w + data CellType = HeaderCell | BodyCell @@ -182,7 +200,8 @@ pTable :: PandocMonad m pTable block = try $ do TagOpen _ attribs <- pSatisfy (matchTagOpen "table" []) <* skipMany pBlank caption <- option mempty $ pInTags "caption" block <* skipMany pBlank - widths <- ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank + widths <- resolveRelativeLengths <$> + ((mconcat <$> many1 pColgroup) <|> many pCol) <* skipMany pBlank thead <- pTableHead block <* skipMany pBlank topfoot <- optionMaybe (pTableFoot block) <* skipMany pBlank tbodies <- many (pTableBody block) <* skipMany pBlank diff --git a/test/command/4063.md b/test/command/4063.md new file mode 100644 index 000000000..838472b46 --- /dev/null +++ b/test/command/4063.md @@ -0,0 +1,29 @@ +``` +% pandoc -f html -t native +<table> +<colgroup> + <col width="30%" /> + <col width="*" /> +</colgroup> +<tr> + <td>1</td> + <td>2</td> +</tr> +</table> +^D +[Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidth 0.3) + ,(AlignDefault,ColWidth 0.7)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]]]])] + (TableFoot ("",[],[]) + [])] +``` -- cgit v1.2.3 From 58fbf56548bf985b40e4338befaf5b11a0665cbe Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Mon, 24 May 2021 09:56:02 +0200 Subject: Jira writer: use `{color}` when span has a color attribute Closes: tarleb/jira-wiki-markup#10 --- src/Text/Pandoc/Writers/Jira.hs | 10 +++++++--- test/Tests/Writers/Jira.hs | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index aa78d9419..cf4dadebc 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -309,9 +309,13 @@ quotedToJira qtype xs = do spanToJira :: PandocMonad m => Attr -> [Inline] -> JiraConverter m [Jira.Inline] -spanToJira (ident, _classes, _attribs) inls = case ident of - "" -> toJiraInlines inls - _ -> (Jira.Anchor ident :) <$> toJiraInlines inls +spanToJira (ident, _classes, attribs) inls = + let wrap = case lookup "color" attribs of + Nothing -> id + Just color -> singleton . Jira.ColorInline (Jira.ColorName color) + in wrap <$> case ident of + "" -> toJiraInlines inls + _ -> (Jira.Anchor ident :) <$> toJiraInlines inls registerNotes :: PandocMonad m => [Block] -> JiraConverter m [Jira.Inline] registerNotes contents = do diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index 0c6f48853..d8e856e34 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -67,6 +67,10 @@ tests = [ "id is used as anchor" =: spanWith ("unicorn", [], []) (str "Unicorn") =?> "{anchor:unicorn}Unicorn" + + , "use `color` attribute" =: + spanWith ("",[],[("color","red")]) "ruby" =?> + "{color:red}ruby{color}" ] , testGroup "code" -- cgit v1.2.3 From 8511f6fdf6c9fbc2cc926538bca4ae9f554b4ed9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 23 May 2021 22:57:02 -0700 Subject: MediaBag improvements. In the current dev version, we will sometimes add a version of an image with a hashed name, keeping the original version with the original name, which would leave to undesirable duplication. This change separates the media's filename from the media's canonical name (which is the path of the link in the document itself). Filenames are based on SHA1 hashes and assigned automatically. In Text.Pandoc.MediaBag: - Export MediaItem type [API change]. - Change MediaBag type to a map from Text to MediaItem [API change]. - `lookupMedia` now returns a `MediaItem` [API change]. - Change `insertMedia` so it sets the `mediaPath` to a filename based on the SHA1 hash of the contents. This will be used when contents are extracted. In Text.Pandoc.Class.PandocMonad: - Remove `fetchMediaResource` [API change]. Lua MediaBag module has been changed minimally. In the future it would be better, probably, to give Lua access to the full MediaItem type. --- src/Text/Pandoc/Class/IO.hs | 9 ++++--- src/Text/Pandoc/Class/PandocMonad.hs | 43 ++++++++++++++-------------------- src/Text/Pandoc/Lua/Module/MediaBag.hs | 6 ++--- src/Text/Pandoc/MediaBag.hs | 35 ++++++++++++++++++++------- test/Tests/Readers/Docx.hs | 10 ++++---- 5 files changed, 55 insertions(+), 48 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index bb4e2b732..f12c0a938 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -213,14 +213,13 @@ writeMedia :: (PandocMonad m, MonadIO m) writeMedia dir mediabag subpath = do -- we join and split to convert a/b/c to a\b\c on Windows; -- in zip containers all paths use / - let fullpath = dir </> unEscapeString (normalise subpath) let mbcontents = lookupMedia subpath mediabag case mbcontents of Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just (_, bs) -> do - report $ Extracting $ pack fullpath + Just item -> do + let fullpath = dir </> mediaPath item liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath bs + logIOError $ BL.writeFile fullpath $ mediaContents item -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index dd6499a73..ae6917e06 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -37,7 +37,6 @@ module Text.Pandoc.Class.PandocMonad , setUserDataDir , getUserDataDir , fetchItem - , fetchMediaResource , getInputFiles , setInputFiles , getOutputFile @@ -57,8 +56,6 @@ module Text.Pandoc.Class.PandocMonad import Codec.Archive.Zip import Control.Monad.Except (MonadError (catchError, throwError), MonadTrans, lift, when) -import Data.Digest.Pure.SHA (sha1, showDigest) -import Data.Maybe (fromMaybe) import Data.List (foldl') import Data.Time (UTCTime) import Data.Time.Clock.POSIX (POSIXTime, utcTimeToPOSIXSeconds, @@ -67,7 +64,7 @@ import Data.Time.LocalTime (TimeZone, ZonedTime, utcToZonedTime) import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) -import System.FilePath ((</>), (<.>), takeExtension, dropExtension, +import System.FilePath ((</>), takeExtension, dropExtension, isRelative, splitDirectories) import System.Random (StdGen) import Text.Collate.Lang (Lang(..), parseLang, renderLang) @@ -75,8 +72,8 @@ import Text.Pandoc.Class.CommonState (CommonState (..)) import Text.Pandoc.Definition import Text.Pandoc.Error import Text.Pandoc.Logging -import Text.Pandoc.MIME (MimeType, getMimeType, extensionFromMimeType) -import Text.Pandoc.MediaBag (MediaBag, lookupMedia) +import Text.Pandoc.MIME (MimeType, getMimeType) +import Text.Pandoc.MediaBag (MediaBag, lookupMedia, MediaItem(..)) import Text.Pandoc.Shared (uriPathToPath, safeRead) import Text.Pandoc.Translations (Term(..), Translations, lookupTerm, readTranslations) @@ -376,7 +373,8 @@ fetchItem :: PandocMonad m fetchItem s = do mediabag <- getMediaBag case lookupMedia (T.unpack s) mediabag of - Just (mime, bs) -> return (BL.toStrict bs, Just mime) + Just item -> return (BL.toStrict (mediaContents item), + Just (mediaMimeType item)) Nothing -> downloadOrRead s -- | Returns the content and, if available, the MIME type of a resource. @@ -629,19 +627,6 @@ withPaths (p:ps) action fp = catchError (action (p </> fp)) (\_ -> withPaths ps action fp) --- | Fetch local or remote resource (like an image) and provide data suitable --- for adding it to the MediaBag. -fetchMediaResource :: PandocMonad m - => T.Text -> m (FilePath, Maybe MimeType, BL.ByteString) -fetchMediaResource src = do - (bs, mt) <- fetchItem src - let ext = fromMaybe (T.pack $ takeExtension $ T.unpack src) - (mt >>= extensionFromMimeType) - let bs' = BL.fromChunks [bs] - let basename = showDigest $ sha1 bs' - let fname = basename <.> T.unpack ext - return (fname, mt, bs') - -- | Traverse tree, filling media bag for any images that -- aren't already in the media bag. fillMediaBag :: PandocMonad m => Pandoc -> m Pandoc @@ -649,12 +634,18 @@ fillMediaBag d = walkM handleImage d where handleImage :: PandocMonad m => Inline -> m Inline handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag - case lookupMedia (T.unpack src) mediabag of - Just (_, _) -> return $ Image attr lab (src, tit) - Nothing -> do - (fname, mt, bs) <- fetchMediaResource src - insertMedia fname mt bs - return $ Image attr lab (T.pack fname, tit)) + let fp = T.unpack src + src' <- T.pack <$> case lookupMedia fp mediabag of + Just item -> return $ mediaPath item + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + mediabag' <- getMediaBag + case lookupMedia fp mediabag' of + Just item -> return $ mediaPath item + Nothing -> throwError $ PandocSomeError $ + src <> " not successfully inserted into MediaBag" + return $ Image attr lab (src', tit)) (\e -> case e of PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/Lua/Module/MediaBag.hs b/src/Text/Pandoc/Lua/Module/MediaBag.hs index 78b699176..3eed50fca 100644 --- a/src/Text/Pandoc/Lua/Module/MediaBag.hs +++ b/src/Text/Pandoc/Lua/Module/MediaBag.hs @@ -73,9 +73,9 @@ lookup fp = do res <- MB.lookupMedia fp <$> getMediaBag liftPandocLua $ case res of Nothing -> 1 <$ Lua.pushnil - Just (mimeType, contents) -> do - Lua.push mimeType - Lua.push contents + Just item -> do + Lua.push $ MB.mediaMimeType item + Lua.push $ MB.mediaContents item return 2 list :: PandocLua NumResults diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 4a9b4efa1..a65f315fc 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -15,6 +15,7 @@ Definition of a MediaBag object to hold binary resources, and an interface for interacting with it. -} module Text.Pandoc.MediaBag ( + MediaItem(..), MediaBag, deleteMedia, lookupMedia, @@ -28,15 +29,23 @@ import qualified Data.Map as M import Data.Maybe (fromMaybe) import Data.Typeable (Typeable) import System.FilePath -import Text.Pandoc.MIME (MimeType, getMimeTypeDef) +import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType) import Data.Text (Text) import qualified Data.Text as T +import Data.Digest.Pure.SHA (sha1, showDigest) + +data MediaItem = + MediaItem + { mediaMimeType :: MimeType + , mediaPath :: FilePath + , mediaContents :: BL.ByteString + } deriving (Eq, Ord, Show, Data, Typeable) -- | A container for a collection of binary resources, with names and -- mime types. Note that a 'MediaBag' is a Monoid, so 'mempty' -- can be used for an empty 'MediaBag', and '<>' can be used to append -- two 'MediaBag's. -newtype MediaBag = MediaBag (M.Map Text (MimeType, BL.ByteString)) +newtype MediaBag = MediaBag (M.Map Text MediaItem) deriving (Semigroup, Monoid, Data, Typeable) instance Show MediaBag where @@ -62,26 +71,34 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (canonicalize fp) (mime, contents) mediamap) - where mime = fromMaybe fallback mbMime + MediaBag (M.insert (canonicalize fp) mediaItem mediamap) + where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> + "." <> ext + , mediaContents = contents + , mediaMimeType = mt } fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp + mt = fromMaybe fallback mbMime + ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt + -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath -> MediaBag - -> Maybe (MimeType, BL.ByteString) + -> Maybe MediaItem lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] mediaDirectory (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, fromIntegral (BL.length contents)):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, + fromIntegral (BL.length (mediaContents item))):)) [] mediamap mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = - M.foldrWithKey (\fp (mime,contents) -> - ((T.unpack fp, mime, contents):)) [] mediamap + M.foldrWithKey (\fp item -> + ((T.unpack fp, mediaMimeType item, mediaContents item):)) + [] mediamap diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 2cce70cc5..939ff9939 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -24,7 +24,7 @@ import Test.Tasty.HUnit import Tests.Helpers import Text.Pandoc import qualified Text.Pandoc.Class as P -import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +import qualified Text.Pandoc.MediaBag as MB import Text.Pandoc.UTF8 as UTF8 -- We define a wrapper around pandoc that doesn't normalize in the @@ -91,11 +91,11 @@ getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) getMedia archivePath mediaPath = fmap fromEntry . findEntryByPath ("word/" ++ mediaPath) . toArchive <$> B.readFile archivePath -compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool +compareMediaPathIO :: FilePath -> MB.MediaBag -> FilePath -> IO Bool compareMediaPathIO mediaPath mediaBag docxPath = do docxMedia <- getMedia docxPath mediaPath - let mbBS = case lookupMedia mediaPath mediaBag of - Just (_, bs) -> bs + let mbBS = case MB.lookupMedia mediaPath mediaBag of + Just item -> MB.mediaContents item Nothing -> error ("couldn't find " ++ mediaPath ++ " in media bag") @@ -110,7 +110,7 @@ compareMediaBagIO docxFile = do mb <- runIOorExplode $ readDocx defopts df >> P.getMediaBag bools <- mapM (\(fp, _, _) -> compareMediaPathIO fp mb docxFile) - (mediaDirectory mb) + (MB.mediaDirectory mb) return $ and bools testMediaBagIO :: String -> FilePath -> IO TestTree -- cgit v1.2.3 From d46ea7d7da3f842d265f09730c90cfc3691576ef Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 25 May 2021 16:54:42 +0200 Subject: Jira: add support for "smart" links Support has been added for the new `[alias|https://example.com|smart-card]` syntax. --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/Jira.hs | 2 ++ src/Text/Pandoc/Writers/Jira.hs | 2 ++ stack.yaml | 2 +- test/Tests/Readers/Jira.hs | 8 ++++++++ test/Tests/Writers/Jira.hs | 8 ++++++++ 6 files changed, 22 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 4e10e01f3..241196d7c 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -470,7 +470,7 @@ library http-client-tls >= 0.2.4 && < 0.4, http-types >= 0.8 && < 0.13, ipynb >= 0.1 && < 0.2, - jira-wiki-markup >= 1.3.5 && < 1.4, + jira-wiki-markup >= 1.4 && < 1.5, mtl >= 2.2 && < 2.3, network >= 2.6, network-uri >= 2.6 && < 2.8, diff --git a/src/Text/Pandoc/Readers/Jira.hs b/src/Text/Pandoc/Readers/Jira.hs index a3b415f09..cf111f173 100644 --- a/src/Text/Pandoc/Readers/Jira.hs +++ b/src/Text/Pandoc/Readers/Jira.hs @@ -172,6 +172,8 @@ jiraLinkToPandoc linkType alias url = Jira.Email -> link ("mailto:" <> url') "" alias' Jira.Attachment -> linkWith ("", ["attachment"], []) url' "" alias' Jira.User -> linkWith ("", ["user-account"], []) url' "" alias' + Jira.SmartCard -> linkWith ("", ["smart-card"], []) url' "" alias' + Jira.SmartLink -> linkWith ("", ["smart-link"], []) url' "" alias' -- | Get unicode representation of a Jira icon. iconUnicode :: Jira.Icon -> Text diff --git a/src/Text/Pandoc/Writers/Jira.hs b/src/Text/Pandoc/Writers/Jira.hs index cf4dadebc..1351814e9 100644 --- a/src/Text/Pandoc/Writers/Jira.hs +++ b/src/Text/Pandoc/Writers/Jira.hs @@ -280,6 +280,8 @@ toJiraLink (_, classes, _) (url, _) alias = do | Just email <- T.stripPrefix "mailto:" url' = (Jira.Email, email) | "user-account" `elem` classes = (Jira.User, dropTilde url) | "attachment" `elem` classes = (Jira.Attachment, url) + | "smart-card" `elem` classes = (Jira.SmartCard, url) + | "smart-link" `elem` classes = (Jira.SmartLink, url) | otherwise = (Jira.External, url) dropTilde txt = case T.uncons txt of Just ('~', username) -> username diff --git a/stack.yaml b/stack.yaml index 809157425..16e5ad2cf 100644 --- a/stack.yaml +++ b/stack.yaml @@ -9,7 +9,7 @@ packages: extra-deps: - hslua-1.3.0 - hslua-module-path-0.1.0 -- jira-wiki-markup-1.3.5 +- jira-wiki-markup-1.4.0 - random-1.2.0 - unicode-collation-0.1.3 - citeproc-0.4 diff --git a/test/Tests/Readers/Jira.hs b/test/Tests/Readers/Jira.hs index cb7dde4ea..b7194a3b9 100644 --- a/test/Tests/Readers/Jira.hs +++ b/test/Tests/Readers/Jira.hs @@ -167,6 +167,14 @@ tests = , "user with description" =: "[John Doe|~johndoe]" =?> para (linkWith ("", ["user-account"], []) "~johndoe" "" "John Doe") + + , "'smart' link" =: + "[x|http://example.com|smart-link]" =?> + para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x") + + , "'smart' card" =: + "[x|http://example.com|smart-card]" =?> + para (linkWith ("", ["smart-card"], []) "http://example.com" "" "x") ] , "image" =: diff --git a/test/Tests/Writers/Jira.hs b/test/Tests/Writers/Jira.hs index d8e856e34..00a7ae931 100644 --- a/test/Tests/Writers/Jira.hs +++ b/test/Tests/Writers/Jira.hs @@ -61,6 +61,14 @@ tests = , "user link with user as description" =: linkWith ("", ["user-account"], []) "~johndoe" "" "~johndoe" =?> "[~johndoe]" + + , "'smart' link" =: + para (linkWith ("", ["smart-link"], []) "http://example.com" "" "x") =?> + "[x|http://example.com|smart-link]" + + , "'smart' card" =: + para (linkWith ("", ["smart-card"], []) "http://example.org" "" "x") =?> + "[x|http://example.org|smart-card]" ] , testGroup "spans" -- cgit v1.2.3 From fb40c8109dc969dce74c8153ad1c0d4b33d54a6c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 May 2021 10:07:24 -0700 Subject: Logging: add LoadedResource constructor to LogMessage. [API change] This is for INFO-level messages telling where image data has been loaded from. (This can vary because of the resource path.) --- src/Text/Pandoc/Logging.hs | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 8c7292b69..2642d72ac 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -85,6 +85,7 @@ data LogMessage = | CouldNotParseCSS Text | Fetching Text | Extracting Text + | LoadedResource FilePath FilePath | NoTitleElement Text | NoLangSpecified | InvalidLang Text @@ -195,6 +196,9 @@ instance ToJSON LogMessage where ["path" .= fp] Extracting fp -> ["path" .= fp] + LoadedResource orig found -> + ["for" .= orig + ,"from" .= found] NoTitleElement fallback -> ["fallback" .= fallback] NoLangSpecified -> [] @@ -309,6 +313,8 @@ showLogMessage msg = "Fetching " <> fp <> "..." Extracting fp -> "Extracting " <> fp <> "..." + LoadedResource orig found -> + "Loaded " <> Text.pack orig <> " from " <> Text.pack found NoTitleElement fallback -> "This document format requires a nonempty <title> element.\n" <> "Defaulting to '" <> fallback <> "' as the title.\n" <> @@ -389,6 +395,7 @@ messageVerbosity msg = CouldNotParseCSS{} -> WARNING Fetching{} -> INFO Extracting{} -> INFO + LoadedResource{} -> INFO NoTitleElement{} -> WARNING NoLangSpecified -> INFO InvalidLang{} -> WARNING -- cgit v1.2.3 From f2c1b5746912db945be780961b6503e38c3c7e1e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 May 2021 10:08:30 -0700 Subject: PandocMonad: add info message in `downloadOrRead`... indicating what path local resources have been loaded from. --- src/Text/Pandoc/Class/PandocMonad.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index ae6917e06..b5f401619 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP #-} +{-# LANGUAGE TupleSections #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} @@ -409,9 +410,10 @@ downloadOrRead s = do _ -> readLocalFile fp -- get from local file system where readLocalFile f = do resourcePath <- getResourcePath - cont <- if isRelative f - then withPaths resourcePath readFileStrict f - else readFileStrict f + (fp', cont) <- if isRelative f + then withPaths resourcePath readFileStrict f + else (f,) <$> readFileStrict f + report $ LoadedResource f fp' return (cont, mime) httpcolon = URI{ uriScheme = "http:", uriAuthority = Nothing, @@ -621,10 +623,11 @@ makeCanonical = Posix.joinPath . transformPathParts . splitDirectories -- that filepath. Returns the result of the first successful execution -- of the action, or throws a @PandocResourceNotFound@ exception if the -- action errors for all filepaths. -withPaths :: PandocMonad m => [FilePath] -> (FilePath -> m a) -> FilePath -> m a +withPaths :: PandocMonad m + => [FilePath] -> (FilePath -> m a) -> FilePath -> m (FilePath, a) withPaths [] _ fp = throwError $ PandocResourceNotFound $ T.pack fp withPaths (p:ps) action fp = - catchError (action (p </> fp)) + catchError ((p </> fp,) <$> action (p </> fp)) (\_ -> withPaths ps action fp) -- | Traverse tree, filling media bag for any images that -- cgit v1.2.3 From bb2530caa414234f00e7c89ef18a538708b2297c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 25 May 2021 17:49:48 +0200 Subject: Use haddock-library-1.10.0 --- pandoc.cabal | 2 +- src/Text/Pandoc/Readers/Haddock.hs | 3 ++- stack.yaml | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 241196d7c..c8e49c620 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -461,7 +461,7 @@ library exceptions >= 0.8 && < 0.11, file-embed >= 0.0 && < 0.1, filepath >= 1.1 && < 1.5, - haddock-library >= 1.8 && < 1.10, + haddock-library >= 1.10 && < 1.11, hslua >= 1.1 && < 1.4, hslua-module-path >= 0.1.0 && < 0.2.0, hslua-module-system >= 0.2 && < 0.3, diff --git a/src/Text/Pandoc/Readers/Haddock.hs b/src/Text/Pandoc/Readers/Haddock.hs index 35eaac0a9..67b3af2d3 100644 --- a/src/Text/Pandoc/Readers/Haddock.hs +++ b/src/Text/Pandoc/Readers/Haddock.hs @@ -131,7 +131,8 @@ docHToInlines isCode d' = DocIdentifier s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s _ -> mempty DocIdentifierUnchecked s -> B.codeWith ("",["haskell","identifier"],[]) $ T.pack s - DocModule s -> B.codeWith ("",["haskell","module"],[]) $ T.pack s + DocModule s -> B.codeWith ("",["haskell","module"],[]) $ + T.pack (modLinkName s) DocWarning _ -> mempty -- TODO DocEmphasis d -> B.emph (docHToInlines isCode d) DocMonospaced (DocString s) -> B.code $ T.pack s diff --git a/stack.yaml b/stack.yaml index 16e5ad2cf..ea48fbb88 100644 --- a/stack.yaml +++ b/stack.yaml @@ -7,6 +7,7 @@ flags: packages: - '.' extra-deps: +- haddock-library-1.10.0 - hslua-1.3.0 - hslua-module-path-0.1.0 - jira-wiki-markup-1.4.0 -- cgit v1.2.3 From 105a50569be6d2c1b37cc290abcbfbae1cd0fd1e Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Tue, 25 May 2021 17:49:48 +0200 Subject: Allow compilation with base 4.15 --- src/Text/Pandoc/Options.hs | 34 +++++----- src/Text/Pandoc/Readers/Docx/Combine.hs | 6 +- src/Text/Pandoc/Readers/Odt/ContentReader.hs | 11 ++-- src/Text/Pandoc/Writers/OpenDocument.hs | 98 +++++++++++++--------------- 4 files changed, 72 insertions(+), 77 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 92bda36b2..85d9aa103 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -315,6 +315,23 @@ defaultMathJaxURL = "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.j defaultKaTeXURL :: Text defaultKaTeXURL = "https://cdnjs.cloudflare.com/ajax/libs/KaTeX/0.11.1/" +-- Update documentation in doc/filters.md if this is changed. +$(deriveJSON defaultOptions{ constructorTagModifier = + camelCaseStrToHyphenated + } ''TrackChanges) + +$(deriveJSON defaultOptions{ constructorTagModifier = + camelCaseStrToHyphenated + } ''WrapOption) + +$(deriveJSON defaultOptions{ constructorTagModifier = + camelCaseStrToHyphenated . drop 8 + } ''TopLevelDivision) + +$(deriveJSON defaultOptions{ constructorTagModifier = + camelCaseStrToHyphenated + } ''ReferenceLocation) + -- Update documentation in doc/filters.md if this is changed. $(deriveJSON defaultOptions ''ReaderOptions) @@ -338,20 +355,3 @@ $(deriveJSON defaultOptions{ constructorTagModifier = } ''ObfuscationMethod) $(deriveJSON defaultOptions ''HTMLSlideVariant) - --- Update documentation in doc/filters.md if this is changed. -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated - } ''TrackChanges) - -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated - } ''WrapOption) - -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated . drop 8 - } ''TopLevelDivision) - -$(deriveJSON defaultOptions{ constructorTagModifier = - camelCaseStrToHyphenated - } ''ReferenceLocation) diff --git a/src/Text/Pandoc/Readers/Docx/Combine.hs b/src/Text/Pandoc/Readers/Docx/Combine.hs index 7c6d01769..6e4faa639 100644 --- a/src/Text/Pandoc/Readers/Docx/Combine.hs +++ b/src/Text/Pandoc/Readers/Docx/Combine.hs @@ -61,7 +61,7 @@ import Data.List import Data.Bifunctor import Data.Sequence ( ViewL (..), ViewR (..), viewl, viewr, spanr, spanl , (><), (|>) ) -import Text.Pandoc.Builder +import Text.Pandoc.Builder as B data Modifier a = Modifier (a -> a) | AttrModifier (Attr -> a -> a) Attr @@ -116,12 +116,12 @@ ilModifierAndInnards ils = case viewl $ unMany ils of inlinesL :: Inlines -> (Inlines, Inlines) inlinesL ils = case viewl $ unMany ils of - (s :< sq) -> (singleton s, Many sq) + (s :< sq) -> (B.singleton s, Many sq) _ -> (mempty, ils) inlinesR :: Inlines -> (Inlines, Inlines) inlinesR ils = case viewr $ unMany ils of - (sq :> s) -> (Many sq, singleton s) + (sq :> s) -> (Many sq, B.singleton s) _ -> (ils, mempty) combineInlines :: Inlines -> Inlines -> Inlines diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs index c4220b0db..5520d039f 100644 --- a/src/Text/Pandoc/Readers/Odt/ContentReader.hs +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -1,4 +1,5 @@ {-# LANGUAGE Arrows #-} +{-# LANGUAGE CPP #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE PatternGuards #-} @@ -33,7 +34,7 @@ import Data.List (find) import qualified Data.Map as M import qualified Data.Text as T import Data.Maybe -import Data.Semigroup (First(..), Option(..)) +import Data.Monoid (Alt (..)) import Text.TeXMath (readMathML, writeTeX) import qualified Text.Pandoc.XML.Light as XML @@ -505,13 +506,11 @@ type InlineMatcher = ElementMatcher Inlines type BlockMatcher = ElementMatcher Blocks - -newtype FirstMatch a = FirstMatch (Option (First a)) - deriving (Foldable, Monoid, Semigroup) +newtype FirstMatch a = FirstMatch (Alt Maybe a) + deriving (Foldable, Monoid, Semigroup) firstMatch :: a -> FirstMatch a -firstMatch = FirstMatch . Option . Just . First - +firstMatch = FirstMatch . Alt . Just -- matchingElement :: (Monoid e) diff --git a/src/Text/Pandoc/Writers/OpenDocument.hs b/src/Text/Pandoc/Writers/OpenDocument.hs index 34a3a4aa5..5f3224c2f 100644 --- a/src/Text/Pandoc/Writers/OpenDocument.hs +++ b/src/Text/Pandoc/Writers/OpenDocument.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PatternGuards #-} {-# LANGUAGE ViewPatterns #-} @@ -372,36 +373,32 @@ blocksToOpenDocument o b = vcat <$> mapM (blockToOpenDocument o) b -- | Convert a Pandoc block element to OpenDocument. blockToOpenDocument :: PandocMonad m => WriterOptions -> Block -> OD m (Doc Text) -blockToOpenDocument o bs - | Plain b <- bs = if null b - then return empty - else inParagraphTags =<< inlinesToOpenDocument o b - | Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] <- bs - = figure attr c s t - | Para b <- bs = if null b && - not (isEnabled Ext_empty_paragraphs o) - then return empty - else inParagraphTags =<< inlinesToOpenDocument o b - | LineBlock b <- bs = blockToOpenDocument o $ linesToPara b - | Div attr xs <- bs = mkDiv attr xs - | Header i (ident,_,_) b - <- bs = setFirstPara >> (inHeaderTags i ident - =<< inlinesToOpenDocument o b) - | BlockQuote b <- bs = setFirstPara >> mkBlockQuote b - | DefinitionList b <- bs = setFirstPara >> defList b - | BulletList b <- bs = setFirstPara >> bulletListToOpenDocument o b - | OrderedList a b <- bs = setFirstPara >> orderedList a b - | CodeBlock _ s <- bs = setFirstPara >> preformatted s - | Table a bc s th tb tf <- bs = setFirstPara >> table (Ann.toTable a bc s th tb tf) - | HorizontalRule <- bs = setFirstPara >> return (selfClosingTag "text:p" - [ ("text:style-name", "Horizontal_20_Line") ]) - | RawBlock f s <- bs = if f == Format "opendocument" - then return $ text $ T.unpack s - else do - report $ BlockNotRendered bs - return empty - | Null <- bs = return empty - | otherwise = return empty +blockToOpenDocument o = \case + Plain b -> if null b + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + Para [Image attr c (s,T.stripPrefix "fig:" -> Just t)] -> figure attr c s t + Para b -> if null b && + not (isEnabled Ext_empty_paragraphs o) + then return empty + else inParagraphTags =<< inlinesToOpenDocument o b + LineBlock b -> blockToOpenDocument o $ linesToPara b + Div attr xs -> mkDiv attr xs + Header i (ident,_,_) b -> do + setFirstPara + inHeaderTags i ident =<< inlinesToOpenDocument o b + BlockQuote b -> setFirstPara >> mkBlockQuote b + DefinitionList b -> setFirstPara >> defList b + BulletList b -> setFirstPara >> bulletListToOpenDocument o b + OrderedList a b -> setFirstPara >> orderedList a b + CodeBlock _ s -> setFirstPara >> preformatted s + Table a bc s th tb tf -> setFirstPara >> table (Ann.toTable a bc s th tb tf) + HorizontalRule -> setFirstPara >> return (selfClosingTag "text:p" + [ ("text:style-name", "Horizontal_20_Line") ]) + b@(RawBlock f s) -> if f == Format "opendocument" + then return $ text $ T.unpack s + else empty <$ report (BlockNotRendered b) + Null -> return empty where defList b = do setInDefinitionList True r <- vcat <$> mapM (deflistItemToOpenDocument o) b @@ -874,27 +871,26 @@ data TextStyle = Italic textStyleAttr :: Map.Map Text Text -> TextStyle -> Map.Map Text Text -textStyleAttr m s - | Italic <- s = Map.insert "fo:font-style" "italic" . - Map.insert "style:font-style-asian" "italic" . - Map.insert "style:font-style-complex" "italic" $ m - | Bold <- s = Map.insert "fo:font-weight" "bold" . - Map.insert "style:font-weight-asian" "bold" . - Map.insert "style:font-weight-complex" "bold" $ m - | Under <- s = Map.insert "style:text-underline-style" "solid" . - Map.insert "style:text-underline-width" "auto" . - Map.insert "style:text-underline-color" "font-color" $ m - | Strike <- s = Map.insert "style:text-line-through-style" "solid" m - | Sub <- s = Map.insert "style:text-position" "sub 58%" m - | Sup <- s = Map.insert "style:text-position" "super 58%" m - | SmallC <- s = Map.insert "fo:font-variant" "small-caps" m - | Pre <- s = Map.insert "style:font-name" "Courier New" . - Map.insert "style:font-name-asian" "Courier New" . - Map.insert "style:font-name-complex" "Courier New" $ m - | Language lang <- s - = Map.insert "fo:language" (langLanguage lang) . - maybe id (Map.insert "fo:country") (langRegion lang) $ m - | otherwise = m +textStyleAttr m = \case + Italic -> Map.insert "fo:font-style" "italic" . + Map.insert "style:font-style-asian" "italic" . + Map.insert "style:font-style-complex" "italic" $ m + Bold -> Map.insert "fo:font-weight" "bold" . + Map.insert "style:font-weight-asian" "bold" . + Map.insert "style:font-weight-complex" "bold" $ m + Under -> Map.insert "style:text-underline-style" "solid" . + Map.insert "style:text-underline-width" "auto" . + Map.insert "style:text-underline-color" "font-color" $ m + Strike -> Map.insert "style:text-line-through-style" "solid" m + Sub -> Map.insert "style:text-position" "sub 58%" m + Sup -> Map.insert "style:text-position" "super 58%" m + SmallC -> Map.insert "fo:font-variant" "small-caps" m + Pre -> Map.insert "style:font-name" "Courier New" . + Map.insert "style:font-name-asian" "Courier New" . + Map.insert "style:font-name-complex" "Courier New" $ m + Language lang -> + Map.insert "fo:language" (langLanguage lang) . + maybe id (Map.insert "fo:country") (langRegion lang) $ m withLangFromAttr :: PandocMonad m => Attr -> OD m a -> OD m a withLangFromAttr (_,_,kvs) action = -- cgit v1.2.3 From 8d5014fdfc57b80be54a3d23358e92c3b45a7e7d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 25 May 2021 10:36:51 -0700 Subject: Logging: remove single quotes around paths in messages. We weren't doing it consistently and it seems unnecessary. --- src/Text/Pandoc/Logging.hs | 12 ++++++------ test/command/7099.md | 2 +- test/command/svg.md | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Logging.hs b/src/Text/Pandoc/Logging.hs index 2642d72ac..193b8b61c 100644 --- a/src/Text/Pandoc/Logging.hs +++ b/src/Text/Pandoc/Logging.hs @@ -282,7 +282,7 @@ showLogMessage msg = ParsingUnescaped s pos -> "Parsing unescaped '" <> s <> "' at " <> showPos pos CouldNotLoadIncludeFile fp pos -> - "Could not load include file '" <> fp <> "' at " <> showPos pos + "Could not load include file " <> fp <> " at " <> showPos pos MacroAlreadyDefined name pos -> "Macro '" <> name <> "' already defined, ignoring at " <> showPos pos InlineNotRendered il -> @@ -294,18 +294,18 @@ showLogMessage msg = IgnoredIOError s -> "IO Error (ignored): " <> s CouldNotFetchResource fp s -> - "Could not fetch resource '" <> fp <> "'" <> + "Could not fetch resource " <> fp <> if Text.null s then "" else ": " <> s CouldNotDetermineImageSize fp s -> - "Could not determine image size for '" <> fp <> "'" <> + "Could not determine image size for " <> fp <> if Text.null s then "" else ": " <> s CouldNotConvertImage fp s -> - "Could not convert image '" <> fp <> "'" <> + "Could not convert image " <> fp <> if Text.null s then "" else ": " <> s CouldNotDetermineMimeType fp -> - "Could not determine mime type for '" <> fp <> "'" + "Could not determine mime type for " <> fp CouldNotConvertTeXMath s m -> - "Could not convert TeX math '" <> s <> "', rendering as TeX" <> + "Could not convert TeX math " <> s <> ", rendering as TeX" <> if Text.null m then "" else ":\n" <> m CouldNotParseCSS m -> "Could not parse CSS" <> if Text.null m then "" else ":\n" <> m diff --git a/test/command/7099.md b/test/command/7099.md index 33ac8aea1..467b22a16 100644 --- a/test/command/7099.md +++ b/test/command/7099.md @@ -11,7 +11,7 @@ <iframe src="h:invalid@url"></iframe> ^D [INFO] Fetching h:invalid@url... -[WARNING] Could not fetch resource 'h:invalid@url': Could not fetch h:invalid@url +[WARNING] Could not fetch resource h:invalid@url: Could not fetch h:invalid@url InvalidUrlException "h:invalid@url" "Invalid scheme" [INFO] Skipped '<iframe src="h:invalid@url"></iframe>' at input line 1 column 29 [] diff --git a/test/command/svg.md b/test/command/svg.md index 4ba836b20..57c99db33 100644 --- a/test/command/svg.md +++ b/test/command/svg.md @@ -2,7 +2,7 @@ % pandoc -f latex -t icml \includegraphics{command/corrupt.svg} ^D -[WARNING] Could not determine image size for 'command/corrupt.svg': could not determine image type +[WARNING] Could not determine image size for command/corrupt.svg: could not determine image type <ParagraphStyleRange AppliedParagraphStyle="ParagraphStyle/Paragraph"> <CharacterStyleRange AppliedCharacterStyle="$ID/NormalCharacterStyle"> <Rectangle Self="uec" StrokeWeight="0" ItemTransform="1 0 0 1 150 -100"> -- cgit v1.2.3 From 81eadfd99ad3e905b806cc6c80ab0fea0185286f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 26 May 2021 22:50:35 -0700 Subject: LaTeX reader: improve `\def` and implement `\newif`. - Improve parsing of `\def` macros. We previously set "verbatim mode" even for parsing the initial `\def`; this caused problems for things like ``` \def\foo{\def\bar{BAR}} \foo \bar ``` - Implement `\newif`. - Add tests. --- src/Text/Pandoc/Readers/LaTeX/Macro.hs | 59 ++++++++++++++++++++++++-------- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 19 +++++++++- test/command/newif.md | 55 +++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+), 15 deletions(-) create mode 100644 test/command/newif.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Macro.hs b/src/Text/Pandoc/Readers/LaTeX/Macro.hs index 607f5438c..5495a8e74 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Macro.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Macro.hs @@ -14,6 +14,7 @@ import Text.Pandoc.Parsing hiding (blankline, mathDisplay, mathInline, import Control.Applicative ((<|>), optional) import qualified Data.Map as M import Data.Text (Text) +import qualified Data.Text as T macroDef :: (PandocMonad m, Monoid a) => (Text -> a) -> LP m a macroDef constructor = do @@ -22,9 +23,11 @@ macroDef constructor = do guardDisabled Ext_latex_macros) <|> return mempty where commandDef = do - (name, macro') <- newcommand <|> letmacro <|> defmacro + nameMacroPairs <- newcommand <|> letmacro <|> defmacro <|> newif guardDisabled Ext_latex_macros <|> - updateState (\s -> s{ sMacros = M.insert name macro' (sMacros s) }) + mapM_ (\(name, macro') -> + updateState (\s -> s{ sMacros = M.insert name macro' + (sMacros s) })) nameMacroPairs environmentDef = do mbenv <- newenvironment case mbenv of @@ -40,7 +43,7 @@ macroDef constructor = do -- @\newcommand{\envname}[n-args][default]{begin}@ -- @\newcommand{\endenvname}@ -letmacro :: PandocMonad m => LP m (Text, Macro) +letmacro :: PandocMonad m => LP m [(Text, Macro)] letmacro = do controlSeq "let" (name, contents) <- withVerbatimMode $ do @@ -53,18 +56,47 @@ letmacro = do contents <- bracedOrToken return (name, contents) contents' <- doMacros' 0 contents - return (name, Macro ExpandWhenDefined [] Nothing contents') + return [(name, Macro ExpandWhenDefined [] Nothing contents')] -defmacro :: PandocMonad m => LP m (Text, Macro) -defmacro = try $ +defmacro :: PandocMonad m => LP m [(Text, Macro)] +defmacro = do -- we use withVerbatimMode, because macros are to be expanded -- at point of use, not point of definition + controlSeq "def" withVerbatimMode $ do - controlSeq "def" Tok _ (CtrlSeq name) _ <- anyControlSeq argspecs <- many (argspecArg <|> argspecPattern) contents <- bracedOrToken - return (name, Macro ExpandWhenUsed argspecs Nothing contents) + return [(name, Macro ExpandWhenUsed argspecs Nothing contents)] + +-- \newif\iffoo' defines: +-- \iffoo to be \iffalse +-- \footrue to be a command that defines \iffoo to be \iftrue +-- \foofalse to be a command that defines \iffoo to be \iffalse +newif :: PandocMonad m => LP m [(Text, Macro)] +newif = do + controlSeq "newif" + withVerbatimMode $ do + Tok pos (CtrlSeq name) _ <- anyControlSeq + -- \def\iffoo\iffalse + -- \def\footrue{\def\iffoo\iftrue} + -- \def\foofalse{\def\iffoo\iffalse} + let base = T.drop 2 name + return [ (name, Macro ExpandWhenUsed [] Nothing + [Tok pos (CtrlSeq "iffalse") "\\iffalse"]) + , (base <> "true", + Macro ExpandWhenUsed [] Nothing + [ Tok pos (CtrlSeq "def") "\\def" + , Tok pos (CtrlSeq name) ("\\" <> name) + , Tok pos (CtrlSeq "iftrue") "\\iftrue" + ]) + , (base <> "false", + Macro ExpandWhenUsed [] Nothing + [ Tok pos (CtrlSeq "def") "\\def" + , Tok pos (CtrlSeq name) ("\\" <> name) + , Tok pos (CtrlSeq "iffalse") "\\iffalse" + ]) + ] argspecArg :: PandocMonad m => LP m ArgSpec argspecArg = do @@ -77,10 +109,9 @@ argspecPattern = (toktype' == Symbol || toktype' == Word) && (txt /= "{" && txt /= "\\" && txt /= "}"))) -newcommand :: PandocMonad m => LP m (Text, Macro) +newcommand :: PandocMonad m => LP m [(Text, Macro)] newcommand = do - pos <- getPosition - Tok _ (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> + Tok pos (CtrlSeq mtype) _ <- controlSeq "newcommand" <|> controlSeq "renewcommand" <|> controlSeq "providecommand" <|> controlSeq "DeclareMathOperator" <|> @@ -112,9 +143,9 @@ newcommand = do Just macro | mtype == "newcommand" -> do report $ MacroAlreadyDefined txt pos - return (name, macro) - | mtype == "providecommand" -> return (name, macro) - _ -> return (name, Macro ExpandWhenUsed argspecs optarg contents) + return [(name, macro)] + | mtype == "providecommand" -> return [(name, macro)] + _ -> return [(name, Macro ExpandWhenUsed argspecs optarg contents)] newenvironment :: PandocMonad m => LP m (Maybe (Text, Macro, Macro)) newenvironment = do diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index 1c77eb299..a17b1f324 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -113,7 +113,6 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Shared import Text.Parsec.Pos --- import Debug.Trace newtype DottedNum = DottedNum [Int] deriving (Show, Eq) @@ -563,8 +562,26 @@ trySpecialMacro "xspace" ts = do Tok pos Word t : _ | startsWithAlphaNum t -> return $ Tok pos Spaces " " : ts' _ -> return ts' +trySpecialMacro "iftrue" ts = handleIf True ts +trySpecialMacro "iffalse" ts = handleIf False ts trySpecialMacro _ _ = mzero +handleIf :: PandocMonad m => Bool -> [Tok] -> LP m [Tok] +handleIf b ts = do + res' <- lift $ runParserT (ifParser b) defaultLaTeXState "tokens" ts + case res' of + Left _ -> Prelude.fail "Could not parse conditional" + Right ts' -> return ts' + +ifParser :: PandocMonad m => Bool -> LP m [Tok] +ifParser b = do + ifToks <- many (notFollowedBy (controlSeq "else" <|> controlSeq "fi") + *> anyTok) + elseToks <- (controlSeq "else" >> manyTill anyTok (controlSeq "fi")) + <|> ([] <$ controlSeq "fi") + rest <- getInput + return $ (if b then ifToks else elseToks) ++ rest + startsWithAlphaNum :: Text -> Bool startsWithAlphaNum t = case T.uncons t of diff --git a/test/command/newif.md b/test/command/newif.md new file mode 100644 index 000000000..f444f14c9 --- /dev/null +++ b/test/command/newif.md @@ -0,0 +1,55 @@ +``` +% pandoc -f latex -t plain +\iftrue +should print +\iftrue +should print +\else +should not print +\fi +\else +should not print +\fi + +\iffalse +should not print +\else +\iftrue +should print +\else +should not print +\fi +\fi + +\newif\ifepub + +\ifepub +should not print +\fi + +\epubtrue + +\ifepub +should print +\else +should not print +\fi + +\epubfalse + +\ifepub +should not print +\else +should print +\fi +^D +should print + +should print + +should print + +should print + +should print +``` -- cgit v1.2.3 From 834da53058069fe50da510fa86e0807a7ff7868f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 24 May 2021 10:17:37 -0700 Subject: Add `rebase_relative_paths` extension. - Add manual entry for (non-default) extension `rebase_relative_paths`. - Add constructor `Ext_rebase_relative_paths` to `Extensions` in Text.Pandoc.Extensions [API change]. When enabled, this extension rewrites relative image and link paths by prepending the (relative) directory of the containing file. - Make Markdown reader sensitive to the new extension. - Add tests for #3752. Closes #3752. NB. currently the extension applies to markdown and associated readers but not commonmark/gfm. --- MANUAL.txt | 28 ++++++++++++++++++++++++++-- pandoc.cabal | 4 ++++ src/Text/Pandoc/Extensions.hs | 3 +++ src/Text/Pandoc/Readers/Markdown.hs | 36 +++++++++++++++++++++++++++++------- test/command/3752.md | 35 +++++++++++++++++++++++++++++++++++ test/command/chap1/spider.png | Bin 0 -> 63531 bytes test/command/chap1/text.md | 11 +++++++++++ test/command/chap2/spider.png | Bin 0 -> 9861 bytes test/command/chap2/text.md | 3 +++ 9 files changed, 111 insertions(+), 9 deletions(-) create mode 100644 test/command/3752.md create mode 100644 test/command/chap1/spider.png create mode 100644 test/command/chap1/text.md create mode 100644 test/command/chap2/spider.png create mode 100644 test/command/chap2/text.md (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 6dc783e8c..48bf60d6e 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -3755,7 +3755,7 @@ definition: Note that space between items in a definition list is required. (A variant that loosens this requirement, but disallows "lazy" hard wrapping, can be activated with `compact_definition_lists`: see -[Non-pandoc extensions], below.) +[Non-default extensions], below.) [^3]: I have been influenced by the suggestions of [David Wheeler](https://justatheory.com/2009/02/modest-markdown-proposal/). @@ -5051,13 +5051,37 @@ author-in-text style inside notes when using a note style. [finding and editing styles]: https://citationstyles.org/authors/ [CSL locale files]: https://github.com/citation-style-language/locales -## Non-pandoc extensions +## Non-default extensions The following Markdown syntax extensions are not enabled by default in pandoc, but may be enabled by adding `+EXTENSION` to the format name, where `EXTENSION` is the name of the extension. Thus, for example, `markdown+hard_line_breaks` is Markdown with hard line breaks. +#### Extension: `rebase_relative_paths` #### + +Rewrite relative paths for Markdown links and images, depending +on the path of the file containing the link or image link. For +each link or image, pandoc will compute the directory of the +containing file, relative to the working directory, and prepend +the resulting path to the link or image path. + +The use of this extension is best understood by example. +Suppose you have a a subdirectory for each chapter of a book, +`chap1`, `chap2`, `chap3`. Each contains a file `text.md` and a +number of images used in the chapter. You would like to have +`` in `chap1/text.md` refer to +`chap1/spider.jpg` and `` in `chap2/text.md` +refer to `chap2/spider.jpg`. To do this, use + + pandoc chap*/*.md -f markdown+rebase_relative_paths + +Without this extension, you would have to use +`` in `chap1/text.md` and +`` in `chap2/text.md`. Links with +relative paths will be rewritten in the same way as images. +*This option currently only affects Markdown input.* + #### Extension: `attributes` #### Allows attributes to be attached to any inline or block-level diff --git a/pandoc.cabal b/pandoc.cabal index c74df3e69..c8343d16e 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -214,6 +214,10 @@ extra-source-files: test/command/C.txt test/command/D.txt test/command/01.csv + test/command/chap1/spider.png + test/command/chap2/spider.png + test/command/chap1/text.md + test/command/chap2/text.md test/command/defaults1.yaml test/command/defaults2.yaml test/command/defaults3.yaml diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 6423d5f56..c4d54c06e 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -136,6 +136,8 @@ data Extension = | Ext_raw_html -- ^ Allow raw HTML | Ext_raw_tex -- ^ Allow raw TeX (other than math) | Ext_raw_markdown -- ^ Parse markdown in ipynb as raw markdown + | Ext_rebase_relative_paths -- ^ Rebase relative image and link paths, + -- relative to directory of containing file | Ext_shortcut_reference_links -- ^ Shortcut reference links | Ext_simple_tables -- ^ Pandoc-style simple tables | Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes @@ -462,6 +464,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_gutenberg , Ext_smart , Ext_literate_haskell + , Ext_rebase_relative_paths ] getAll "markdown_strict" = allMarkdownExtensions getAll "markdown_phpextra" = allMarkdownExtensions diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 34f16ab4e..968c6c165 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -29,7 +29,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.ByteString.Lazy as BL -import System.FilePath (addExtension, takeExtension) +import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory) import Text.HTML.TagSoup hiding (Row) import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B @@ -1836,9 +1836,12 @@ regLink :: PandocMonad m -> MarkdownParser m (F Inlines) regLink constructor lab = try $ do (src, tit) <- source + rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths) + pos <- getPosition + let src' = if rebase then rebasePath pos src else src attr <- option nullAttr $ guardEnabled Ext_link_attributes >> attributes - return $ constructor attr src tit <$> lab + return $ constructor attr src' tit <$> lab -- a link like [this][ref] or [this][] or [this] referenceLink :: PandocMonad m @@ -1854,6 +1857,8 @@ referenceLink constructor (lab, raw) = do return (mempty, ""))) <|> try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference) + rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths) + pos <- getPosition when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' @@ -1878,7 +1883,9 @@ referenceLink constructor (lab, raw) = do Just ((src, tit), _) -> constructor nullAttr src tit <$> lab Nothing -> makeFallback else makeFallback - Just ((src,tit), attr) -> constructor attr src tit <$> lab + Just ((src,tit), attr) -> + let src' = if rebase then rebasePath pos src else src + in constructor attr src' tit <$> lab dropBrackets :: Text -> Text dropBrackets = dropRB . dropLB @@ -1911,15 +1918,30 @@ autoLink = try $ do return $ return $ B.linkWith attr (src <> escapeURI extra) "" (B.str $ orig <> extra) +-- | Rebase a relative path, by adding the (relative) directory +-- of the containing source position. Absolute links and URLs +-- are untouched. +rebasePath :: SourcePos -> Text -> Text +rebasePath pos path = do + let fp = sourceName pos + in if isAbsolute (T.unpack path) || isURI path + then path + else + case takeDirectory fp of + "" -> path + "." -> path + d -> T.pack d <> "/" <> path + image :: PandocMonad m => MarkdownParser m (F Inlines) image = try $ do char '!' (lab,raw) <- reference defaultExt <- getOption readerDefaultImageExtension - let constructor attr' src = case takeExtension (T.unpack src) of - "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src) - $ T.unpack defaultExt) - _ -> B.imageWith attr' src + let constructor attr' src = + case takeExtension (T.unpack src) of + "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src) + $ T.unpack defaultExt) + _ -> B.imageWith attr' src regLink constructor lab <|> referenceLink constructor (lab,raw) note :: PandocMonad m => MarkdownParser m (F Inlines) diff --git a/test/command/3752.md b/test/command/3752.md new file mode 100644 index 000000000..76d51989b --- /dev/null +++ b/test/command/3752.md @@ -0,0 +1,35 @@ +``` +% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain +^D +[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png +[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg +[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png +Chapter one + +A spider: [spider] + +The moon: [moon] + +Link to spider picture. + +URL left alone: manual. + +Absolute path left alone: absolute. + +Chapter two + +A spider: [spider] +``` + +``` +% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths -t html +^D +<h1 id="chapter-one">Chapter one</h1> +<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p> +<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p> +<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p> +<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p> +<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p> +<h1 id="chapter-two">Chapter two</h1> +<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p> +``` diff --git a/test/command/chap1/spider.png b/test/command/chap1/spider.png new file mode 100644 index 000000000..7ee9fe339 Binary files /dev/null and b/test/command/chap1/spider.png differ diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md new file mode 100644 index 000000000..88b30313d --- /dev/null +++ b/test/command/chap1/text.md @@ -0,0 +1,11 @@ +# Chapter one + +A spider:  + +The moon:  + +Link to [spider picture](spider.png). + +URL left alone: [manual](https://pandoc.org/MANUAL.html). + +Absolute path left alone: [absolute](/foo/bar/baz.png). diff --git a/test/command/chap2/spider.png b/test/command/chap2/spider.png new file mode 100644 index 000000000..5377e940b Binary files /dev/null and b/test/command/chap2/spider.png differ diff --git a/test/command/chap2/text.md b/test/command/chap2/text.md new file mode 100644 index 000000000..435a266d7 --- /dev/null +++ b/test/command/chap2/text.md @@ -0,0 +1,3 @@ +# Chapter two + +A spider:  -- cgit v1.2.3 From cbe16b2866abd8d0e4c15f027562b4b2bed3f01e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 27 May 2021 10:49:45 -0700 Subject: Citeproc: Don't detect math elements as locators. Closes #7321. --- MANUAL.txt | 2 +- src/Text/Pandoc/Citeproc/Locator.hs | 7 +++++++ test/command/7321.md | 24 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 test/command/7321.md (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 48bf60d6e..2ca50e44a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5015,7 +5015,7 @@ the suffix as locator by prepending curly braces: [@smith{ii, A, D-Z}, with a suffix] [@smith, {pp. iv, vi-xi, (xv)-(xvii)} with suffix here] - [@smith{}, $a^2$ and following] + [@smith{}, 99 years later] A minus sign (`-`) before the `@` will suppress mention of the author in the citation. This can be useful when the diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index 44416ca12..dbedc08d9 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -181,6 +181,7 @@ pPageUnit = roman <|> plainUnit plainUnit = do ts <- many1 (notFollowedBy pSpace >> notFollowedBy pLocatorPunct >> + notFollowedBy pMath >> anyToken) let s = stringify ts -- otherwise look for actual digits or -s @@ -211,6 +212,12 @@ pMatchChar msg f = satisfyTok f' <?> msg pSpace :: LocatorParser Inline pSpace = satisfyTok (\t -> isSpacey t || t == Str "\160") <?> "space" +pMath :: LocatorParser Inline +pMath = satisfyTok isMath + where + isMath (Math{}) = True + isMath _ = False + satisfyTok :: (Inline -> Bool) -> LocatorParser Inline satisfyTok f = tokenPrim show (\sp _ _ -> sp) (\tok -> if f tok then Just tok diff --git a/test/command/7321.md b/test/command/7321.md new file mode 100644 index 000000000..f5e644bee --- /dev/null +++ b/test/command/7321.md @@ -0,0 +1,24 @@ +``` +% pandoc -t plain --citeproc --wrap=none +--- +references: +- id: fenner2012a + title: One-click science marketing + author: + - family: Fenner + given: Martin + container-title: Nature Materials + volume: 11 + issue: 4 + publisher: Nature Publishing Group + type: article-journal + issued: + year: 2012 +--- + +[@fenner2012a, $a^2+b^2=c^2$] +^D +(Fenner 2012, a² + b² = c²) + +Fenner, Martin. 2012. “One-Click Science Marketing.” Nature Materials 11 (4). +``` -- cgit v1.2.3 From 6972a7dc9158c1d56b2dc9a06f596386f6b30860 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 27 May 2021 11:26:38 -0700 Subject: Modify rebase_reference_links treatment of reference links/images. The directory is based on the file containing the link reference, not the file containing the link, if these differ. --- MANUAL.txt | 6 +++++- src/Text/Pandoc/Readers/Markdown.hs | 9 ++++----- test/command/3752.md | 5 ++++- test/command/chap1/text.md | 2 ++ test/command/chap2/text.md | 2 ++ 5 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 2ca50e44a..d755c0c00 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5080,7 +5080,11 @@ Without this extension, you would have to use `` in `chap1/text.md` and `` in `chap2/text.md`. Links with relative paths will be rewritten in the same way as images. -*This option currently only affects Markdown input.* + +Note that relative paths in reference links and images will +be rewritten relative to the file containing the link +reference definition, not the file containing the reference link +or image itself, if these differ. #### Extension: `attributes` #### diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 968c6c165..e5cbadc94 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -336,7 +336,9 @@ referenceKey = try $ do notFollowedBy' (() <$ reference) many1Char $ notFollowedBy space >> litChar let betweenAngles = try $ char '<' >> manyTillChar litChar (char '>') - src <- try betweenAngles <|> sourceURL + rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths) + src <- (if rebase then rebasePath pos else id) <$> + (try betweenAngles <|> sourceURL) tit <- option "" referenceTitle attr <- option nullAttr $ try $ do guardEnabled Ext_link_attributes @@ -1857,8 +1859,6 @@ referenceLink constructor (lab, raw) = do return (mempty, ""))) <|> try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference) - rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths) - pos <- getPosition when (raw' == "") $ guardEnabled Ext_shortcut_reference_links let labIsRef = raw' == "" || raw' == "[]" let key = toKey $ if labIsRef then raw else raw' @@ -1884,8 +1884,7 @@ referenceLink constructor (lab, raw) = do Nothing -> makeFallback else makeFallback Just ((src,tit), attr) -> - let src' = if rebase then rebasePath pos src else src - in constructor attr src' tit <$> lab + constructor attr src tit <$> lab dropBrackets :: Text -> Text dropBrackets = dropRB . dropLB diff --git a/test/command/3752.md b/test/command/3752.md index 76d51989b..6d31eb450 100644 --- a/test/command/3752.md +++ b/test/command/3752.md @@ -2,12 +2,14 @@ % pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain ^D [INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png -[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg [INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png +[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg Chapter one A spider: [spider] +Another spider: [another spider] + The moon: [moon] Link to spider picture. @@ -26,6 +28,7 @@ A spider: [spider] ^D <h1 id="chapter-one">Chapter one</h1> <p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p> +<p>Another spider: <img src="command/chap2/spider.png" alt="another spider" /></p> <p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p> <p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p> <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p> diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md index 88b30313d..d25514241 100644 --- a/test/command/chap1/text.md +++ b/test/command/chap1/text.md @@ -2,6 +2,8 @@ A spider:  +Another spider: ![another spider][refspider] + The moon:  Link to [spider picture](spider.png). diff --git a/test/command/chap2/text.md b/test/command/chap2/text.md index 435a266d7..082a1d79e 100644 --- a/test/command/chap2/text.md +++ b/test/command/chap2/text.md @@ -1,3 +1,5 @@ # Chapter two A spider:  + +[refspider]: spider.png -- cgit v1.2.3 From 0661ce699fe83c64dd5a5874e8ab17241b19550c Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 27 May 2021 13:53:26 -0700 Subject: rebase_relative_paths extension: don't change fragment paths. We don't want a pure fragment path to be rewritten, since these are used for cross-referencing. --- MANUAL.txt | 3 +++ src/Text/Pandoc/Readers/Markdown.hs | 3 ++- test/command/3752.md | 3 +++ test/command/chap1/text.md | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index d755c0c00..fb1a76e9a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5081,6 +5081,9 @@ Without this extension, you would have to use `` in `chap2/text.md`. Links with relative paths will be rewritten in the same way as images. +Absolute paths and URLs are not changed. Neither are paths +consisting entirely of a fragment, e.g. `[section one](#foo)`. + Note that relative paths in reference links and images will be rewritten relative to the file containing the link reference definition, not the file containing the reference link diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index e5cbadc94..74f2668e4 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1923,7 +1923,8 @@ autoLink = try $ do rebasePath :: SourcePos -> Text -> Text rebasePath pos path = do let fp = sourceName pos - in if isAbsolute (T.unpack path) || isURI path + isFragment = T.take 1 path == "#" + in if isFragment || isAbsolute (T.unpack path) || isURI path then path else case takeDirectory fp of diff --git a/test/command/3752.md b/test/command/3752.md index 6d31eb450..2765d79ac 100644 --- a/test/command/3752.md +++ b/test/command/3752.md @@ -18,6 +18,8 @@ URL left alone: manual. Absolute path left alone: absolute. +Link to fragment: chapter two. + Chapter two A spider: [spider] @@ -33,6 +35,7 @@ A spider: [spider] <p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p> <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p> <p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p> +<p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p> <h1 id="chapter-two">Chapter two</h1> <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p> ``` diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md index d25514241..68a317161 100644 --- a/test/command/chap1/text.md +++ b/test/command/chap1/text.md @@ -11,3 +11,5 @@ Link to [spider picture](spider.png). URL left alone: [manual](https://pandoc.org/MANUAL.html). Absolute path left alone: [absolute](/foo/bar/baz.png). + +Link to fragment: [chapter two](#chapter-two). -- cgit v1.2.3 From 4b16d181e7219ed161a0e03c0c5ee9dec4b526b4 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 27 May 2021 14:16:37 -0700 Subject: rebase_relative_paths: leave empty paths unchanged. --- MANUAL.txt | 4 ++-- src/Text/Pandoc/Readers/Markdown.hs | 2 +- test/command/3752.md | 3 +++ test/command/chap1/text.md | 2 ++ 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index fb1a76e9a..c234debc9 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -5081,8 +5081,8 @@ Without this extension, you would have to use `` in `chap2/text.md`. Links with relative paths will be rewritten in the same way as images. -Absolute paths and URLs are not changed. Neither are paths -consisting entirely of a fragment, e.g. `[section one](#foo)`. +Absolute paths and URLs are not changed. Neither are empty +paths or paths consisting entirely of a fragment, e.g., `#foo`. Note that relative paths in reference links and images will be rewritten relative to the file containing the link diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 74f2668e4..bc5e3e30f 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1924,7 +1924,7 @@ rebasePath :: SourcePos -> Text -> Text rebasePath pos path = do let fp = sourceName pos isFragment = T.take 1 path == "#" - in if isFragment || isAbsolute (T.unpack path) || isURI path + in if T.null path || isFragment || isAbsolute (T.unpack path) || isURI path then path else case takeDirectory fp of diff --git a/test/command/3752.md b/test/command/3752.md index 2765d79ac..df8af0ba1 100644 --- a/test/command/3752.md +++ b/test/command/3752.md @@ -20,6 +20,8 @@ Absolute path left alone: absolute. Link to fragment: chapter two. +Empty path: empty. + Chapter two A spider: [spider] @@ -36,6 +38,7 @@ A spider: [spider] <p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p> <p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p> <p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p> +<p>Empty path: <a href="">empty</a>.</p> <h1 id="chapter-two">Chapter two</h1> <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p> ``` diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md index 68a317161..54f0104de 100644 --- a/test/command/chap1/text.md +++ b/test/command/chap1/text.md @@ -13,3 +13,5 @@ URL left alone: [manual](https://pandoc.org/MANUAL.html). Absolute path left alone: [absolute](/foo/bar/baz.png). Link to fragment: [chapter two](#chapter-two). + +Empty path: [empty](). -- cgit v1.2.3 From 4842c5fb828c3c34d816fa7bccd4656857742a0b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 27 May 2021 18:28:52 -0700 Subject: Two citeproc locator/suffix improvements: - Recognize locators spelled with a capital letter. Closes #7323. - Add a comma and a space in front of the suffix if it doesn't start with space or punctuation. Closes #7324. --- src/Text/Pandoc/Citeproc/Locator.hs | 14 +++++++++++--- test/command/7323.md | 29 +++++++++++++++++++++++++++++ test/command/7324.md | 25 +++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 test/command/7323.md create mode 100644 test/command/7324.md (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc/Locator.hs b/src/Text/Pandoc/Citeproc/Locator.hs index dbedc08d9..f8931d7b5 100644 --- a/src/Text/Pandoc/Citeproc/Locator.hs +++ b/src/Text/Pandoc/Citeproc/Locator.hs @@ -20,7 +20,7 @@ parseLocator :: Locale -> [Inline] -> (Maybe (Text, Text), [Inline]) parseLocator locale inp = case parse (pLocatorWords (toLocatorMap locale)) "suffix" $ splitInp inp of Right r -> r - Left _ -> (Nothing, inp) + Left _ -> (Nothing, maybeAddComma inp) splitInp :: [Inline] -> [Inline] splitInp = splitStrWhen (\c -> isSpace c || (isPunctuation c && c /= ':')) @@ -42,9 +42,17 @@ pLocatorWords locMap = do -- i.e. the first one will be " 9" return $ if T.null la && T.null lo - then (Nothing, s) + then (Nothing, maybeAddComma s) else (Just (la, T.strip lo), s) +maybeAddComma :: [Inline] -> [Inline] +maybeAddComma [] = [] +maybeAddComma ils@(Space : _) = ils +maybeAddComma ils@(Str t : _) + | Just (c, _) <- T.uncons t + , isPunctuation c = ils +maybeAddComma ils = Str "," : Space : ils + pLocatorDelimited :: LocatorMap -> LocatorParser (Text, Text) pLocatorDelimited locMap = try $ do _ <- pMatchChar "{" (== '{') @@ -97,7 +105,7 @@ pLocatorLabel' locMap lim = go "" t <- anyToken ts <- manyTill anyToken (try $ lookAhead lim) let s = acc <> stringify (t:ts) - case M.lookup (T.strip s) locMap of + case M.lookup (T.toCaseFold $ T.strip s) locMap of -- try to find a longer one, or return this one Just l -> go s <|> return (l, False) Nothing -> go s diff --git a/test/command/7323.md b/test/command/7323.md new file mode 100644 index 000000000..7de29cfe2 --- /dev/null +++ b/test/command/7323.md @@ -0,0 +1,29 @@ +``` +% pandoc --citeproc -t plain +--- +references: +- id: smith + author: John Smith + issued: 2019 + title: Insects + type: book +... + +@smith [chap. 6] + +@smith [chapter 6] + +@smith [Chap. 6] + +@smith [Chapter 6] +^D +John Smith (2019, chap. 6) + +John Smith (2019, chap. 6) + +John Smith (2019, chap. 6) + +John Smith (2019, chap. 6) + +John Smith. 2019. Insects. +``` diff --git a/test/command/7324.md b/test/command/7324.md new file mode 100644 index 000000000..fae1b9923 --- /dev/null +++ b/test/command/7324.md @@ -0,0 +1,25 @@ +``` +% pandoc --citeproc -t plain +--- +references: +- id: smith + author: John Smith + issued: 2019 + title: Insects + type: book +... + +@smith [, among others] + +@smith [ among others] + +@smith [among others] +^D +John Smith (2019, among others) + +John Smith (2019 among others) + +John Smith (2019, among others) + +John Smith. 2019. Insects. +``` -- cgit v1.2.3 From 44484d0dee1bd095240b9faf26f8d1dad8e560ea Mon Sep 17 00:00:00 2001 From: Emily Bourke <undergroundquizscene@gmail.com> Date: Sun, 11 Apr 2021 21:42:53 +0100 Subject: Docx reader: Read table column widths. --- src/Text/Pandoc/Readers/Docx.hs | 5 +- src/Text/Pandoc/Readers/Docx/Parse.hs | 2 +- test/Tests/Writers/Docx.hs | 5 ++ test/docx/0_level_headers.native | 4 +- test/docx/golden/table_one_row.docx | Bin 9840 -> 9840 bytes test/docx/golden/table_with_list_cell.docx | Bin 10159 -> 10162 bytes test/docx/golden/tables-default-widths.docx | Bin 0 -> 10200 bytes test/docx/golden/tables.docx | Bin 10200 -> 10202 bytes test/docx/sdt_elements.native | 6 +- test/docx/table_one_row.native | 8 +-- test/docx/table_variable_width.native | 12 ++-- test/docx/table_with_list_cell.native | 6 +- test/docx/tables-default-widths.native | 92 ++++++++++++++++++++++++++++ test/docx/tables.native | 18 +++--- 14 files changed, 128 insertions(+), 30 deletions(-) create mode 100644 test/docx/golden/tables-default-widths.docx create mode 100644 test/docx/tables-default-widths.native (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 22dd54193..375bb7338 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -639,7 +639,7 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) = bodyPartToBlocks $ Paragraph pPr' parparts bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty -bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do +bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do let cap' = simpleCaption $ plain $ text cap (hdr, rows) = case firstRowFormatting look of True | null rs -> (Nothing, [r]) @@ -669,7 +669,8 @@ bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do -- so should be possible. Alignment might be more difficult, -- since there doesn't seem to be a column entity in docx. let alignments = replicate width AlignDefault - widths = replicate width ColWidthDefault + totalWidth = sum grid + widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid return $ table cap' (zip alignments widths) diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 7325ff300..978d6ff3a 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -563,7 +563,7 @@ elemToTblGrid :: NameSpaces -> Element -> D TblGrid elemToTblGrid ns element | isElem ns "w" "tblGrid" element = let cols = findChildrenByName ns "w" "gridCol" element in - mapD (\e -> maybeToD (findAttrByName ns "w" "val" e >>= stringToInteger)) + mapD (\e -> maybeToD (findAttrByName ns "w" "w" e >>= stringToInteger)) cols elemToTblGrid _ _ = throwError WrongElem diff --git a/test/Tests/Writers/Docx.hs b/test/Tests/Writers/Docx.hs index 2e0f1e3fb..da25b95e0 100644 --- a/test/Tests/Writers/Docx.hs +++ b/test/Tests/Writers/Docx.hs @@ -111,6 +111,11 @@ tests = [ testGroup "inlines" def "docx/tables.native" "docx/golden/tables.docx" + , docxTest + "tables without explicit column widths" + def + "docx/tables-default-widths.native" + "docx/golden/tables-default-widths.docx" , docxTest "tables with lists in cells" def diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index 7f875891e..ed589b029 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -1,6 +1,6 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 1.0)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -49,4 +49,4 @@ ,Para [Strong [Str "Table",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] ,Header 1 ("introduction",[],[]) [Str "Introduction"] -,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]] \ No newline at end of file +,Para [Str "Nothing",Space,Str "to",Space,Str "introduce,",Space,Str "yet."]] diff --git a/test/docx/golden/table_one_row.docx b/test/docx/golden/table_one_row.docx index f75e567ab..a7a8f2519 100644 Binary files a/test/docx/golden/table_one_row.docx and b/test/docx/golden/table_one_row.docx differ diff --git a/test/docx/golden/table_with_list_cell.docx b/test/docx/golden/table_with_list_cell.docx index a49f70643..1362d4609 100644 Binary files a/test/docx/golden/table_with_list_cell.docx and b/test/docx/golden/table_with_list_cell.docx differ diff --git a/test/docx/golden/tables-default-widths.docx b/test/docx/golden/tables-default-widths.docx new file mode 100644 index 000000000..f24e27516 Binary files /dev/null and b/test/docx/golden/tables-default-widths.docx differ diff --git a/test/docx/golden/tables.docx b/test/docx/golden/tables.docx index f24e27516..9dcbbc9d0 100644 Binary files a/test/docx/golden/tables.docx and b/test/docx/golden/tables.docx differ diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native index dca82f0a0..a072c0d39 100644 --- a/test/docx/sdt_elements.native +++ b/test/docx/sdt_elements.native @@ -1,8 +1,8 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.22069570301081556) + ,(AlignDefault,ColWidth 0.22069570301081556) + ,(AlignDefault,ColWidth 0.5586085939783689)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) diff --git a/test/docx/table_one_row.native b/test/docx/table_one_row.native index e9188b145..88d5e3af5 100644 --- a/test/docx/table_one_row.native +++ b/test/docx/table_one_row.native @@ -1,8 +1,8 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.3333333333333333) + ,(AlignDefault,ColWidth 0.3333333333333333) + ,(AlignDefault,ColWidth 0.3333333333333333)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -15,4 +15,4 @@ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "Table"]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native index 229cb83b1..43ac40cca 100644 --- a/test/docx/table_variable_width.native +++ b/test/docx/table_variable_width.native @@ -1,10 +1,10 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 2.0096205237840725e-2) + ,(AlignDefault,ColWidth 1.9882415820416888e-2) + ,(AlignDefault,ColWidth 0.22202030999465527) + ,(AlignDefault,ColWidth 0.4761090326028862) + ,(AlignDefault,ColWidth 1.0689470871191876e-4)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -42,4 +42,4 @@ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) []]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/table_with_list_cell.native b/test/docx/table_with_list_cell.native index 06d8606da..51a35184b 100644 --- a/test/docx/table_with_list_cell.native +++ b/test/docx/table_with_list_cell.native @@ -1,7 +1,7 @@ [Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -22,4 +22,4 @@ ,[Para [Str "A"]] ,[Para [Str "Numbered",Space,Str "list."]]]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/tables-default-widths.native b/test/docx/tables-default-widths.native new file mode 100644 index 000000000..e541e5a6e --- /dev/null +++ b/test/docx/tables-default-widths.native @@ -0,0 +1,92 @@ +[Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Name"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Game"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Fame"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Blame"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Lebron",Space,Str "James"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Basketball"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Very",Space,Str "High"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Leaving",Space,Str "Cleveland"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Ryan",Space,Str "Braun"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Baseball"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Moderate"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Steroids"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Russell",Space,Str "Wilson"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Football"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "High"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Tacky",Space,Str "uniform"]]]])] + (TableFoot ("",[],[]) + []) +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Sinple"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Table"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Without"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Header"]]]])] + (TableFoot ("",[],[]) + []) +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidthDefault) + ,(AlignDefault,ColWidthDefault)] + (TableHead ("",[],[]) + []) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Simple"] + ,Para [Str "Multiparagraph"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Table"] + ,Para [Str "Full"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "Of"] + ,Para [Str "Paragraphs"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Para [Str "In",Space,Str "each"] + ,Para [Str "Cell."]]]])] + (TableFoot ("",[],[]) + [])] \ No newline at end of file diff --git a/test/docx/tables.native b/test/docx/tables.native index e541e5a6e..5a89496be 100644 --- a/test/docx/tables.native +++ b/test/docx/tables.native @@ -1,10 +1,10 @@ [Header 2 ("a-table-with-and-without-a-header-row",[],[]) [Str "A",Space,Str "table,",Space,Str "with",Space,Str "and",Space,Str "without",Space,Str "a",Space,Str "header",Space,Str "row"] ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25) + ,(AlignDefault,ColWidth 0.25)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -48,8 +48,8 @@ []) ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -68,8 +68,8 @@ []) ,Table ("",[],[]) (Caption Nothing []) - [(AlignDefault,ColWidthDefault) - ,(AlignDefault,ColWidthDefault)] + [(AlignDefault,ColWidth 0.5) + ,(AlignDefault,ColWidth 0.5)] (TableHead ("",[],[]) []) [(TableBody ("",[],[]) (RowHeadColumns 0) @@ -89,4 +89,4 @@ [Para [Str "In",Space,Str "each"] ,Para [Str "Cell."]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] -- cgit v1.2.3 From 56b211120c62a01f8aba1c4512acfe4677d8c7d0 Mon Sep 17 00:00:00 2001 From: Emily Bourke <undergroundquizscene@protonmail.com> Date: Thu, 18 Jun 2020 09:53:32 +0100 Subject: Docx reader: Support new table features. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Column spans * Row spans - The spec says that if the `val` attribute is ommitted, its value should be assumed to be `continue`, and that its values are restricted to {`restart`, `continue`}. If the value has any other value, I think it seems reasonable to default it to `continue`. It might cause problems if the spec is extended in the future by adding a third possible value, in which case this would probably give incorrect behaviour, and wouldn't error. * Allow multiple header rows * Include table description in simple caption - The table description element is like alt text for a table (along with the table caption element). It seems like we should include this somewhere, but I’m not 100% sure how – I’m pairing it with the simple caption for the moment. (Should it maybe go in the block caption instead?) * Detect table captions - Check for caption paragraph style /and/ either the simple or complex table field. This means the caption detection fails for captions which don’t contain a field, as in an example doc I added as a test. However, I think it’s better to be too conservative: a missed table caption will still show up as a paragraph next to the table, whereas if I incorrectly classify something else as a table caption it could cause havoc by pairing it up with a table it’s not at all related to, or dropping it entirely. * Update tests and add new ones Partially fixes: #6316 --- src/Text/Pandoc/Readers/Docx.hs | 92 ++++++++------ src/Text/Pandoc/Readers/Docx/Parse.hs | 113 +++++++++++++++-- src/Text/Pandoc/Readers/Docx/Util.hs | 7 ++ test/Tests/Readers/Docx.hs | 16 +++ test/docx/sdt_elements.native | 13 +- test/docx/table_captions_no_field.docx | Bin 0 -> 40482 bytes test/docx/table_captions_no_field.native | 34 ++++++ test/docx/table_captions_with_field.docx | Bin 0 -> 41091 bytes test/docx/table_captions_with_field.native | 54 +++++++++ test/docx/table_header_rowspan.docx | Bin 0 -> 15826 bytes test/docx/table_header_rowspan.native | 189 +++++++++++++++++++++++++++++ test/docx/table_one_header_row.docx | Bin 0 -> 12185 bytes test/docx/table_one_header_row.native | 18 +++ test/docx/table_one_row.docx | Bin 25251 -> 12148 bytes test/docx/table_variable_width.native | 19 ++- 15 files changed, 487 insertions(+), 68 deletions(-) create mode 100644 test/docx/table_captions_no_field.docx create mode 100644 test/docx/table_captions_no_field.native create mode 100644 test/docx/table_captions_with_field.docx create mode 100644 test/docx/table_captions_with_field.native create mode 100644 test/docx/table_header_rowspan.docx create mode 100644 test/docx/table_header_rowspan.native create mode 100644 test/docx/table_one_header_row.docx create mode 100644 test/docx/table_one_header_row.native (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 375bb7338..c06adf7e3 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -61,13 +61,14 @@ module Text.Pandoc.Readers.Docx import Codec.Archive.Zip import Control.Monad.Reader import Control.Monad.State.Strict +import Data.Bifunctor (bimap, first) import qualified Data.ByteString.Lazy as B import Data.Default (Default) -import Data.List (delete, intersect) +import Data.List (delete, intersect, foldl') import Data.Char (isSpace) import qualified Data.Map as M import qualified Data.Text as T -import Data.Maybe (isJust, fromMaybe) +import Data.Maybe (catMaybes, isJust, fromMaybe) import Data.Sequence (ViewL (..), viewl) import qualified Data.Sequence as Seq import qualified Data.Set as Set @@ -113,6 +114,7 @@ data DState = DState { docxAnchorMap :: M.Map T.Text T.Text -- restarting , docxListState :: M.Map (T.Text, T.Text) Integer , docxPrevPara :: Inlines + , docxTableCaptions :: [Blocks] } instance Default DState where @@ -123,6 +125,7 @@ instance Default DState where , docxDropCap = mempty , docxListState = M.empty , docxPrevPara = mempty + , docxTableCaptions = [] } data DEnv = DEnv { docxOptions :: ReaderOptions @@ -491,15 +494,32 @@ singleParaToPlain blks singleton $ Plain ils singleParaToPlain blks = blks -cellToBlocks :: PandocMonad m => Docx.Cell -> DocxContext m Blocks -cellToBlocks (Docx.Cell bps) = do +cellToCell :: PandocMonad m => RowSpan -> Docx.Cell -> DocxContext m Pandoc.Cell +cellToCell rowSpan (Docx.Cell gridSpan _ bps) = do blks <- smushBlocks <$> mapM bodyPartToBlocks bps - return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks + let blks' = singleParaToPlain $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks + return (cell AlignDefault rowSpan (ColSpan (fromIntegral gridSpan)) blks') + +rowsToRows :: PandocMonad m => [Docx.Row] -> DocxContext m [Pandoc.Row] +rowsToRows rows = do + let rowspans = (fmap . fmap) (first RowSpan) (Docx.rowsToRowspans rows) + cells <- traverse (traverse (uncurry cellToCell)) rowspans + return (fmap (Pandoc.Row nullAttr) cells) + +splitHeaderRows :: Bool -> [Docx.Row] -> ([Docx.Row], [Docx.Row]) +splitHeaderRows hasFirstRowFormatting rs = bimap reverse reverse $ fst + $ if hasFirstRowFormatting + then foldl' f ((take 1 rs, []), True) (drop 1 rs) + else foldl' f (([], []), False) rs + where + f ((headerRows, bodyRows), previousRowWasHeader) r@(Docx.Row h cs) + | h == HasTblHeader || (previousRowWasHeader && any isContinuationCell cs) + = ((r : headerRows, bodyRows), True) + | otherwise + = ((headerRows, r : bodyRows), False) + + isContinuationCell (Docx.Cell _ vm _) = vm == Docx.Continue -rowToBlocksList :: PandocMonad m => Docx.Row -> DocxContext m [Blocks] -rowToBlocksList (Docx.Row cells) = do - blksList <- mapM cellToBlocks cells - return $ map singleParaToPlain blksList -- like trimInlines, but also take out linebreaks trimSps :: Inlines -> Inlines @@ -546,6 +566,11 @@ normalizeToClassName = T.map go . fromStyleName where go c | isSpace c = '-' | otherwise = c +bodyPartToTableCaption :: PandocMonad m => BodyPart -> DocxContext m (Maybe Blocks) +bodyPartToTableCaption (TblCaption pPr parparts) = + Just <$> bodyPartToBlocks (Paragraph pPr parparts) +bodyPartToTableCaption _ = pure Nothing + bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks bodyPartToBlocks (Paragraph pPr parparts) | Just True <- pBidi pPr = do @@ -637,50 +662,43 @@ bodyPartToBlocks (ListItem pPr _ _ _ parparts) = let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr} in bodyPartToBlocks $ Paragraph pPr' parparts +bodyPartToBlocks (TblCaption _ _) = + return $ para mempty -- collected separately bodyPartToBlocks (Tbl _ _ _ []) = return $ para mempty -bodyPartToBlocks (Tbl cap grid look parts@(r:rs)) = do - let cap' = simpleCaption $ plain $ text cap - (hdr, rows) = case firstRowFormatting look of - True | null rs -> (Nothing, [r]) - | otherwise -> (Just r, rs) - False -> (Nothing, r:rs) - - cells <- mapM rowToBlocksList rows +bodyPartToBlocks (Tbl cap grid look parts) = do + captions <- gets docxTableCaptions + fullCaption <- case captions of + c : cs -> do + modify (\s -> s { docxTableCaptions = cs }) + return c + [] -> return $ if T.null cap then mempty else plain (text cap) + let shortCaption = if T.null cap then Nothing else Just (toList (text cap)) + cap' = caption shortCaption fullCaption + (hdr, rows) = splitHeaderRows (firstRowFormatting look) parts let width = maybe 0 maximum $ nonEmpty $ map rowLength parts rowLength :: Docx.Row -> Int - rowLength (Docx.Row c) = length c + rowLength (Docx.Row _ c) = sum (fmap (\(Docx.Cell gridSpan _ _) -> fromIntegral gridSpan) c) - let toRow = Pandoc.Row nullAttr . map simpleCell - toHeaderRow l = [toRow l | not (null l)] + headerCells <- rowsToRows hdr + bodyCells <- rowsToRows rows - -- pad cells. New Text.Pandoc.Builder will do that for us, - -- so this is for compatibility while we switch over. - let cells' = map (\row -> toRow $ take width (row ++ repeat mempty)) cells - - hdrCells <- case hdr of - Just r' -> toHeaderRow <$> rowToBlocksList r' - Nothing -> return [] - - -- The two following variables (horizontal column alignment and - -- relative column widths) go to the default at the - -- moment. Width information is in the TblGrid field of the Tbl, - -- so should be possible. Alignment might be more difficult, - -- since there doesn't seem to be a column entity in docx. + -- Horizontal column alignment goes to the default at the moment. Getting + -- it might be difficult, since there doesn't seem to be a column entity + -- in docx. let alignments = replicate width AlignDefault totalWidth = sum grid widths = (\w -> ColWidth (fromInteger w / fromInteger totalWidth)) <$> grid return $ table cap' (zip alignments widths) - (TableHead nullAttr hdrCells) - [TableBody nullAttr 0 [] cells'] + (TableHead nullAttr headerCells) + [TableBody nullAttr 0 [] bodyCells] (TableFoot nullAttr []) bodyPartToBlocks (OMathPara e) = return $ para $ displayMath (writeTeX e) - -- replace targets with generated anchors. rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do @@ -716,6 +734,8 @@ bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block]) bodyToOutput (Body bps) = do let (metabps, blkbps) = sepBodyParts bps meta <- bodyPartsToMeta metabps + captions <- catMaybes <$> mapM bodyPartToTableCaption blkbps + modify (\s -> s { docxTableCaptions = captions }) blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks blks'' <- removeOrphanAnchors blks' diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index 978d6ff3a..aaa8f4ad0 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -33,7 +33,9 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , ParStyle , CharStyle(cStyleData) , Row(..) + , TblHeader(..) , Cell(..) + , VMerge(..) , TrackedChange(..) , ChangeType(..) , ChangeInfo(..) @@ -50,6 +52,7 @@ module Text.Pandoc.Readers.Docx.Parse ( Docx(..) , pHeading , constructBogusParStyleData , leftBiasedMergeRunStyle + , rowsToRowspans ) where import Text.Pandoc.Readers.Docx.Parse.Styles import Codec.Archive.Zip @@ -225,6 +228,7 @@ defaultParagraphStyle = ParagraphStyle { pStyle = [] data BodyPart = Paragraph ParagraphStyle [ParPart] | ListItem ParagraphStyle T.Text T.Text (Maybe Level) [ParPart] | Tbl T.Text TblGrid TblLook [Row] + | TblCaption ParagraphStyle [ParPart] | OMathPara [Exp] deriving Show @@ -236,12 +240,61 @@ newtype TblLook = TblLook {firstRowFormatting::Bool} defaultTblLook :: TblLook defaultTblLook = TblLook{firstRowFormatting = False} -newtype Row = Row [Cell] - deriving Show +data Row = Row TblHeader [Cell] deriving Show + +data TblHeader = HasTblHeader | NoTblHeader deriving (Show, Eq) -newtype Cell = Cell [BodyPart] +data Cell = Cell GridSpan VMerge [BodyPart] deriving Show +type GridSpan = Integer + +data VMerge = Continue + -- ^ This cell should be merged with the one above it + | Restart + -- ^ This cell should not be merged with the one above it + deriving (Show, Eq) + +rowsToRowspans :: [Row] -> [[(Int, Cell)]] +rowsToRowspans rows = let + removeMergedCells = fmap (filter (\(_, Cell _ vmerge _) -> vmerge == Restart)) + in removeMergedCells (foldr f [] rows) + where + f :: Row -> [[(Int, Cell)]] -> [[(Int, Cell)]] + f (Row _ cells) acc = let + spans = g cells Nothing (listToMaybe acc) + in spans : acc + + g :: + -- | The current row + [Cell] -> + -- | Number of columns left below + Maybe Integer -> + -- | (rowspan so far, cell) for the row below this one + Maybe [(Int, Cell)] -> + -- | (rowspan so far, cell) for this row + [(Int, Cell)] + g cells _ Nothing = zip (repeat 1) cells + g cells columnsLeftBelow (Just rowBelow) = + case cells of + [] -> [] + thisCell@(Cell thisGridSpan _ _) : restOfRow -> case rowBelow of + [] -> zip (repeat 1) cells + (spanSoFarBelow, Cell gridSpanBelow vmerge _) : _ -> + let spanSoFar = case vmerge of + Restart -> 1 + Continue -> 1 + spanSoFarBelow + columnsToDrop = thisGridSpan + (gridSpanBelow - fromMaybe gridSpanBelow columnsLeftBelow) + (newColumnsLeftBelow, restOfRowBelow) = dropColumns columnsToDrop rowBelow + in (spanSoFar, thisCell) : g restOfRow (Just newColumnsLeftBelow) (Just restOfRowBelow) + + dropColumns :: Integer -> [(a, Cell)] -> (Integer, [(a, Cell)]) + dropColumns n [] = (n, []) + dropColumns n cells@((_, Cell gridSpan _ _) : otherCells) = + if n < gridSpan + then (gridSpan - n, cells) + else dropColumns (n - gridSpan) otherCells + leftBiasedMergeRunStyle :: RunStyle -> RunStyle -> RunStyle leftBiasedMergeRunStyle a b = RunStyle { isBold = isBold a <|> isBold b @@ -587,14 +640,31 @@ elemToRow ns element | isElem ns "w" "tr" element = do let cellElems = findChildrenByName ns "w" "tc" element cells <- mapD (elemToCell ns) cellElems - return $ Row cells + let hasTblHeader = maybe NoTblHeader (const HasTblHeader) + (findChildByName ns "w" "trPr" element + >>= findChildByName ns "w" "tblHeader") + return $ Row hasTblHeader cells elemToRow _ _ = throwError WrongElem elemToCell :: NameSpaces -> Element -> D Cell elemToCell ns element | isElem ns "w" "tc" element = do + let properties = findChildByName ns "w" "tcPr" element + let gridSpan = properties + >>= findChildByName ns "w" "gridSpan" + >>= findAttrByName ns "w" "val" + >>= stringToInteger + let vMerge = case properties >>= findChildByName ns "w" "vMerge" of + Nothing -> Restart + Just e -> + fromMaybe Continue $ do + s <- findAttrByName ns "w" "val" e + case s of + "continue" -> Just Continue + "restart" -> Just Restart + _ -> Nothing cellContents <- mapD (elemToBodyPart ns) (elChildren element) - return $ Cell cellContents + return $ Cell (fromMaybe 1 gridSpan) vMerge cellContents elemToCell _ _ = throwError WrongElem elemToParIndentation :: NameSpaces -> Element -> Maybe ParIndentation @@ -626,10 +696,9 @@ pNumInfo = getParStyleField numInfo . pStyle elemToBodyPart :: NameSpaces -> Element -> D BodyPart elemToBodyPart ns element | isElem ns "w" "p" element - , (c:_) <- findChildrenByName ns "m" "oMathPara" element = - do - expsLst <- eitherToD $ readOMML $ showElement c - return $ OMathPara expsLst + , (c:_) <- findChildrenByName ns "m" "oMathPara" element = do + expsLst <- eitherToD $ readOMML $ showElement c + return $ OMathPara expsLst elemToBodyPart ns element | isElem ns "w" "p" element , Just (numId, lvl) <- getNumInfo ns element = do @@ -647,13 +716,31 @@ elemToBodyPart ns element Nothing | Just (numId, lvl) <- pNumInfo parstyle -> do levelInfo <- lookupLevel numId lvl <$> asks envNumbering return $ ListItem parstyle numId lvl levelInfo parparts - _ -> return $ Paragraph parstyle parparts + _ -> let + hasCaptionStyle = elem "Caption" (pStyleId <$> pStyle parstyle) + + hasSimpleTableField = fromMaybe False $ do + fldSimple <- findChildByName ns "w" "fldSimple" element + instr <- findAttrByName ns "w" "instr" fldSimple + pure ("Table" `elem` T.words instr) + + hasComplexTableField = fromMaybe False $ do + instrText <- findElementByName ns "w" "instrText" element + pure ("Table" `elem` T.words (strContent instrText)) + + in if hasCaptionStyle && (hasSimpleTableField || hasComplexTableField) + then return $ TblCaption parstyle parparts + else return $ Paragraph parstyle parparts + elemToBodyPart ns element | isElem ns "w" "tbl" element = do - let caption' = findChildByName ns "w" "tblPr" element + let tblProperties = findChildByName ns "w" "tblPr" element + caption = fromMaybe "" $ tblProperties >>= findChildByName ns "w" "tblCaption" >>= findAttrByName ns "w" "val" - caption = fromMaybe "" caption' + description = fromMaybe "" $ tblProperties + >>= findChildByName ns "w" "tblDescription" + >>= findAttrByName ns "w" "val" grid' = case findChildByName ns "w" "tblGrid" element of Just g -> elemToTblGrid ns g Nothing -> return [] @@ -666,7 +753,7 @@ elemToBodyPart ns element grid <- grid' tblLook <- tblLook' rows <- mapD (elemToRow ns) (elChildren element) - return $ Tbl caption grid tblLook rows + return $ Tbl (caption <> description) grid tblLook rows elemToBodyPart _ _ = throwError WrongElem lookupRelationship :: DocumentLocation -> RelId -> [Relationship] -> Maybe Target diff --git a/src/Text/Pandoc/Readers/Docx/Util.hs b/src/Text/Pandoc/Readers/Docx/Util.hs index ac331cba6..970697a2d 100644 --- a/src/Text/Pandoc/Readers/Docx/Util.hs +++ b/src/Text/Pandoc/Readers/Docx/Util.hs @@ -19,6 +19,7 @@ module Text.Pandoc.Readers.Docx.Util ( , elemToNameSpaces , findChildByName , findChildrenByName + , findElementByName , findAttrByName ) where @@ -56,6 +57,12 @@ findChildrenByName ns pref name el = let ns' = ns <> elemToNameSpaces el in findChildren (elemName ns' pref name) el +-- | Like 'findChildrenByName', but searches descendants. +findElementByName :: NameSpaces -> Text -> Text -> Element -> Maybe Element +findElementByName ns pref name el = + let ns' = ns <> elemToNameSpaces el + in findElement (elemName ns' pref name) el + findAttrByName :: NameSpaces -> Text -> Text -> Element -> Maybe Text findAttrByName ns pref name el = let ns' = ns <> elemToNameSpaces el diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index 939ff9939..220c7d9c5 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -317,14 +317,30 @@ tests = [ testGroup "document" "tables with lists in cells" "docx/table_with_list_cell.docx" "docx/table_with_list_cell.native" + , testCompare + "a table with a header which contains rowspans greater than 1" + "docx/table_header_rowspan.docx" + "docx/table_header_rowspan.native" , testCompare "tables with one row" "docx/table_one_row.docx" "docx/table_one_row.native" + , testCompare + "tables with just one row, which is a header" + "docx/table_one_header_row.docx" + "docx/table_one_header_row.native" , testCompare "tables with variable width" "docx/table_variable_width.docx" "docx/table_variable_width.native" + , testCompare + "tables with captions which contain a Table field" + "docx/table_captions_with_field.docx" + "docx/table_captions_with_field.native" + , testCompare + "tables with captions which don't contain a Table field" + "docx/table_captions_no_field.docx" + "docx/table_captions_no_field.native" , testCompare "code block" "docx/codeblock.docx" diff --git a/test/docx/sdt_elements.native b/test/docx/sdt_elements.native index a072c0d39..d2aa00994 100644 --- a/test/docx/sdt_elements.native +++ b/test/docx/sdt_elements.native @@ -4,17 +4,16 @@ ,(AlignDefault,ColWidth 0.22069570301081556) ,(AlignDefault,ColWidth 0.5586085939783689)] (TableHead ("",[],[]) - []) - [(TableBody ("",[],[]) (RowHeadColumns 0) - [] - [Row ("",[],[]) + [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Strong [Str "col1Header"]]] ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Strong [Str "col2Header"]]] ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [Plain [Strong [Str "col3Header"]]]] - ,Row ("",[],[]) + [Plain [Strong [Str "col3Header"]]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "col1",Space,Str "content"]] ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -22,4 +21,4 @@ ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "col3",Space,Str "content"]]]])] (TableFoot ("",[],[]) - [])] \ No newline at end of file + [])] diff --git a/test/docx/table_captions_no_field.docx b/test/docx/table_captions_no_field.docx new file mode 100644 index 000000000..1687d32a2 Binary files /dev/null and b/test/docx/table_captions_no_field.docx differ diff --git a/test/docx/table_captions_no_field.native b/test/docx/table_captions_no_field.native new file mode 100644 index 000000000..b8f54d541 --- /dev/null +++ b/test/docx/table_captions_no_field.native @@ -0,0 +1,34 @@ +[Para [Str "See",Space,Str "Table",Space,Str "5.1."] +,Para [Str "Table",Space,Str "5.1"] +,Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidth 0.7605739372523825) + ,(AlignDefault,ColWidth 0.11971303137380876) + ,(AlignDefault,ColWidth 0.11971303137380876)] + (TableHead ("",[],[]) +[Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Count"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "%"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "First",Space,Str "option"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "242"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "45"]]] +,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Second",Space,Str "option"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "99"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "18"]]]])] + (TableFoot ("",[],[]) + []) +,Header 2 ("section", [], []) []] diff --git a/test/docx/table_captions_with_field.docx b/test/docx/table_captions_with_field.docx new file mode 100644 index 000000000..db6de3088 Binary files /dev/null and b/test/docx/table_captions_with_field.docx differ diff --git a/test/docx/table_captions_with_field.native b/test/docx/table_captions_with_field.native new file mode 100644 index 000000000..deb8afc6b --- /dev/null +++ b/test/docx/table_captions_with_field.native @@ -0,0 +1,54 @@ +[Para [Str "See",Space,Str "Table",Space,Str "1."] +,Para [] +,Table ("",[],[]) (Caption Nothing + [Para [Str "Table",Space,Str "1"]]) + [(AlignDefault,ColWidth 0.7605739372523825) + ,(AlignDefault,ColWidth 0.11971303137380876) + ,(AlignDefault,ColWidth 0.11971303137380876)] + (TableHead ("",[],[]) +[Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Count"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "%"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "First",Space,Str "option"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "242"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "45"]]] +,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Second",Space,Str "option"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "99"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "18"]]]])] + (TableFoot ("",[],[]) + []) +,Header 2 ("section", [], []) [] +,Table ("",[],[]) (Caption Nothing + [Para [Str "Table",Space,Str "2"]]) + [(AlignDefault,ColWidth 0.3332963620230701) + ,(AlignDefault,ColWidth 0.3332963620230701) + ,(AlignDefault,ColWidth 0.3334072759538598)] + (TableHead ("",[],[]) + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "One"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Two"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Three"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [])] + (TableFoot ("",[],[]) + []) +,Para [] +,Para [Str "See",Space,Str "Table",Space,Str "2."]] diff --git a/test/docx/table_header_rowspan.docx b/test/docx/table_header_rowspan.docx new file mode 100644 index 000000000..1cc32a105 Binary files /dev/null and b/test/docx/table_header_rowspan.docx differ diff --git a/test/docx/table_header_rowspan.native b/test/docx/table_header_rowspan.native new file mode 100644 index 000000000..d951f29e4 --- /dev/null +++ b/test/docx/table_header_rowspan.native @@ -0,0 +1,189 @@ +[Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidth 0.30701754385964913) + ,(AlignDefault,ColWidth 0.1364522417153996) + ,(AlignDefault,ColWidth 0.10009746588693957) + ,(AlignDefault,ColWidth 9.707602339181287e-2) + ,(AlignDefault,ColWidth 7.719298245614035e-2) + ,(AlignDefault,ColWidth 7.085769980506823e-2) + ,(AlignDefault,ColWidth 7.09551656920078e-2) + ,(AlignDefault,ColWidth 0.14035087719298245)] + (TableHead ("",[],[]) +[Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1) + [Plain [Str "A"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1) + [Plain [Strong [Str "B"]]] + ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1) + [Plain [Strong [Str "C"]]] + ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1) + [Plain [Strong [Str "D"]]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 3) + [Plain [Str "E"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 2) (ColSpan 1) + [Plain [Str "F"]]] +,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Strong [Str "G"]]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Strong [Str "H"]]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Strong [Str "I"]]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ,Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "1"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "2"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "3"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "4"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "5"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "6"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "7"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "8"]]] + ])] + (TableFoot ("",[],[]) + [])] diff --git a/test/docx/table_one_header_row.docx b/test/docx/table_one_header_row.docx new file mode 100644 index 000000000..db715dda8 Binary files /dev/null and b/test/docx/table_one_header_row.docx differ diff --git a/test/docx/table_one_header_row.native b/test/docx/table_one_header_row.native new file mode 100644 index 000000000..4aae830ac --- /dev/null +++ b/test/docx/table_one_header_row.native @@ -0,0 +1,18 @@ +[Table ("",[],[]) (Caption Nothing + []) + [(AlignDefault,ColWidth 0.33302433371958284) + ,(AlignDefault,ColWidth 0.3332560834298957) + ,(AlignDefault,ColWidth 0.33371958285052145)] + (TableHead ("",[],[]) + [Row ("",[],[]) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "One"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Row"]] + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Plain [Str "Table"]]]]) + [(TableBody ("",[],[]) (RowHeadColumns 0) + [] + [])] + (TableFoot ("",[],[]) + [])] diff --git a/test/docx/table_one_row.docx b/test/docx/table_one_row.docx index f7e0ebe43..d05a856b5 100644 Binary files a/test/docx/table_one_row.docx and b/test/docx/table_one_row.docx differ diff --git a/test/docx/table_variable_width.native b/test/docx/table_variable_width.native index 43ac40cca..ff1cc0dc4 100644 --- a/test/docx/table_variable_width.native +++ b/test/docx/table_variable_width.native @@ -4,7 +4,8 @@ ,(AlignDefault,ColWidth 1.9882415820416888e-2) ,(AlignDefault,ColWidth 0.22202030999465527) ,(AlignDefault,ColWidth 0.4761090326028862) - ,(AlignDefault,ColWidth 1.0689470871191876e-4)] + ,(AlignDefault,ColWidth 1.0689470871191876e-4) + ,(AlignDefault,ColWidth 0.26178514163548905)] (TableHead ("",[],[]) [Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) @@ -13,33 +14,27 @@ [] ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "h3"]] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2) [Plain [Str "h4"]] ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "h5"]]]]) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 3) [Plain [Str "c11"]] ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2) []] ,Row ("",[],[]) [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2) [Plain [Str "c22"]] ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) [Plain [Str "c23"]] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) + ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 2) []]])] (TableFoot ("",[],[]) [])] -- cgit v1.2.3 From b6b2331fdcee37f1bfb3fcc21816d73d6d56cfae Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 28 May 2021 13:58:44 -0700 Subject: Support `rebase_relative_paths` for commonmark based formats. (Including `gfm`.) --- src/Text/Pandoc/Extensions.hs | 1 + src/Text/Pandoc/Readers/CommonMark.hs | 4 +++- test/command/3752.md | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index c4d54c06e..9c55d0a7a 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -517,6 +517,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_attributes , Ext_sourcepos , Ext_yaml_metadata_block + , Ext_rebase_relative_paths ] getAll "commonmark_x" = getAll "commonmark" getAll "org" = autoIdExtensions <> diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index b099a9b50..228e65312 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -112,5 +112,7 @@ specFor opts = foldr ($) defaultSyntaxSpec exts | isEnabled Ext_implicit_header_references opts ] ++ [ (footnoteSpec <>) | isEnabled Ext_footnotes opts ] ++ [ (definitionListSpec <>) | isEnabled Ext_definition_lists opts ] ++ - [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] + [ (taskListSpec <>) | isEnabled Ext_task_lists opts ] ++ + [ (rebaseRelativePathsSpec <>) + | isEnabled Ext_rebase_relative_paths opts ] diff --git a/test/command/3752.md b/test/command/3752.md index df8af0ba1..2e96b531e 100644 --- a/test/command/3752.md +++ b/test/command/3752.md @@ -42,3 +42,19 @@ A spider: [spider] <h1 id="chapter-two">Chapter two</h1> <p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p> ``` + +``` +% pandoc command/chap1/text.md command/chap2/text.md -f commonmark+rebase_relative_paths -t html +^D +<h1>Chapter one</h1> +<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p> +<p>Another spider: <img src="command/chap2/spider.png" alt="another spider" /></p> +<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p> +<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p> +<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p> +<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p> +<p>Link to fragment: <a href="#chapter-two">chapter two</a>.</p> +<p>Empty path: <a href="">empty</a>.</p> +<h1>Chapter two</h1> +<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p> +``` -- cgit v1.2.3 From 0d7103de7e893002eedcc50fea7f81aae9535106 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 29 May 2021 14:41:28 -0700 Subject: In rebasePath, check for absolute paths two ways. isAbsolute from FilePath doesn't return True on Windows for paths beginning with `/`, so we check that separately. --- src/Text/Pandoc/Readers/Markdown.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index bc5e3e30f..9a5e0889e 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1924,7 +1924,10 @@ rebasePath :: SourcePos -> Text -> Text rebasePath pos path = do let fp = sourceName pos isFragment = T.take 1 path == "#" - in if T.null path || isFragment || isAbsolute (T.unpack path) || isURI path + -- check for leading / because on Windows this won't be + -- recognized as absolute by isAbsolute + isAbsolutePath = isAbsolute (T.unpack path) || T.take 1 path == "/" + in if T.null path || isFragment || isAbsolutePath || isURI path then path else case takeDirectory fp of -- cgit v1.2.3 From cc6dcf03928404c08cfb3075a90d814b25fd7c69 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 29 May 2021 17:36:30 -0700 Subject: Markdown reader: in rebasePaths, check for both Windows and Posix absolute paths. Previously Windows pandoc was treating `/foo/bar.jpg` as non-absolute. --- src/Text/Pandoc/Readers/Markdown.hs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 9a5e0889e..1761ee2c1 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -29,7 +29,9 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.ByteString.Lazy as BL -import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory) +import System.FilePath (addExtension, takeExtension, takeDirectory) +import qualified System.FilePath.Windows as Windows +import qualified System.FilePath.Posix as Posix import Text.HTML.TagSoup hiding (Row) import Text.Pandoc.Builder (Blocks, Inlines) import qualified Text.Pandoc.Builder as B @@ -1924,9 +1926,8 @@ rebasePath :: SourcePos -> Text -> Text rebasePath pos path = do let fp = sourceName pos isFragment = T.take 1 path == "#" - -- check for leading / because on Windows this won't be - -- recognized as absolute by isAbsolute - isAbsolutePath = isAbsolute (T.unpack path) || T.take 1 path == "/" + path' = T.unpack path + isAbsolutePath = Posix.isAbsolute path' || Windows.isAbsolute path' in if T.null path || isFragment || isAbsolutePath || isURI path then path else -- cgit v1.2.3 From c2f46e6df4a80a313ab4329b740770dbdfbe1578 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 30 May 2021 10:07:28 -0700 Subject: Docx writer: fix regression on captions. The "Table Caption" style was no longer getting applied. (It was overwritten by "Compact.") Closes #7328. --- src/Text/Pandoc/Writers/Docx/Table.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index e7fc82a10..13cb31040 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -33,7 +33,6 @@ tableToOpenXML :: PandocMonad m -> WS m [Content] tableToOpenXML blocksToOpenXML gridTable = do setFirstPara - modify $ \s -> s { stInTable = True } let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) = gridTable let (Caption _maybeShortCaption captionBlocks) = caption @@ -43,6 +42,9 @@ tableToOpenXML blocksToOpenXML gridTable = do then return [] else withParaPropM (pStyleM "Table Caption") $ blocksToOpenXML captionBlocks + -- We set "in table" after processing the caption, because we don't + -- want the "Table Caption" style to be overwritten with "Compact". + modify $ \s -> s { stInTable = True } head' <- cellGridToOpenXML blocksToOpenXML HeadRow aligns thead bodies <- mapM (cellGridToOpenXML blocksToOpenXML BodyRow aligns) tbodies foot' <- cellGridToOpenXML blocksToOpenXML FootRow aligns tfoot -- cgit v1.2.3 From cc206af392a40dd7b01b714ae7f33b2fbf4925cc Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 30 May 2021 10:22:02 -0700 Subject: Have LoadedResource use relative paths. The immediate reason for this is to allow the test output of #3752 to work on both windows and linux. --- src/Text/Pandoc/Class/PandocMonad.hs | 4 ++-- test/command/3752.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index b5f401619..4eb80df29 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -66,7 +66,7 @@ import Network.URI ( escapeURIString, nonStrictRelativeTo, unEscapeString, parseURIReference, isAllowedInURI, parseURI, URI(..) ) import System.FilePath ((</>), takeExtension, dropExtension, - isRelative, splitDirectories) + isRelative, splitDirectories, makeRelative) import System.Random (StdGen) import Text.Collate.Lang (Lang(..), parseLang, renderLang) import Text.Pandoc.Class.CommonState (CommonState (..)) @@ -413,7 +413,7 @@ downloadOrRead s = do (fp', cont) <- if isRelative f then withPaths resourcePath readFileStrict f else (f,) <$> readFileStrict f - report $ LoadedResource f fp' + report $ LoadedResource f (makeRelative "." fp') return (cont, mime) httpcolon = URI{ uriScheme = "http:", uriAuthority = Nothing, diff --git a/test/command/3752.md b/test/command/3752.md index 6ac025ebe..863e3f2d4 100644 --- a/test/command/3752.md +++ b/test/command/3752.md @@ -1,9 +1,9 @@ ``` % pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx -o - | pandoc -f docx -t plain ^D -[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png -[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png -[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg +[INFO] Loaded command/chap1/spider.png from command/chap1/spider.png +[INFO] Loaded command/chap2/spider.png from command/chap2/spider.png +[INFO] Loaded command/chap1/../../lalune.jpg from command/chap1/../../lalune.jpg Chapter one A spider: [spider] -- cgit v1.2.3 From fc70f44ee2814914c0d32f6dff30fb36cc51bf11 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 30 May 2021 17:15:14 -0700 Subject: HTML reader: fix column width regression. Column widths specified with a style attribute were off by a factor of 100 in 2.14. Closes #7334. --- src/Text/Pandoc/Readers/HTML/Table.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML/Table.hs b/src/Text/Pandoc/Readers/HTML/Table.hs index 3a569dd0a..6e62e12f5 100644 --- a/src/Text/Pandoc/Readers/HTML/Table.hs +++ b/src/Text/Pandoc/Readers/HTML/Table.hs @@ -49,7 +49,7 @@ pCol = try $ do return $ case lookup "width" attribs of Nothing -> case lookup "style" attribs of Just (T.stripPrefix "width:" -> Just xs) | T.any (== '%') xs -> - maybe (Right ColWidthDefault) (Right . ColWidth) + maybe (Right ColWidthDefault) (Right . ColWidth . (/ 100.0)) $ safeRead (T.filter (`notElem` (" \t\r\n%'\";" :: [Char])) xs) _ -> Right ColWidthDefault -- cgit v1.2.3 From 62f46b3995425c9a3ec87cba0eb8a4d736adec07 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 31 May 2021 21:34:51 -0600 Subject: Fix regression with commonmark/gfm yaml metdata block parsing. A regression in 2.14 led to the document body being omitted after YAML metadata in some cases. This is now fixed. Closes #7339. --- src/Text/Pandoc/Readers/CommonMark.hs | 10 +++++----- test/command/7339.md | 11 +++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) create mode 100644 test/command/7339.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 228e65312..411d64278 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -30,7 +30,7 @@ import Text.Pandoc.Readers.Metadata (yamlMetaBlock) import Control.Monad.Except import Data.Functor.Identity (runIdentity) import Data.Typeable -import Text.Pandoc.Parsing (runParserT, getPosition, +import Text.Pandoc.Parsing (runParserT, getInput, runF, defaultParserState, option, many1, anyChar, Sources(..), ToSources(..), ParserT, Future, sourceName) @@ -44,14 +44,14 @@ readCommonMark opts s let sources = toSources s let toks = concatMap sourceToToks (unSources sources) res <- runParserT (do meta <- yamlMetaBlock (metaValueParser opts) - pos <- getPosition - return (meta, pos)) + rest <- getInput + return (meta, rest)) defaultParserState "YAML metadata" (toSources s) case res of Left _ -> readCommonMarkBody opts sources toks - Right (meta, pos) -> do + Right (meta, rest) -> do -- strip off metadata section and parse body - let body = dropWhile (\t -> tokPos t < pos) toks + let body = concatMap sourceToToks (unSources rest) Pandoc _ bs <- readCommonMarkBody opts sources body return $ Pandoc (runF meta defaultParserState) bs | otherwise = do diff --git a/test/command/7339.md b/test/command/7339.md new file mode 100644 index 000000000..9697c1c32 --- /dev/null +++ b/test/command/7339.md @@ -0,0 +1,11 @@ +``` +% pandoc -f gfm -s -t native +--- +title: Test +--- + +Hi +^D +Pandoc (Meta {unMeta = fromList [("title",MetaInlines [Str "Test"])]}) +[Para [Str "Hi"]] +``` -- cgit v1.2.3 From abb59bd58222c67fd36a8e447c01de3404a7ed1d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 1 Jun 2021 13:54:51 -0600 Subject: LaTeX reader: don't allow optional * on symbol control sequences. Generally we allow optional starred variants of LaTeX commands (since many allow them, and if we don't accept these explicitly, ignoring the star usually gives acceptable results). But we don't want to do this for `\(*\)` and similar cases. Closes #7340. --- src/Text/Pandoc/Readers/LaTeX.hs | 6 ++++-- test/command/7340.md | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 test/command/7340.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 2ace18d1b..9e14c159a 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -24,7 +24,7 @@ module Text.Pandoc.Readers.LaTeX ( readLaTeX, import Control.Applicative (many, optional, (<|>)) import Control.Monad import Control.Monad.Except (throwError) -import Data.Char (isDigit, isLetter, toUpper, chr) +import Data.Char (isDigit, isLetter, isAlphaNum, toUpper, chr) import Data.Default import Data.List (intercalate) import qualified Data.Map as M @@ -300,7 +300,9 @@ inlineCommand' :: PandocMonad m => LP m Inlines inlineCommand' = try $ do Tok _ (CtrlSeq name) cmd <- anyControlSeq guard $ name /= "begin" && name /= "end" && name /= "and" - star <- option "" ("*" <$ symbol '*' <* sp) + star <- if T.all isAlphaNum name + then option "" ("*" <$ symbol '*' <* sp) + else pure "" overlay <- option "" overlaySpecification let name' = name <> star <> overlay let names = ordNub [name', name] -- check non-starred as fallback diff --git a/test/command/7340.md b/test/command/7340.md new file mode 100644 index 000000000..25decd732 --- /dev/null +++ b/test/command/7340.md @@ -0,0 +1,6 @@ +``` +% pandoc -f latex -t native +\(*\) +^D +[Para [Math InlineMath "*"]] +``` -- cgit v1.2.3 From 2e4ef14d9112f230e336a1f3767f87293bdfb73e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 1 Jun 2021 21:44:55 -0600 Subject: Markdown reader: fix pipe table regression in 2.11.4. Previously pipe tables with empty headers (that is, a header line with all empty cells) would be rendered as headerless tables. This broke in 2.11.4. The fix here is to produce an AST with an empty table head when a pipe table has all empty header cells. Closes #7343. --- src/Text/Pandoc/Readers/Markdown.hs | 2 +- test/pipe-tables.native | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1761ee2c1..1e9867d07 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -2254,4 +2254,4 @@ toRow :: [Blocks] -> Row toRow = Row nullAttr . map B.simpleCell toHeaderRow :: [Blocks] -> [Row] -toHeaderRow l = [toRow l | not (null l)] +toHeaderRow l = [toRow l | not (null l) && not (all null l)] diff --git a/test/pipe-tables.native b/test/pipe-tables.native index 557cd0642..249eec17e 100644 --- a/test/pipe-tables.native +++ b/test/pipe-tables.native @@ -131,13 +131,7 @@ ,(AlignLeft,ColWidthDefault) ,(AlignCenter,ColWidthDefault)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - [] - ,Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) @@ -213,9 +207,7 @@ []) [(AlignCenter,ColWidthDefault)] (TableHead ("",[],[]) - [Row ("",[],[]) - [Cell ("",[],[]) AlignDefault (RowSpan 1) (ColSpan 1) - []]]) + []) [(TableBody ("",[],[]) (RowHeadColumns 0) [] [Row ("",[],[]) -- cgit v1.2.3 From 3b628f7664a95e6f3b3d7da177c83333ec2bc0fa Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 1 Jun 2021 21:57:49 -0600 Subject: HTML writer: Don't omit width attribute on div. Closes #7342. --- src/Text/Pandoc/Writers/HTML.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index f7a387927..b1dd9a659 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -770,9 +770,10 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do lookup "entry-spacing" kvs' >>= safeRead } let isCslBibEntry = "csl-entry" `elem` classes - let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++ - [("style", "width:" <> w <> ";") | "column" `elem` classes, - ("width", w) <- kvs'] ++ + let kvs = [(k,v) | (k,v) <- kvs' + , k /= "width" || "column" `notElem` classes] ++ + [("style", "width:" <> w <> ";") | "column" `elem` classes + , ("width", w) <- kvs'] ++ [("role", "doc-bibliography") | isCslBibBody && html5] ++ [("role", "doc-biblioentry") | isCslBibEntry && html5] let speakerNotes = "notes" `elem` classes -- cgit v1.2.3 From 2b5dad9912de659424246657c91f70417590c3fe Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 2 Jun 2021 10:42:22 -0600 Subject: Fix regression in 2.14 for generation of PDFs with SVGs. Closes #7344. --- src/Text/Pandoc/PDF.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 6f462aad5..7fce17cea 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -202,7 +202,7 @@ convertImage opts tmpdir fname = do Just "image/svg+xml" -> E.catch (do (exit, _) <- pipeProcess Nothing "rsvg-convert" ["-f","pdf","-a","--dpi-x",dpi,"--dpi-y",dpi, - "-o",pdfOut,fname] BL.empty + "-o",pdfOut,svgIn] BL.empty if exit == ExitSuccess then return $ Right pdfOut else return $ Left "conversion from SVG failed") @@ -217,6 +217,7 @@ convertImage opts tmpdir fname = do where pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir + svgIn = tmpdir </> fname mime = getMimeType fname doNothing = return (Right fname) -- cgit v1.2.3 From 311736fb0ae21e5b5c78d1ad6fa4b3f4840941f9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 2 Jun 2021 15:21:13 -0600 Subject: Text.Pandoc.PDF: only print relevant part of environment on `--verbose`. --- src/Text/Pandoc/PDF.hs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 7fce17cea..f85ef5b1f 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -507,8 +507,20 @@ showVerboseInfo mbTmpDir program programArgs env source = do UTF8.hPutStrLn stderr $ T.pack program <> " " <> T.pack (unwords (map show programArgs)) UTF8.hPutStr stderr "\n" - UTF8.hPutStrLn stderr "[makePDF] Environment:" - mapM_ (UTF8.hPutStrLn stderr . tshow) env + UTF8.hPutStrLn stderr "[makePDF] Relevant environment variables:" + -- we filter out irrelevant stuff to avoid leaking passwords and keys! + let isRelevant ("PATH",_) = True + isRelevant ("TMPDIR",_) = True + isRelevant ("PWD",_) = True + isRelevant ("LANG",_) = True + isRelevant ("HOME",_) = True + isRelevant ("LUA_PATH",_) = True + isRelevant ("LUA_CPATH",_) = True + isRelevant ("SHELL",_) = True + isRelevant ("TEXINPUTS",_) = True + isRelevant ("TEXMFOUTPUT",_) = True + isRelevant _ = False + mapM_ (UTF8.hPutStrLn stderr . tshow) (filter isRelevant env) UTF8.hPutStr stderr "\n" UTF8.hPutStrLn stderr "[makePDF] Source:" UTF8.hPutStrLn stderr source -- cgit v1.2.3 From b6c04383e403b0962db09e6748760d3ec376f2ed Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 3 Jun 2021 18:34:38 -0600 Subject: T.P.Class.IO: normalise path in writeMedia. This ensures that we get `\` separators on Windows. --- src/Text/Pandoc/Class/IO.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index f12c0a938..6df39d4d0 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -211,13 +211,12 @@ writeMedia :: (PandocMonad m, MonadIO m) => FilePath -> MediaBag -> FilePath -> m () writeMedia dir mediabag subpath = do - -- we join and split to convert a/b/c to a\b\c on Windows; - -- in zip containers all paths use / let mbcontents = lookupMedia subpath mediabag case mbcontents of Nothing -> throwError $ PandocResourceNotFound $ pack subpath Just item -> do - let fullpath = dir </> mediaPath item + -- we normalize to get proper path separators for the platform + let fullpath = dir </> normalise (mediaPath item) liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) logIOError $ BL.writeFile fullpath $ mediaContents item -- cgit v1.2.3 From af9de925de4a441f4c45a977363f9a56589f57bc Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sat, 5 Jun 2021 14:16:44 +0200 Subject: DocBook writer: Remove non-existent admonitions attention, error and hint are actually just reStructuredText specific. danger was too until introduced in DocBook 5.2: https://github.com/docbook/docbook/issues/55 --- src/Text/Pandoc/Writers/Docbook.hs | 3 +-- test/Tests/Writers/Docbook.hs | 12 ++++++------ 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index 25bd308bf..33a6f5f0c 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -198,8 +198,7 @@ blockToDocbook opts (Div (id',"section":_,_) (Header lvl (_,_,attrs) ils : xs)) blockToDocbook opts (Div (ident,classes,_) bs) = do version <- ask let identAttribs = [(idName version, ident) | not (T.null ident)] - admonitions = ["attention","caution","danger","error","hint", - "important","note","tip","warning"] + admonitions = ["caution","danger","important","note","tip","warning"] case classes of (l:_) | l `elem` admonitions -> do let (mTitleBs, bodyBs) = diff --git a/test/Tests/Writers/Docbook.hs b/test/Tests/Writers/Docbook.hs index 46203eeae..f517f803a 100644 --- a/test/Tests/Writers/Docbook.hs +++ b/test/Tests/Writers/Docbook.hs @@ -83,32 +83,32 @@ tests = [ testGroup "line blocks" , "</warning>" ] , "admonition-with-title" =: - divWith ("foo", ["attention"], []) ( + divWith ("foo", ["note"], []) ( divWith ("foo", ["title"], []) (plain (text "This is title")) <> para "This is a test" ) =?> unlines - [ "<attention id=\"foo\">" + [ "<note id=\"foo\">" , " <title>This is title</title>" , " <para>" , " This is a test" , " </para>" - , "</attention>" + , "</note>" ] , "admonition-with-title-in-para" =: - divWith ("foo", ["attention"], []) ( + divWith ("foo", ["note"], []) ( divWith ("foo", ["title"], []) (para "This is title") <> para "This is a test" ) =?> unlines - [ "<attention id=\"foo\">" + [ "<note id=\"foo\">" , " <title>This is title</title>" , " <para>" , " This is a test" , " </para>" - , "</attention>" + , "</note>" ] , "single-child" =: divWith ("foo", [], []) (para "This is a test") -- cgit v1.2.3 From c8ab8bccf25ad01b9a98b2e59c84c151c5b0c371 Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sat, 5 Jun 2021 14:26:55 +0200 Subject: DocBook reader: Add support for danger element Added in DocBook 5.2: - https://github.com/docbook/docbook/pull/64 - https://tdg.docbook.org/tdg/5.2/danger.html --- src/Text/Pandoc/Readers/DocBook.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index b01ad3252..6ac1c99f9 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -134,6 +134,7 @@ List of all DocBook tags, with [x] indicating implemented, [ ] corpcredit - A corporation or organization credited in a document [ ] corpname - The name of a corporation [ ] country - The name of a country +[x] danger - An admonition set off from the text indicating hazardous situation [ ] database - The name of a database, or part of a database [x] date - The date of publication or revision of a document [ ] dedication - A wrapper for the dedication section of a book @@ -718,7 +719,7 @@ blockTags = ] ++ admonitionTags admonitionTags :: [Text] -admonitionTags = ["important","caution","note","tip","warning"] +admonitionTags = ["caution","danger","important","note","tip","warning"] -- Trim leading and trailing newline characters trimNl :: Text -> Text -- cgit v1.2.3 From c6f8c38c49e9579bf0bc0c91131bb206f0617e6b Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sat, 5 Jun 2021 16:31:12 +0200 Subject: Markdown writer: re-use functions from Inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of duplicating linkAttributes and attrsToMarkdown, let’s just use those from the Inline module. --- src/Text/Pandoc/Writers/Markdown.hs | 29 +---------------------------- src/Text/Pandoc/Writers/Markdown/Inline.hs | 4 +++- 2 files changed, 4 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 2ad9eabd9..38227dfa8 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -43,7 +43,7 @@ import Text.Pandoc.Templates (renderTemplate) import Text.DocTemplates (Val(..), Context(..), FromContext(..)) import Text.Pandoc.Walk import Text.Pandoc.Writers.HTML (writeHtml5String) -import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown) +import Text.Pandoc.Writers.Markdown.Inline (inlineListToMarkdown, linkAttributes, attrsToMarkdown) import Text.Pandoc.Writers.Markdown.Types (MarkdownVariant(..), WriterState(..), WriterEnv(..), @@ -257,39 +257,12 @@ noteToMarkdown opts num blocks = do then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents -attrsToMarkdown :: Attr -> Doc Text -attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] - where attribId = case attribs of - ("",_,_) -> empty - (i,_,_) -> "#" <> escAttr i - attribClasses = case attribs of - (_,[],_) -> empty - (_,cs,_) -> hsep $ - map (escAttr . ("."<>)) - cs - attribKeys = case attribs of - (_,_,[]) -> empty - (_,_,ks) -> hsep $ - map (\(k,v) -> escAttr k - <> "=\"" <> - escAttr v <> "\"") ks - escAttr = mconcat . map escAttrChar . T.unpack - escAttrChar '"' = literal "\\\"" - escAttrChar '\\' = literal "\\\\" - escAttrChar c = literal $ T.singleton c - -- | (Code) blocks with a single class can just use it standalone, -- no need to bother with curly braces. classOrAttrsToMarkdown :: Attr -> Doc Text classOrAttrsToMarkdown ("",[cls],_) = literal cls classOrAttrsToMarkdown attrs = attrsToMarkdown attrs -linkAttributes :: WriterOptions -> Attr -> Doc Text -linkAttributes opts attr = - if isEnabled Ext_link_attributes opts && attr /= nullAttr - then attrsToMarkdown attr - else empty - -- | Ordered list start parser for use in Para below. olMarker :: Parser Text ParserState () olMarker = do (start, style', delim) <- anyOrderedListMarker diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index e6c6da5a9..e66258220 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -11,7 +11,9 @@ Portability : portable -} module Text.Pandoc.Writers.Markdown.Inline ( - inlineListToMarkdown + inlineListToMarkdown, + linkAttributes, + attrsToMarkdown ) where import Control.Monad.Reader import Control.Monad.State.Strict -- cgit v1.2.3 From 7a3ee9d3d83b73cb53de80e01a9968ebf8f7cf12 Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sat, 5 Jun 2021 15:53:24 +0200 Subject: CommonMark writer: do not throw away attributes when Ext_attributes is enabled Ext_attributes covers at least the following: - Ext_fenced_code_attributes - Ext_header_attributes - Ext_inline_code_attributes - Ext_link_attributes --- src/Text/Pandoc/Writers/Markdown.hs | 8 +++++--- src/Text/Pandoc/Writers/Markdown/Inline.hs | 22 ++++++++++++---------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 38227dfa8..6316d9419 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -366,7 +366,7 @@ blockToMarkdown' opts (Plain inlines) = do -- title beginning with fig: indicates figure blockToMarkdown' opts (Para [Image attr alt (src,tgt@(T.stripPrefix "fig:" -> Just tit))]) | isEnabled Ext_raw_html opts && - not (isEnabled Ext_link_attributes opts) && + not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr = -- use raw HTML (<> blankline) . literal . T.strip <$> writeHtml5String opts{ writerTemplate = Nothing } @@ -431,7 +431,8 @@ blockToMarkdown' opts (Header level attr inlines) = do && id' == autoId -> empty (id',_,_) | isEnabled Ext_mmd_header_identifiers opts -> space <> brackets (literal id') - _ | isEnabled Ext_header_attributes opts -> + _ | isEnabled Ext_header_attributes opts || + isEnabled Ext_attributes opts -> space <> attrsToMarkdown attr | otherwise -> empty contents <- inlineListToMarkdown opts $ @@ -490,7 +491,8 @@ blockToMarkdown' opts (CodeBlock attribs str) = do endline c = literal $ T.replicate (endlineLen c) $ T.singleton c backticks = endline '`' tildes = endline '~' - attrs = if isEnabled Ext_fenced_code_attributes opts + attrs = if isEnabled Ext_fenced_code_attributes opts || + isEnabled Ext_attributes opts then nowrap $ " " <> classOrAttrsToMarkdown attribs else case attribs of (_,cls:_,_) -> " " <> literal cls diff --git a/src/Text/Pandoc/Writers/Markdown/Inline.hs b/src/Text/Pandoc/Writers/Markdown/Inline.hs index e66258220..cd5f5b896 100644 --- a/src/Text/Pandoc/Writers/Markdown/Inline.hs +++ b/src/Text/Pandoc/Writers/Markdown/Inline.hs @@ -117,7 +117,7 @@ attrsToMarkdown attribs = braces $ hsep [attribId, attribClasses, attribKeys] linkAttributes :: WriterOptions -> Attr -> Doc Text linkAttributes opts attr = - if isEnabled Ext_link_attributes opts && attr /= nullAttr + if (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr then attrsToMarkdown attr else empty @@ -394,13 +394,15 @@ inlineToMarkdown opts (Quoted DoubleQuote lst) = do then "“" <> contents <> "”" else "“" <> contents <> "”" inlineToMarkdown opts (Code attr str) = do - let tickGroups = filter (T.any (== '`')) $ T.group str - let longest = maybe 0 maximum $ nonEmpty $ map T.length tickGroups - let marker = T.replicate (longest + 1) "`" - let spacer = if longest == 0 then "" else " " - let attrs = if isEnabled Ext_inline_code_attributes opts && attr /= nullAttr - then attrsToMarkdown attr - else empty + let tickGroups = filter (T.any (== '`')) $ T.group str + let longest = maybe 0 maximum $ nonEmpty $ map T.length tickGroups + let marker = T.replicate (longest + 1) "`" + let spacer = if longest == 0 then "" else " " + let attrsEnabled = isEnabled Ext_inline_code_attributes opts || + isEnabled Ext_attributes opts + let attrs = if attrsEnabled && attr /= nullAttr + then attrsToMarkdown attr + else empty variant <- asks envVariant case variant of PlainText -> return $ literal str @@ -559,7 +561,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do else "[" <> reftext <> "]" in return $ first <> second | isEnabled Ext_raw_html opts - , not (isEnabled Ext_link_attributes opts) + , not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) , attr /= nullAttr -> -- use raw HTML to render attributes literal . T.strip <$> writeHtml5String opts{ writerTemplate = Nothing } @@ -569,7 +571,7 @@ inlineToMarkdown opts lnk@(Link attr txt (src, tit)) = do linkAttributes opts attr inlineToMarkdown opts img@(Image attr alternate (source, tit)) | isEnabled Ext_raw_html opts && - not (isEnabled Ext_link_attributes opts) && + not (isEnabled Ext_link_attributes opts || isEnabled Ext_attributes opts) && attr /= nullAttr = -- use raw HTML literal . T.strip <$> writeHtml5String opts{ writerTemplate = Nothing } (Pandoc nullMeta [Plain [img]]) -- cgit v1.2.3 From c550bf8482310dfbcb20694e7bc969d19acc5f7d Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Sat, 5 Jun 2021 16:34:27 +0200 Subject: CommonMark writer: do not use simple class for fenced-divs In https://github.com/jgm/pandoc/pull/7242, we introduced a simple attribute style for for code blocks and fenced divs with a single class but turns out the CommonMark extension does not support it for fenced divs. https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/fenced_divs.md --- src/Text/Pandoc/Writers/Markdown.hs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 6316d9419..b16c71358 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -322,9 +322,12 @@ blockToMarkdown' opts (Div attrs ils) = do case () of _ | isEnabled Ext_fenced_divs opts && attrs /= nullAttr -> - nowrap (literal ":::" <+> classOrAttrsToMarkdown attrs) $$ - chomp contents $$ - literal ":::" <> blankline + let attrsToMd = if variant == Commonmark + then attrsToMarkdown + else classOrAttrsToMarkdown + in nowrap (literal ":::" <+> attrsToMd attrs) $$ + chomp contents $$ + literal ":::" <> blankline | isEnabled Ext_native_divs opts || (isEnabled Ext_raw_html opts && (variant == Commonmark || -- cgit v1.2.3 From 21cc52abe33997ea2f2c539f10d26684b7633bc0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 5 Jun 2021 14:13:58 -0600 Subject: LaTeX writer: Fix regression in table header position. In recent versions the table headers were no longer bottom-aligned (if more than one line). This patch fixes that by using minipages for table headers in non-simple tables. Closes #7347. --- src/Text/Pandoc/Writers/LaTeX/Table.hs | 13 ++++++++++--- test/command/5367.md | 8 ++++++-- test/tables.latex | 30 +++++++++++++++++++++++++++--- test/tables/nordics.latex | 26 ++++++++++++++++++++++---- test/tables/planets.latex | 8 ++++---- test/tables/students.latex | 12 ++++++++++-- 6 files changed, 79 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 16f63314b..8dc7d1162 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -16,6 +16,7 @@ module Text.Pandoc.Writers.LaTeX.Table ) where import Control.Monad.State.Strict import Data.List (intersperse) +import qualified Data.List.NonEmpty as NonEmpty import Data.List.NonEmpty (NonEmpty ((:|))) import Data.Text (Text) import qualified Data.Text as T @@ -243,8 +244,13 @@ cellToLaTeX :: PandocMonad m -> Ann.Cell -> LW m (Doc Text) cellToLaTeX blockListToLaTeX celltype annotatedCell = do - let (Ann.Cell _specs _colnum cell) = annotatedCell - let (Cell _attr align rowspan colspan blocks) = cell + let (Ann.Cell specs _colnum cell) = annotatedCell + let hasWidths = snd (NonEmpty.head specs) /= ColWidthDefault + let specAlign = fst (NonEmpty.head specs) + let (Cell _attr align' rowspan colspan blocks) = cell + let align = case align' of + AlignDefault -> specAlign + _ -> align' beamer <- gets stBeamer externalNotes <- gets stExternalNotes inMinipage <- gets stInMinipage @@ -256,7 +262,7 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do Plain{} -> True _ -> False result <- - if all isPlainOrPara blocks + if not hasWidths || (celltype /= HeaderCell && all isPlainOrPara blocks) then blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks else do @@ -290,3 +296,4 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do data CellType = HeaderCell | BodyCell + deriving Eq diff --git a/test/command/5367.md b/test/command/5367.md index 2d3a5e52e..a67011c2f 100644 --- a/test/command/5367.md +++ b/test/command/5367.md @@ -24,11 +24,15 @@ hello\footnote{doc footnote} >{\centering\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{0.17}}@{}} \caption[Sample table.]{Sample table.\footnote{caption footnote}}\tabularnewline \toprule -Fruit\footnote{header footnote} \\ +\begin{minipage}[b]{\linewidth}\centering +Fruit\footnote{header footnote} +\end{minipage} \\ \midrule \endfirsthead \toprule -Fruit{} \\ +\begin{minipage}[b]{\linewidth}\centering +Fruit{} +\end{minipage} \\ \midrule \endhead Bans\footnote{table cell footnote} \\ diff --git a/test/tables.latex b/test/tables.latex index afa14d845..9d111fa7a 100644 --- a/test/tables.latex +++ b/test/tables.latex @@ -56,11 +56,27 @@ Multiline table with caption: >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}} \caption{Here's the caption. It may span multiple lines.}\tabularnewline \toprule -Centered Header & Left Aligned & Right Aligned & Default aligned \\ +\begin{minipage}[b]{\linewidth}\centering +Centered Header +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright +Left Aligned +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedleft +Right Aligned +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright +Default aligned +\end{minipage} \\ \midrule \endfirsthead \toprule -Centered Header & Left Aligned & Right Aligned & Default aligned \\ +\begin{minipage}[b]{\linewidth}\centering +Centered Header +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright +Left Aligned +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedleft +Right Aligned +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright +Default aligned +\end{minipage} \\ \midrule \endhead First & row & 12.0 & Example of a row that spans multiple lines. \\ @@ -76,7 +92,15 @@ Multiline table without caption: >{\raggedleft\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.16}} >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.35}}@{}} \toprule -Centered Header & Left Aligned & Right Aligned & Default aligned \\ +\begin{minipage}[b]{\linewidth}\centering +Centered Header +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright +Left Aligned +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedleft +Right Aligned +\end{minipage} & \begin{minipage}[b]{\linewidth}\raggedright +Default aligned +\end{minipage} \\ \midrule \endhead First & row & 12.0 & Example of a row that spans multiple lines. \\ diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex index 1b5929bad..1dcac7319 100644 --- a/test/tables/nordics.latex +++ b/test/tables/nordics.latex @@ -5,13 +5,31 @@ >{\raggedright\arraybackslash}p{(\columnwidth - 6\tabcolsep) * \real{0.20}}@{}} \caption{States belonging to the \emph{Nordics.}}\tabularnewline \toprule -Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} & -\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\ +\begin{minipage}[b]{\linewidth}\centering +Name +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Capital +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Population\\ +(in 2018) +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Area\\ +(in km\textsuperscript{2}) +\end{minipage} \\ \midrule \endfirsthead \toprule -Name & Capital & \vtop{\hbox{\strut Population}\hbox{\strut (in 2018)}} & -\vtop{\hbox{\strut Area}\hbox{\strut (in km\textsuperscript{2})}} \\ +\begin{minipage}[b]{\linewidth}\centering +Name +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Capital +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Population\\ +(in 2018) +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Area\\ +(in km\textsuperscript{2}) +\end{minipage} \\ \midrule \endhead Denmark & Copenhagen & 5,809,502 & 43,094 \\ diff --git a/test/tables/planets.latex b/test/tables/planets.latex index 8238c43f3..b22c3adeb 100644 --- a/test/tables/planets.latex +++ b/test/tables/planets.latex @@ -1,18 +1,18 @@ \begin{longtable}[]{@{}cclrrrrrrrrl@{}} \caption{Data about the planets of our solar system.}\tabularnewline \toprule -\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density +\multicolumn{2}{c}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun (10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \midrule \endfirsthead \toprule -\multicolumn{2}{l}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density +\multicolumn{2}{c}{} & Name & Mass (10\^{}24kg) & Diameter (km) & Density (kg/m\^{}3) & Gravity (m/s\^{}2) & Length of day (hours) & Distance from Sun (10\^{}6km) & Mean temperature (C) & Number of moons & Notes \\ \midrule \endhead -\multicolumn{2}{l}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 & +\multicolumn{2}{c}{\multirow{4}{*}{Terrestrial planets}} & Mercury & 0.330 & 4,879 & 5427 & 3.7 & 4222.6 & 57.9 & 167 & 0 & Closest to the Sun \\ & & Venus & 4.87 & 12,104 & 5243 & 8.9 & 2802.0 & 108.2 & 464 & 0 & \\ & & Earth & 5.97 & 12,756 & 5514 & 9.8 & 24.0 & 149.6 & 15 & 1 & Our world \\ @@ -24,7 +24,7 @@ planet \\ & \multirow{2}{*}{Ice giants} & Uranus & 86.8 & 51,118 & 1271 & 8.7 & 17.2 & 2872.5 & -195 & 27 & \\ & & Neptune & 102 & 49,528 & 1638 & 11.0 & 16.1 & 4495.1 & -200 & 14 & \\ -\multicolumn{2}{l}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 & +\multicolumn{2}{c}{Dwarf planets} & Pluto & 0.0146 & 2,370 & 2095 & 0.7 & 153.3 & 5906.4 & -225 & 5 & Declassified as a planet in 2006. \\ \bottomrule \end{longtable} diff --git a/test/tables/students.latex b/test/tables/students.latex index 87efb0851..3d4d287d9 100644 --- a/test/tables/students.latex +++ b/test/tables/students.latex @@ -3,11 +3,19 @@ >{\raggedright\arraybackslash}p{(\columnwidth - 2\tabcolsep) * \real{0.50}}@{}} \caption{List of Students}\tabularnewline \toprule -Student ID & Name \\ +\begin{minipage}[b]{\linewidth}\centering +Student ID +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Name +\end{minipage} \\ \midrule \endfirsthead \toprule -Student ID & Name \\ +\begin{minipage}[b]{\linewidth}\centering +Student ID +\end{minipage} & \begin{minipage}[b]{\linewidth}\centering +Name +\end{minipage} \\ \midrule \endhead \multicolumn{2}{l}{Computer Science} \\ -- cgit v1.2.3 From 76e5f047b0ef4c293687f9ddce62d601b23058a9 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 8 Jun 2021 08:20:22 -0600 Subject: Citeproc: avoid duplicate classes and attributes on refs div. --- src/Text/Pandoc/Citeproc.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index de63aed1f..ad3b26c0f 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -49,7 +49,6 @@ import qualified Data.Text as T import System.FilePath (takeExtension) import Safe (lastMay, initSafe) - processCitations :: PandocMonad m => Pandoc -> m Pandoc processCitations (Pandoc meta bs) = do style <- getStyle (Pandoc meta bs) @@ -499,7 +498,8 @@ insertRefs refkvs refclasses meta refs bs = put True -- refHeader isn't used if you have an explicit references div let cs' = ordNub $ cs ++ refclasses - return $ Div ("refs",cs' ++ refclasses,kvs ++ refkvs) (xs ++ refs) + let kvs' = ordNub $ kvs ++ refkvs + return $ Div ("refs",cs',kvs') (xs ++ refs) go x = return x refTitle :: Meta -> Maybe [Inline] -- cgit v1.2.3 From 55bcd4b4fb1dced6c6e316db6cd117b52bbadee5 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Thu, 10 Jun 2021 18:26:53 +0200 Subject: Lua utils: fix handling of table headers in `from_simple_table` Passing an empty list of header cells now results in an empty table header. Fixes: #7369 --- src/Text/Pandoc/Lua/Module/Utils.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Lua/Module/Utils.hs b/src/Text/Pandoc/Lua/Module/Utils.hs index 1b04021a7..3ec3afc26 100644 --- a/src/Text/Pandoc/Lua/Module/Utils.hs +++ b/src/Text/Pandoc/Lua/Module/Utils.hs @@ -146,7 +146,7 @@ from_simple_table (SimpleTable capt aligns widths head' body) = do nullAttr (Caption Nothing [Plain capt]) (zipWith (\a w -> (a, toColWidth w)) aligns widths) - (TableHead nullAttr [blockListToRow head']) + (TableHead nullAttr [blockListToRow head' | not (null head') ]) [TableBody nullAttr 0 [] $ map blockListToRow body] (TableFoot nullAttr []) return (NumResults 1) -- cgit v1.2.3 From c7dd33d5aaa30f4c95e141b10e6d8fe0212edf1e Mon Sep 17 00:00:00 2001 From: Albert Krewinkel <albert@zeitkraut.de> Date: Thu, 10 Jun 2021 18:34:20 +0200 Subject: Docx writer: fix handling of empty table headers A table header which does not contain any cells is now treated as an empty header. Fixes: #7369 --- src/Text/Pandoc/Writers/Docx/Table.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 13cb31040..49917e315 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -116,8 +116,8 @@ cellGridToOpenXML :: PandocMonad m -> [Alignment] -> Part -> WS m [Element] -cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ _ rowAttrs) = - if null (indices rowAttrs) +cellGridToOpenXML blocksToOpenXML rowType aligns part@(Part _ cellArray _) = + if null (elems cellArray) then return mempty else mapM (rowToOpenXML blocksToOpenXML) $ partToRows rowType aligns part -- cgit v1.2.3 From aa79b3035c3343adf1bb41b37266049a65ab5da7 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 10 Jun 2021 16:36:54 -0700 Subject: T.P.MIME, extensionFromMimeType: add a few special cases. When we do a reverse lookup in the MIME table, we just get the last match, so when the same mime type is associated with several different extensions, we sometimes got weird results, e.g. `.vs` for `text/plain`. These special cases help us get the most standard extensions for mime types like `text/plain`. --- src/Text/Pandoc/MIME.hs | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src') diff --git a/src/Text/Pandoc/MIME.hs b/src/Text/Pandoc/MIME.hs index 3d06e1579..77c7069e9 100644 --- a/src/Text/Pandoc/MIME.hs +++ b/src/Text/Pandoc/MIME.hs @@ -43,6 +43,16 @@ getMimeTypeDef :: FilePath -> MimeType getMimeTypeDef = fromMaybe "application/octet-stream" . getMimeType extensionFromMimeType :: MimeType -> Maybe T.Text +-- few special cases, where there are multiple options: +extensionFromMimeType "text/plain" = Just "txt" +extensionFromMimeType "video/quicktime" = Just "mov" +extensionFromMimeType "video/mpeg" = Just "mpeg" +extensionFromMimeType "video/dv" = Just "dv" +extensionFromMimeType "image/vnd.djvu" = Just "djvu" +extensionFromMimeType "image/tiff" = Just "tiff" +extensionFromMimeType "image/jpeg" = Just "jpg" +extensionFromMimeType "application/xml" = Just "xml" +extensionFromMimeType "application/ogg" = Just "ogg" extensionFromMimeType mimetype = M.lookup (T.takeWhile (/=';') mimetype) reverseMimeTypes -- note: we just look up the basic mime type, dropping the content-encoding etc. -- cgit v1.2.3 From 3776e828a83048697e5c64d9fb4bedc0145197dc Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Thu, 10 Jun 2021 16:47:02 -0700 Subject: Fix MediaBag regressions. With the 2.14 release `--extract-media` stopped working as before; there could be mismatches between the paths in the rendered document and the extracted media. This patch makes several changes (while keeping the same API). The `mediaPath` in 2.14 was always constructed from the SHA1 hash of the media contents. Now, we preserve the original path unless it's an absolute path or contains `..` segments (in that case we use a path based on the SHA1 hash of the contents). When constructing a path from the SHA1 hash, we always use the original extension, if there is one. Otherwise we look up an appropriate extension for the mime type. `mediaDirectory` and `mediaItems` now use the `mediaPath`, rather than the mediabag key, for the first component of the tuple. This makes more sense, I think, and fits with the documentation of these functions; eventually, though, we should rework the API so that `mediaItems` returns both the keys and the MediaItems. Rewriting of source paths in `extractMedia` has been fixed. `fillMediaBag` has been modified so that it doesn't modify image paths (that was part of the problem in #7345). We now do path normalization (e.g. `\` separators on Windows) only in writing the media; the paths are left unchanged in the image links (sensibly, since they might be URLs and not file paths). These changes should restore the original behavior from before 2.14. Closes #7345. --- MANUAL.txt | 12 +++++------ src/Text/Pandoc/Class/IO.hs | 41 ++++++++++++++++++------------------ src/Text/Pandoc/Class/PandocMonad.hs | 17 ++++++--------- src/Text/Pandoc/MediaBag.hs | 25 ++++++++++++---------- 4 files changed, 47 insertions(+), 48 deletions(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index b3a1f95e2..ef569433a 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -675,12 +675,12 @@ header when requesting a document from a URL: : Extract images and other media contained in or linked from the source document to the path *DIR*, creating it if necessary, and adjust the images references in the document - so they point to the extracted files. If the source format is - a binary container (docx, epub, or odt), the media is - extracted from the container and the original - filenames are used. Otherwise the media is read from the - file system or downloaded, and new filenames are constructed - based on SHA1 hashes of the contents. + so they point to the extracted files. Media are downloaded, + read from the file system, or extracted from a binary + container (e.g. docx), as needed. The original file paths + are used if they are relative paths not containing `..`. + Otherwise filenames are constructed from the SHA1 hash of + the contents. `--abbreviations=`*FILE* diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 6df39d4d0..169074860 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -62,7 +62,7 @@ import Text.Pandoc.Definition (Pandoc, Inline (Image)) import Text.Pandoc.Error (PandocError (..)) import Text.Pandoc.Logging (LogMessage (..), messageVerbosity, showLogMessage) import Text.Pandoc.MIME (MimeType) -import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaDirectory) +import Text.Pandoc.MediaBag (MediaBag, MediaItem(..), lookupMedia, mediaItems) import Text.Pandoc.Walk (walk) import qualified Control.Exception as E import qualified Data.ByteString as B @@ -200,31 +200,32 @@ alertIndent (l:ls) = do extractMedia :: (PandocMonad m, MonadIO m) => FilePath -> Pandoc -> m Pandoc extractMedia dir d = do media <- getMediaBag - case [fp | (fp, _, _) <- mediaDirectory media] of - [] -> return d - fps -> do - mapM_ (writeMedia dir media) fps - return $ walk (adjustImagePath dir fps) d + let items = mediaItems media + if null items + then return d + else do + mapM_ (writeMedia dir) items + return $ walk (adjustImagePath dir media) d -- | Write the contents of a media bag to a path. writeMedia :: (PandocMonad m, MonadIO m) - => FilePath -> MediaBag -> FilePath + => FilePath + -> (FilePath, MimeType, BL.ByteString) -> m () -writeMedia dir mediabag subpath = do - let mbcontents = lookupMedia subpath mediabag - case mbcontents of - Nothing -> throwError $ PandocResourceNotFound $ pack subpath - Just item -> do - -- we normalize to get proper path separators for the platform - let fullpath = dir </> normalise (mediaPath item) - liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) - logIOError $ BL.writeFile fullpath $ mediaContents item +writeMedia dir (fp, _mt, bs) = do + -- we normalize to get proper path separators for the platform + let fullpath = normalise $ dir </> fp + liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) + logIOError $ BL.writeFile fullpath bs -- | If the given Inline element is an image with a @src@ path equal to -- one in the list of @paths@, then prepends @dir@ to the image source; -- returns the element unchanged otherwise. -adjustImagePath :: FilePath -> [FilePath] -> Inline -> Inline -adjustImagePath dir paths (Image attr lab (src, tit)) - | unpack src `elem` paths - = Image attr lab (pack (normalise $ dir </> unpack src), tit) +adjustImagePath :: FilePath -> MediaBag -> Inline -> Inline +adjustImagePath dir mediabag (Image attr lab (src, tit)) = + case lookupMedia (T.unpack src) mediabag of + Nothing -> Image attr lab (src, tit) + Just item -> + let fullpath = dir </> mediaPath item + in Image attr lab (T.pack fullpath, tit) adjustImagePath _ _ x = x diff --git a/src/Text/Pandoc/Class/PandocMonad.hs b/src/Text/Pandoc/Class/PandocMonad.hs index 4eb80df29..439aec071 100644 --- a/src/Text/Pandoc/Class/PandocMonad.hs +++ b/src/Text/Pandoc/Class/PandocMonad.hs @@ -638,17 +638,12 @@ fillMediaBag d = walkM handleImage d handleImage (Image attr lab (src, tit)) = catchError (do mediabag <- getMediaBag let fp = T.unpack src - src' <- T.pack <$> case lookupMedia fp mediabag of - Just item -> return $ mediaPath item - Nothing -> do - (bs, mt) <- fetchItem src - insertMedia fp mt (BL.fromStrict bs) - mediabag' <- getMediaBag - case lookupMedia fp mediabag' of - Just item -> return $ mediaPath item - Nothing -> throwError $ PandocSomeError $ - src <> " not successfully inserted into MediaBag" - return $ Image attr lab (src', tit)) + case lookupMedia fp mediabag of + Just _ -> return () + Nothing -> do + (bs, mt) <- fetchItem src + insertMedia fp mt (BL.fromStrict bs) + return $ Image attr lab (src, tit)) (\e -> case e of PandocResourceNotFound _ -> do diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index a65f315fc..06fba5632 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -71,16 +71,21 @@ insertMedia :: FilePath -- ^ relative path and canonical name of resource -> MediaBag -> MediaBag insertMedia fp mbMime contents (MediaBag mediamap) = - MediaBag (M.insert (canonicalize fp) mediaItem mediamap) - where mediaItem = MediaItem{ mediaPath = showDigest (sha1 contents) <> - "." <> ext + MediaBag (M.insert fp' mediaItem mediamap) + where mediaItem = MediaItem{ mediaPath = newpath , mediaContents = contents , mediaMimeType = mt } + fp' = canonicalize fp + newpath = if isRelative fp && ".." `notElem` splitPath fp + then T.unpack fp' + else showDigest (sha1 contents) <> "." <> ext fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp mt = fromMaybe fallback mbMime - ext = maybe (takeExtension fp) T.unpack $ extensionFromMimeType mt + ext = case takeExtension fp of + '.':e -> e + _ -> maybe "" T.unpack $ extensionFromMimeType mt -- | Lookup a media item in a 'MediaBag', returning mime type and contents. @@ -92,13 +97,11 @@ lookupMedia fp (MediaBag mediamap) = M.lookup (canonicalize fp) mediamap -- | Get a list of the file paths stored in a 'MediaBag', with -- their corresponding mime types and the lengths in bytes of the contents. mediaDirectory :: MediaBag -> [(FilePath, MimeType, Int)] -mediaDirectory (MediaBag mediamap) = - M.foldrWithKey (\fp item -> - ((T.unpack fp, mediaMimeType item, - fromIntegral (BL.length (mediaContents item))):)) [] mediamap +mediaDirectory mediabag = + map (\(fp, mt, bs) -> (fp, mt, fromIntegral (BL.length bs))) + (mediaItems mediabag) mediaItems :: MediaBag -> [(FilePath, MimeType, BL.ByteString)] mediaItems (MediaBag mediamap) = - M.foldrWithKey (\fp item -> - ((T.unpack fp, mediaMimeType item, mediaContents item):)) - [] mediamap + map (\item -> (mediaPath item, mediaMimeType item, mediaContents item)) + (M.elems mediamap) -- cgit v1.2.3 From b0cd6c622494666add6bdd7674ec5b7791bc83d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 12 Jun 2021 10:16:44 -0700 Subject: Fix regression in citeproc processing. If inline references are used (in the metadata `references` field), we should still only include in the bibliography items that are actually cited -- unless `nocite` is used. Closes #7376. --- src/Text/Pandoc/Citeproc.hs | 4 +++- test/command/7376.md | 16 ++++++++++++++++ test/command/pandoc-citeproc-356.md | 9 ++------- 3 files changed, 21 insertions(+), 8 deletions(-) create mode 100644 test/command/7376.md (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index ad3b26c0f..a5b26c9b4 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -194,7 +194,9 @@ getReferences mblocale (Pandoc meta bs) = do then const True else (`Set.member` citeIds) let inlineRefs = case lookupMeta "references" meta of - Just (MetaList rs) -> mapMaybe metaValueToReference rs + Just (MetaList rs) -> + filter (idpred . unItemId . referenceId) + $ mapMaybe metaValueToReference rs _ -> [] externalRefs <- case lookupMeta "bibliography" meta of Just (MetaList xs) -> diff --git a/test/command/7376.md b/test/command/7376.md new file mode 100644 index 000000000..229c61cfb --- /dev/null +++ b/test/command/7376.md @@ -0,0 +1,16 @@ +``` +% pandoc --citeproc -t plain +--- +references: +- id: item1 + type: book + author: + - family: Doe + given: Jane + issued: 2020 + title: The title +... +^D + + +``` diff --git a/test/command/pandoc-citeproc-356.md b/test/command/pandoc-citeproc-356.md index 4463ef63f..b4f998dae 100644 --- a/test/command/pandoc-citeproc-356.md +++ b/test/command/pandoc-citeproc-356.md @@ -15,11 +15,6 @@ references: [@bar] ^D -(Alice 2042) - -::: {#refs .references .csl-bib-body .hanging-indent} -::: {#ref-foo .csl-entry} -Alice. 2042. -::: -::: +[WARNING] Citeproc: citation bar not found +(**bar?**) ``` -- cgit v1.2.3 From ea53a1dc5c68e6a1a2e450422147a66fd2aa9efd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 12 Jun 2021 10:20:19 -0700 Subject: Markdown writer: allow `pipe_tables` to be disabled for commonmark... (commonmark_x, gfm). Closes #7375. --- src/Text/Pandoc/Writers/Markdown.hs | 1 - 1 file changed, 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index b16c71358..425ea07ca 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -75,7 +75,6 @@ writeCommonMark opts document = -- we set them here so that escapeText will behave -- properly. enableExtension Ext_all_symbols_escapable $ - enableExtension Ext_pipe_tables $ enableExtension Ext_intraword_underscores $ writerExtensions opts } -- cgit v1.2.3 From cfa26e3ca0346397f41af9aed5b4cd1d86be1220 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sat, 12 Jun 2021 13:56:09 -0700 Subject: Docx reader: handle absolute URIs in Relationship Target. Closes #7374. --- src/Text/Pandoc/Readers/Docx/Parse.hs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Docx/Parse.hs b/src/Text/Pandoc/Readers/Docx/Parse.hs index aaa8f4ad0..dbb16a821 100644 --- a/src/Text/Pandoc/Readers/Docx/Parse.hs +++ b/src/Text/Pandoc/Readers/Docx/Parse.hs @@ -479,20 +479,26 @@ filePathToRelType path docXmlPath = then Just InDocument else Nothing -relElemToRelationship :: DocumentLocation -> Element -> Maybe Relationship -relElemToRelationship relType element | qName (elName element) == "Relationship" = +relElemToRelationship :: FilePath -> DocumentLocation -> Element + -> Maybe Relationship +relElemToRelationship fp relType element | qName (elName element) == "Relationship" = do relId <- findAttr (QName "Id" Nothing Nothing) element target <- findAttr (QName "Target" Nothing Nothing) element - return $ Relationship relType relId target -relElemToRelationship _ _ = Nothing + -- target may be relative (media/image1.jpeg) or absolute + -- (/word/media/image1.jpeg); we need to relativize it (see #7374) + let frontOfFp = T.pack $ takeWhile (/= '_') fp + let target' = fromMaybe target $ + T.stripPrefix frontOfFp $ T.dropWhile (== '/') target + return $ Relationship relType relId target' +relElemToRelationship _ _ _ = Nothing filePathToRelationships :: Archive -> FilePath -> FilePath -> [Relationship] filePathToRelationships ar docXmlPath fp | Just relType <- filePathToRelType fp docXmlPath , Just entry <- findEntryByPath fp ar , Just relElems <- parseXMLFromEntry entry = - mapMaybe (relElemToRelationship relType) $ elChildren relElems + mapMaybe (relElemToRelationship fp relType) $ elChildren relElems filePathToRelationships _ _ _ = [] archiveToRelationships :: Archive -> FilePath -> [Relationship] -- cgit v1.2.3 From 3fb5499dd638dae7156fba63ba5c1522bed5e46d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 18 Jun 2021 12:06:20 -0700 Subject: insertMediaBag: ensure we get sane mediaPath for URLs. Long URLs cannot be treated as mediaPaths, but System.FilePath's `isRelative` often returns True for them. So we add a check for an absolute URL. We also ensure that extensions are derived only from the path portion of URLs (previously a following query was being included). Closes #7391. --- src/Text/Pandoc/MediaBag.hs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/MediaBag.hs b/src/Text/Pandoc/MediaBag.hs index 06fba5632..098e484ee 100644 --- a/src/Text/Pandoc/MediaBag.hs +++ b/src/Text/Pandoc/MediaBag.hs @@ -26,13 +26,14 @@ module Text.Pandoc.MediaBag ( import qualified Data.ByteString.Lazy as BL import Data.Data (Data) import qualified Data.Map as M -import Data.Maybe (fromMaybe) +import Data.Maybe (fromMaybe, isNothing) import Data.Typeable (Typeable) import System.FilePath import Text.Pandoc.MIME (MimeType, getMimeTypeDef, extensionFromMimeType) import Data.Text (Text) import qualified Data.Text as T import Data.Digest.Pure.SHA (sha1, showDigest) +import Network.URI (URI (..), parseURI) data MediaItem = MediaItem @@ -76,16 +77,20 @@ insertMedia fp mbMime contents (MediaBag mediamap) = , mediaContents = contents , mediaMimeType = mt } fp' = canonicalize fp - newpath = if isRelative fp && ".." `notElem` splitPath fp + uri = parseURI fp + newpath = if isRelative fp + && isNothing uri + && ".." `notElem` splitPath fp then T.unpack fp' else showDigest (sha1 contents) <> "." <> ext fallback = case takeExtension fp of ".gz" -> getMimeTypeDef $ dropExtension fp _ -> getMimeTypeDef fp mt = fromMaybe fallback mbMime - ext = case takeExtension fp of - '.':e -> e - _ -> maybe "" T.unpack $ extensionFromMimeType mt + path = maybe fp uriPath uri + ext = case takeExtension path of + '.':e -> e + _ -> maybe "" T.unpack $ extensionFromMimeType mt -- | Lookup a media item in a 'MediaBag', returning mime type and contents. -- cgit v1.2.3 From 82ad855f38b8fa8dc1cbfc14fa294dfd5f9f02ab Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 08:49:00 -0700 Subject: Markdown writer: Fix regression in code blocks with attributes. Code blocks with a single class but nonempty attributes were having attributes drop as a result of #7242. Closes #7397. --- src/Text/Pandoc/Writers/Markdown.hs | 6 +++--- test/command/7397.md | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 test/command/7397.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index 425ea07ca..b13ab57ee 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -256,10 +256,10 @@ noteToMarkdown opts num blocks = do then hang (writerTabStop opts) (marker <> spacer) contents else marker <> spacer <> contents --- | (Code) blocks with a single class can just use it standalone, --- no need to bother with curly braces. +-- | (Code) blocks with a single class and no attributes can just use it +-- standalone, no need to bother with curly braces. classOrAttrsToMarkdown :: Attr -> Doc Text -classOrAttrsToMarkdown ("",[cls],_) = literal cls +classOrAttrsToMarkdown ("",[cls],[]) = literal cls classOrAttrsToMarkdown attrs = attrsToMarkdown attrs -- | Ordered list start parser for use in Para below. diff --git a/test/command/7397.md b/test/command/7397.md new file mode 100644 index 000000000..ca8b6a482 --- /dev/null +++ b/test/command/7397.md @@ -0,0 +1,14 @@ +``` +% pandoc -t markdown +~~~~ { .haskell startFrom="100"} +qsort [] = [] +qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++ + qsort (filter (>= x) xs) +~~~~ +^D +``` {.haskell startFrom="100"} +qsort [] = [] +qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++ + qsort (filter (>= x) xs) +``` +``` -- cgit v1.2.3 From 14b2eb2aeb21b3dee7d07c7348ebd2c586d6a866 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 16:40:52 -0700 Subject: reveal.js writer: better handling of options. Previously it was impossible to specify false values for options that default to true; setting the option to false just caused the portion of the template setting the option to be omitted. Now we prepopulate all the variables with their default values, including them unconditionally and allowing them to be overridden. --- data/templates/default.revealjs | 198 ++++++++++++++-------------------------- src/Text/Pandoc/Writers/HTML.hs | 50 ++++++++++ 2 files changed, 120 insertions(+), 128 deletions(-) (limited to 'src') diff --git a/data/templates/default.revealjs b/data/templates/default.revealjs index 8a77674dd..203983522 100644 --- a/data/templates/default.revealjs +++ b/data/templates/default.revealjs @@ -90,256 +90,198 @@ $endif$ // Full list of configuration options available at: // https://revealjs.com/config/ Reveal.initialize({ -$if(center)$ - // Determines whether slide content should be vertically centered - center: $center$, -$endif$ -$if(controls)$ // Display controls in the bottom right corner controls: $controls$, -$endif$ -$if(controlsTutorial)$ + // Help the user learn the controls by providing hints, for example by // bouncing the down arrow when they first encounter a vertical slide controlsTutorial: $controlsTutorial$, -$endif$ -$if(controlsLayout)$ + // Determines where controls appear, "edges" or "bottom-right" controlsLayout: '$controlsLayout$', -$endif$ -$if(controlsBackArrows)$ + // Visibility rule for backwards navigation arrows; "faded", "hidden" // or "visible" controlsBackArrows: '$controlsBackArrows$', -$endif$ -$if(progress)$ + // Display a presentation progress bar progress: $progress$, -$endif$ -$if(slideNumber)$ + // Display the page number of the current slide slideNumber: $slideNumber$, -$endif$ -$if(showSlideNumber)$ + // 'all', 'print', or 'speaker' showSlideNumber: '$showSlideNumber$', -$endif$ + // Add the current slide number to the URL hash so that reloading the // page/copying the URL will return you to the same slide - hash: $if(hash)$$hash$$else$true$endif$, -$if(hashOneBasedIndex)$ + hash: $hash$, + // Start with 1 for the hash rather than 0 hashOneBasedIndex: $hashOneBasedIndex$, -$endif$ -$if(history)$ + + // Flags if we should monitor the hash and change slides accordingly + respondToHashChanges: $respondToHashChanges$, + // Push each slide change to the browser history history: $history$, -$endif$ -$if(keyboard)$ + // Enable keyboard shortcuts for navigation keyboard: $keyboard$, -$endif$ -$if(overview)$ + // Enable the slide overview mode overview: $overview$, -$endif$ -$if(center)$ + + // Disables the default reveal.js slide layout (scaling and centering) + // so that you can use custom CSS layout + disableLayout: false, + // Vertical centering of slides center: $center$, -$endif$ -$if(touch)$ + // Enables touch navigation on devices with touch input touch: $touch$, -$endif$ -$if(loop)$ + // Loop the presentation loop: $loop$, -$endif$ -$if(rtl)$ + // Change the presentation direction to be RTL rtl: $rtl$, -$endif$ -$if(navigationMode)$ + // see https://revealjs.com/vertical-slides/#navigation-mode navigationMode: '$navigationMode$', -$endif$ -$if(shuffle)$ + // Randomizes the order of slides each time the presentation loads shuffle: $shuffle$, -$endif$ -$if(fragments)$ + // Turns fragments on and off globally fragments: $fragments$, -$endif$ -$if(fragmentInURL)$ + // Flags whether to include the current fragment in the URL, // so that reloading brings you to the same fragment position fragmentInURL: $fragmentInURL$, -$endif$ -$if(embedded)$ + // Flags if the presentation is running in an embedded mode, // i.e. contained within a limited portion of the screen embedded: $embedded$, -$endif$ -$if(help)$ + // Flags if we should show a help overlay when the questionmark // key is pressed help: $help$, -$endif$ -$if(pause)$ - // Flags if it should be possible to pause the presentation (blackout) - pause: $pause$, -$endif$ -$if(showNotes)$ + + // Flags if it should be possible to pause the presentation (blackout) + pause: $pause$, + // Flags if speaker notes should be visible to all viewers showNotes: $showNotes$, -$endif$ -$if(autoPlayMedia)$ - // Global override for autoplaying embedded media (video/audio/iframe) - // - null: Media will only autoplay if data-autoplay is present - // - true: All media will autoplay, regardless of individual setting - // - false: No media will autoplay, regardless of individual setting + + // Global override for autoplaying embedded media (null/true/false) autoPlayMedia: $autoPlayMedia$, -$endif$ -$if(preloadIframes)$ - // Global override for preloading lazy-loaded iframes - // - null: Iframes with data-src AND data-preload will be loaded when within - // the viewDistance, iframes with only data-src will be loaded when visible - // - true: All iframes with data-src will be loaded when within the viewDistance - // - false: All iframes with data-src will be loaded only when visible + + // Global override for preloading lazy-loaded iframes (null/true/false) preloadIframes: $preloadIframes$, -$endif$ -$if(autoSlide)$ + // Number of milliseconds between automatically proceeding to the // next slide, disabled when set to 0, this value can be overwritten // by using a data-autoslide attribute on your slides autoSlide: $autoSlide$, -$endif$ -$if(autoSlideStoppable)$ + // Stop auto-sliding after user input autoSlideStoppable: $autoSlideStoppable$, -$endif$ -$if(autoSlideMethod)$ + // Use this method for navigation when auto-sliding autoSlideMethod: $autoSlideMethod$, -$endif$ -$if(defaultTiming)$ + // Specify the average time in seconds that you think you will spend // presenting each slide. This is used to show a pacing timer in the // speaker view defaultTiming: $defaultTiming$, -$endif$ -$if(totalTime)$ - // Specify the total time in seconds that is available to - // present. If this is set to a nonzero value, the pacing - // timer will work out the time available for each slide, - // instead of using the defaultTiming value - totalTime: $totalTime$, -$endif$ -$if(minimumTimePerSlide)$ - // Specify the minimum amount of time you want to allot to - // each slide, if using the totalTime calculation method. If - // the automated time allocation causes slide pacing to fall - // below this threshold, then you will see an alert in the - // speaker notes window - minimumTimePerSlide: $minimumTimePerSlide$, -$endif$ -$if(mouseWheel)$ + // Enable slide navigation via mouse wheel mouseWheel: $mouseWheel$, -$endif$ -$if(rollingLinks)$ - // Apply a 3D roll to links on hover - rollingLinks: $rollingLinks$, -$endif$ -$if(hideInactiveCursor)$ + + // The display mode that will be used to show slides + display: '$display$', + // Hide cursor if inactive hideInactiveCursor: $hideInactiveCursor$, -$endif$ -$if(hideCursorTime)$ + // Time before the cursor is hidden (in ms) hideCursorTime: $hideCursorTime$, -$endif$ -$if(hideAddressBar)$ - // Hides the address bar on mobile devices - hideAddressBar: $hideAddressBar$, -$endif$ -$if(previewLinks)$ + // Opens links in an iframe preview overlay previewLinks: $previewLinks$, -$endif$ -$if(transition)$ - // Transition style - transition: '$transition$', // none/fade/slide/convex/concave/zoom -$endif$ -$if(transitionSpeed)$ - // Transition speed - transitionSpeed: '$transitionSpeed$', // default/fast/slow -$endif$ -$if(backgroundTransition)$ + + // Transition style (none/fade/slide/convex/concave/zoom) + transition: '$transition$', + + // Transition speed (default/fast/slow) + transitionSpeed: '$transitionSpeed$', + // Transition style for full page slide backgrounds - backgroundTransition: '$backgroundTransition$', // none/fade/slide/convex/concave/zoom -$endif$ -$if(viewDistance)$ + // (none/fade/slide/convex/concave/zoom) + backgroundTransition: '$backgroundTransition$', + // Number of slides away from the current that are visible viewDistance: $viewDistance$, -$endif$ -$if(mobileViewDistance)$ + // Number of slides away from the current that are visible on mobile // devices. It is advisable to set this to a lower number than // viewDistance in order to save resources. mobileViewDistance: $mobileViewDistance$, -$endif$ $if(parallaxBackgroundImage)$ + // Parallax background image parallaxBackgroundImage: '$parallaxBackgroundImage$', // e.g. "'https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg'" $else$ $if(background-image)$ + // Parallax background image parallaxBackgroundImage: '$background-image$', // e.g. "'https://s3.amazonaws.com/hakim-static/reveal-js/reveal-parallax-1.jpg'" $endif$ $endif$ $if(parallaxBackgroundSize)$ + // Parallax background size parallaxBackgroundSize: '$parallaxBackgroundSize$', // CSS syntax, e.g. "2100px 900px" $endif$ $if(parallaxBackgroundHorizontal)$ + // Amount to move parallax background (horizontal and vertical) on slide change // Number, e.g. 100 parallaxBackgroundHorizontal: $parallaxBackgroundHorizontal$, $endif$ $if(parallaxBackgroundVertical)$ + parallaxBackgroundVertical: $parallaxBackgroundVertical$, $endif$ $if(width)$ + // The "normal" size of the presentation, aspect ratio will be preserved // when the presentation is scaled to fit different resolutions. Can be // specified using percentage units. width: $width$, $endif$ $if(height)$ + height: $height$, $endif$ $if(margin)$ + // Factor of the display size that should remain empty around the content margin: $margin$, $endif$ $if(minScale)$ + // Bounds for smallest/largest possible scale to apply to content minScale: $minScale$, $endif$ $if(maxScale)$ + maxScale: $maxScale$, $endif$ -$if(zoomKey)$ - // Modifier key used to click-zoom to part of the slide - zoomKey: '$zoomKey$', -$endif$ -$if(display)$ - // The display mode that will be used to show slides - display: '$display$', -$endif$ $if(mathjax)$ + math: { mathjax: '$mathjaxurl$', config: 'TeX-AMS_HTML-full', diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index b1dd9a659..4d513df3b 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -317,6 +317,10 @@ pandocToHtml opts (Pandoc meta blocks) = do | otherwise -> mempty Nothing -> mempty let mCss :: Maybe [Text] = lookupContext "css" metadata + let true :: Text + true = "true" + let false :: Text + false = "false" let context = (if stHighlighting st then case writerHighlightStyle opts of Just sty -> defField "highlighting-css" @@ -344,6 +348,52 @@ pandocToHtml opts (Pandoc meta blocks) = do PlainMath -> defField "displaymath-css" True WebTeX _ -> defField "displaymath-css" True _ -> id) . + (if slideVariant == RevealJsSlides + then -- set boolean options explicitly, since + -- template can't distinguish False/undefined + defField "controls" true . + defField "controlsTutorial" true . + defField "controlsLayout" ("bottom-right" :: Text) . + defField "controlsBackArrows" ("faded" :: Text) . + defField "progress" true . + defField "slideNumber" false . + defField "showSlideNumber" ("all" :: Text) . + defField "hashOneBasedIndex" false . + defField "hash" false . + defField "respondToHashChanges" true . + defField "history" false . + defField "keyboard" true . + defField "overview" true . + defField "disableLayout" false . + defField "center" true . + defField "touch" true . + defField "loop" false . + defField "rtl" false . + defField "navigationMode" ("default" :: Text) . + defField "shuffle" false . + defField "fragments" true . + defField "fragmentInURL" true . + defField "embedded" false . + defField "help" true . + defField "pause" true . + defField "showNotes" false . + defField "autoPlayMedia" ("null" :: Text) . + defField "preloadIframes" ("null" :: Text) . + defField "autoSlide" ("0" :: Text) . + defField "autoSlideStoppable" true . + defField "autoSlideMethod" ("null" :: Text) . + defField "defaultTiming" ("null" :: Text) . + defField "mouseWheel" false . + defField "display" ("block" :: Text) . + defField "hideInactiveCursor" true . + defField "hideCursorTime" ("5000" :: Text) . + defField "previewLinks" false . + defField "transition" ("slide" :: Text) . + defField "transitionSpeed" ("default" :: Text) . + defField "backgroundTransition" ("fade" :: Text) . + defField "viewDistance" ("3" :: Text) . + defField "mobileViewDistance" ("2" :: Text) + else id) . defField "document-css" (isNothing mCss && slideVariant == NoSlides) . defField "quotes" (stQuotes st) . -- for backwards compatibility we populate toc -- cgit v1.2.3 From eee648447a05accb5680b5a1a6e69a7765af2db5 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 18:25:07 -0700 Subject: LaTeX writer: Use `\strut` instead of `~` before `\\` in empty line. --- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 978f94ea0..063e347fb 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -919,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do inlineToLaTeX LineBreak = do emptyLine <- gets stEmptyLine setEmptyLine True - return $ (if emptyLine then "~" else "") <> "\\\\" <> cr + return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr inlineToLaTeX SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of -- cgit v1.2.3 From ed3974a254c3e0c4e7a34d5d25ddef90c25d2092 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 18:25:36 -0700 Subject: LaTeX writer: always use a minipage for cells with line breaks... if width information is available. Otherwise the way we treat them can lead to content that overflows a cell. Closes #7393. --- src/Text/Pandoc/Writers/LaTeX/Table.hs | 9 +++++++-- test/command/7272.md | 5 ++++- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index 8dc7d1162..abdc26649 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -26,7 +26,8 @@ import Text.DocLayout ( Doc, braces, cr, empty, hcat, hsep, isEmpty, literal, nest , text, vcat, ($$) ) import Text.Pandoc.Shared (blocksToInlines, splitBy, tshow) -import Text.Pandoc.Walk (walk) +import Text.Pandoc.Walk (walk, query) +import Data.Monoid (Any(..)) import Text.Pandoc.Writers.LaTeX.Caption (getCaption) import Text.Pandoc.Writers.LaTeX.Notes (notesToLaTeX) import Text.Pandoc.Writers.LaTeX.Types @@ -261,8 +262,12 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do Para{} -> True Plain{} -> True _ -> False + let hasLineBreak LineBreak = Any True + hasLineBreak _ = Any False result <- - if not hasWidths || (celltype /= HeaderCell && all isPlainOrPara blocks) + if not hasWidths || (celltype /= HeaderCell + && all isPlainOrPara blocks + && not (getAny (query hasLineBreak blocks))) then blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks else do diff --git a/test/command/7272.md b/test/command/7272.md index d3a3b2137..3b9064c9c 100644 --- a/test/command/7272.md +++ b/test/command/7272.md @@ -18,7 +18,10 @@ >{\raggedright\arraybackslash}p{(\columnwidth - 0\tabcolsep) * \real{1.00}}@{}} \toprule \endhead -{\vtop{\hbox{\strut text}\hbox{\strut text2 }}} \\ +\begin{minipage}[t]{\linewidth}\raggedright +{ text\\ +text2 } +\end{minipage} \\ \bottomrule \end{longtable} ``` -- cgit v1.2.3 From 0352f7845bfa2053797850c3639414978285b63e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 22:35:07 -0700 Subject: Improve emailAddress in Text.Pandoc.Parsing. Previously the parser would accept characters in domains that are illegal in domains, and this sometimes caused it to gobble bits of the following text. Closes #7398. Note that this change, by itself, caused some txt2tag reader tests to fail. txt2tags allows bare email addresses with a following form query. So, in addition to the change to emailAddress, we modify the txt2tags parser so it can still handle these cases. --- src/Text/Pandoc/Parsing.hs | 7 +++---- src/Text/Pandoc/Readers/Txt2Tags.hs | 22 +++++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 0bb794ba1..082d9565b 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -693,13 +693,12 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) mailbox = intercalate "." <$> (emailWord `sepBy1'` dot) domain = intercalate "." <$> (subdomain `sepBy1'` dot) dot = char '.' - subdomain = many1 $ alphaNum <|> innerPunct + subdomain = many1 $ alphaNum <|> innerPunct (=='-') -- this excludes some valid email addresses, since an -- email could contain e.g. '__', but gives better results -- for our purposes, when combined with markdown parsing: - innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@') - <* notFollowedBy space - <* notFollowedBy (satisfy isPunctuation)) + innerPunct f = try (satisfy f + <* notFollowedBy (satisfy (not . isAlphaNum))) -- technically an email address could begin with a symbol, -- but allowing this creates too many problems. -- See e.g. https://github.com/jgm/pandoc/issues/2940 diff --git a/src/Text/Pandoc/Readers/Txt2Tags.hs b/src/Text/Pandoc/Readers/Txt2Tags.hs index 6f92f0063..b5cf5a0f3 100644 --- a/src/Text/Pandoc/Readers/Txt2Tags.hs +++ b/src/Text/Pandoc/Readers/Txt2Tags.hs @@ -478,9 +478,29 @@ macro = try $ do -- raw URLs in text are automatically linked url :: T2T Inlines url = try $ do - (rawUrl, escapedUrl) <- try uri <|> emailAddress + (rawUrl, escapedUrl) <- try uri <|> emailAddress' return $ B.link rawUrl "" (B.str escapedUrl) +emailAddress' :: T2T (Text, Text) +emailAddress' = do + (base, mailURI) <- emailAddress + query <- option "" emailQuery + return (base <> query, mailURI <> query) + +emailQuery :: T2T Text +emailQuery = do + char '?' + parts <- kv `sepBy1` (char '&') + return $ "?" <> T.intercalate "&" parts + +kv :: T2T Text +kv = do + k <- T.pack <$> many1 alphaNum + char '=' + let vchar = alphaNum <|> try (oneOf "%._/~:,=$@&+-;*" <* lookAhead alphaNum) + v <- T.pack <$> many1 vchar + return (k <> "=" <> v) + uri :: T2T (Text, Text) uri = try $ do address <- t2tURI -- cgit v1.2.3 From e2a7ecb5f73b12c8141ebf873a494652fc53babd Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 23:17:43 -0700 Subject: LaTeX writer: put a strut after a line break (`\\`). This ensures that we have proper spacing before the next line (which might e.g. be a table bottom border). This gives better results in cases like test/command/7272.md. --- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 063e347fb..32d1bc2a5 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -919,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do inlineToLaTeX LineBreak = do emptyLine <- gets stEmptyLine setEmptyLine True - return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr + return $ (if emptyLine then "\\strut " else "") <> "\\\\ \\strut" <> cr inlineToLaTeX SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of -- cgit v1.2.3 From 9867231779c32ddc2cce85d869de2ef586707244 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 23:19:40 -0700 Subject: Revert "LaTeX writer: put a strut after a line break (`\\`)." This reverts commit e2a7ecb5f73b12c8141ebf873a494652fc53babd. --- src/Text/Pandoc/Writers/LaTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 32d1bc2a5..063e347fb 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -919,7 +919,7 @@ inlineToLaTeX il@(RawInline f str) = do inlineToLaTeX LineBreak = do emptyLine <- gets stEmptyLine setEmptyLine True - return $ (if emptyLine then "\\strut " else "") <> "\\\\ \\strut" <> cr + return $ (if emptyLine then "\\strut " else "") <> "\\\\" <> cr inlineToLaTeX SoftBreak = do wrapText <- gets (writerWrapText . stOptions) case wrapText of -- cgit v1.2.3 From 8eed5b90d09a4a0c2592c92215fa96c69cf35234 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 21 Jun 2021 23:31:27 -0700 Subject: LaTeX writer: add strut at end of minipage if it contains... line breaks. Without them, the last line is shorter than it should be, at least in some cases. --- src/Text/Pandoc/Writers/LaTeX/Table.hs | 7 +++++-- test/command/7272.md | 2 +- test/tables/nordics.latex | 8 ++++---- 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/LaTeX/Table.hs b/src/Text/Pandoc/Writers/LaTeX/Table.hs index abdc26649..27a8a0257 100644 --- a/src/Text/Pandoc/Writers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Writers/LaTeX/Table.hs @@ -264,10 +264,11 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do _ -> False let hasLineBreak LineBreak = Any True hasLineBreak _ = Any False + let hasLineBreaks = getAny $ query hasLineBreak blocks result <- if not hasWidths || (celltype /= HeaderCell && all isPlainOrPara blocks - && not (getAny (query hasLineBreak blocks))) + && not hasLineBreaks) then blockListToLaTeX $ walk fixLineBreaks $ walk displayMathToInline blocks else do @@ -280,7 +281,9 @@ cellToLaTeX blockListToLaTeX celltype annotatedCell = do let halign = literal $ alignCommand align return $ "\\begin{minipage}" <> valign <> braces "\\linewidth" <> halign <> cr <> - cellContents <> cr <> + cellContents <> + (if hasLineBreaks then "\\strut" else mempty) + <> cr <> "\\end{minipage}" modify $ \st -> st{ stExternalNotes = externalNotes } when (rowspan /= RowSpan 1) $ diff --git a/test/command/7272.md b/test/command/7272.md index 3b9064c9c..d27b25143 100644 --- a/test/command/7272.md +++ b/test/command/7272.md @@ -20,7 +20,7 @@ \endhead \begin{minipage}[t]{\linewidth}\raggedright { text\\ -text2 } +text2 }\strut \end{minipage} \\ \bottomrule \end{longtable} diff --git a/test/tables/nordics.latex b/test/tables/nordics.latex index 1dcac7319..6f17a163e 100644 --- a/test/tables/nordics.latex +++ b/test/tables/nordics.latex @@ -11,10 +11,10 @@ Name Capital \end{minipage} & \begin{minipage}[b]{\linewidth}\centering Population\\ -(in 2018) +(in 2018)\strut \end{minipage} & \begin{minipage}[b]{\linewidth}\centering Area\\ -(in km\textsuperscript{2}) +(in km\textsuperscript{2})\strut \end{minipage} \\ \midrule \endfirsthead @@ -25,10 +25,10 @@ Name Capital \end{minipage} & \begin{minipage}[b]{\linewidth}\centering Population\\ -(in 2018) +(in 2018)\strut \end{minipage} & \begin{minipage}[b]{\linewidth}\centering Area\\ -(in km\textsuperscript{2}) +(in km\textsuperscript{2})\strut \end{minipage} \\ \midrule \endhead -- cgit v1.2.3 From 086790d986af35a5e6d68013a2c15ae10511db40 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 22 Jun 2021 09:49:24 -0700 Subject: Fix unneeded import --- src/Text/Pandoc/Parsing.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 082d9565b..09445622d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -188,7 +188,7 @@ import Control.Monad.Identity import Control.Monad.Reader ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) ) import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower, - isPunctuation, isSpace, ord, toLower, toUpper) + isSpace, ord, toLower, toUpper) import Data.Default ( Default(..) ) import Data.Functor (($>)) import Data.List (intercalate, transpose) -- cgit v1.2.3 From 1b07997f4a6870650f20702ed6d962f9471e3d40 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 22 Jun 2021 09:55:50 -0700 Subject: Fix regression with comment-only YAML metadata blocks. Closes #7400. --- src/Text/Pandoc/Readers/Metadata.hs | 3 +++ test/command/7400.md | 9 +++++++++ 2 files changed, 12 insertions(+) create mode 100644 test/command/7400.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs index 45eddf25a..cbc523b25 100644 --- a/src/Text/Pandoc/Readers/Metadata.hs +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -45,6 +45,9 @@ yamlBsToMeta pMetaValue bstr = do Right [] -> return . return $ mempty Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] -> return . return $ mempty + -- the following is what we get from a comment: + Right [YAML.Doc (YAML.Scalar _ (YAML.SUnknown _ ""))] + -> return . return $ mempty Right _ -> Prelude.fail "expected YAML object" Left (yamlpos, err') -> do pos <- getPosition diff --git a/test/command/7400.md b/test/command/7400.md new file mode 100644 index 000000000..d4be32d72 --- /dev/null +++ b/test/command/7400.md @@ -0,0 +1,9 @@ +``` +% pandoc -t native -s +--- +# Comment only +... +^D +Pandoc (Meta {unMeta = fromList []}) +[] +``` -- cgit v1.2.3 From 235cdea629de95f5a6c40a043e8dff6b19a46d3d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 23 Jun 2021 09:54:14 -0700 Subject: reveal.js writer: Go back to setting boolean values for variables. In a previous commit we used strings because boolean False wouldn't render as `false`. This is changed in the dev version ofdoctemplates, so we can go back to the more straightforward approach. --- src/Text/Pandoc/Writers/HTML.hs | 56 +++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 4d513df3b..b99b1a413 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -317,10 +317,6 @@ pandocToHtml opts (Pandoc meta blocks) = do | otherwise -> mempty Nothing -> mempty let mCss :: Maybe [Text] = lookupContext "css" metadata - let true :: Text - true = "true" - let false :: Text - false = "false" let context = (if stHighlighting st then case writerHighlightStyle opts of Just sty -> defField "highlighting-css" @@ -351,43 +347,43 @@ pandocToHtml opts (Pandoc meta blocks) = do (if slideVariant == RevealJsSlides then -- set boolean options explicitly, since -- template can't distinguish False/undefined - defField "controls" true . - defField "controlsTutorial" true . + defField "controls" True . + defField "controlsTutorial" True . defField "controlsLayout" ("bottom-right" :: Text) . defField "controlsBackArrows" ("faded" :: Text) . - defField "progress" true . - defField "slideNumber" false . + defField "progress" True . + defField "slideNumber" False . defField "showSlideNumber" ("all" :: Text) . - defField "hashOneBasedIndex" false . - defField "hash" false . - defField "respondToHashChanges" true . - defField "history" false . - defField "keyboard" true . - defField "overview" true . - defField "disableLayout" false . - defField "center" true . - defField "touch" true . - defField "loop" false . - defField "rtl" false . + defField "hashOneBasedIndex" False . + defField "hash" False . + defField "respondToHashChanges" True . + defField "history" False . + defField "keyboard" True . + defField "overview" True . + defField "disableLayout" False . + defField "center" True . + defField "touch" True . + defField "loop" False . + defField "rtl" False . defField "navigationMode" ("default" :: Text) . - defField "shuffle" false . - defField "fragments" true . - defField "fragmentInURL" true . - defField "embedded" false . - defField "help" true . - defField "pause" true . - defField "showNotes" false . + defField "shuffle" False . + defField "fragments" True . + defField "fragmentInURL" True . + defField "embedded" False . + defField "help" True . + defField "pause" True . + defField "showNotes" False . defField "autoPlayMedia" ("null" :: Text) . defField "preloadIframes" ("null" :: Text) . defField "autoSlide" ("0" :: Text) . - defField "autoSlideStoppable" true . + defField "autoSlideStoppable" True . defField "autoSlideMethod" ("null" :: Text) . defField "defaultTiming" ("null" :: Text) . - defField "mouseWheel" false . + defField "mouseWheel" False . defField "display" ("block" :: Text) . - defField "hideInactiveCursor" true . + defField "hideInactiveCursor" True . defField "hideCursorTime" ("5000" :: Text) . - defField "previewLinks" false . + defField "previewLinks" False . defField "transition" ("slide" :: Text) . defField "transitionSpeed" ("default" :: Text) . defField "backgroundTransition" ("fade" :: Text) . -- cgit v1.2.3 From 4a7a0cff294c48d1acd690ea593f4ab2c817ec27 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Wed, 23 Jun 2021 11:39:50 -0700 Subject: ImageSize: Add Tiff constructor for ImageType. [Minor API change] This allows pandoc to get size information from tiff images. Closes #7405. --- src/Text/Pandoc/ImageSize.hs | 6 +++++- src/Text/Pandoc/Writers/Docx.hs | 1 + src/Text/Pandoc/Writers/Powerpoint/Output.hs | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/ImageSize.hs b/src/Text/Pandoc/ImageSize.hs index bb1aa6351..2b7d10611 100644 --- a/src/Text/Pandoc/ImageSize.hs +++ b/src/Text/Pandoc/ImageSize.hs @@ -57,7 +57,8 @@ import Codec.Picture (decodeImageWithMetadata) -- quick and dirty functions to get image sizes -- algorithms borrowed from wwwis.pl -data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf deriving Show +data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf | Tiff + deriving Show data Direction = Width | Height instance Show Direction where show Width = "width" @@ -100,6 +101,8 @@ imageType :: ByteString -> Maybe ImageType imageType img = case B.take 4 img of "\x89\x50\x4e\x47" -> return Png "\x47\x49\x46\x38" -> return Gif + "\x49\x49\x2a\x00" -> return Tiff + "\x4D\x4D\x00\x2a" -> return Tiff "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF "\xff\xd8\xff\xe1" -> return Jpeg -- Exif "%PDF" -> return Pdf @@ -124,6 +127,7 @@ imageSize opts img = checkDpi <$> Just Png -> getSize img Just Gif -> getSize img Just Jpeg -> getSize img + Just Tiff -> getSize img Just Svg -> mbToEither "could not determine SVG size" $ svgSize opts img Just Eps -> mbToEither "could not determine EPS size" $ epsSize img Just Pdf -> mbToEither "could not determine PDF size" $ pdfSize img diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index e7a49ba02..d1065eb7d 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -1289,6 +1289,7 @@ inlineToOpenXML' opts (Image attr@(imgident, _, _) alt (src, title)) = do Just Eps -> ".eps" Just Svg -> ".svg" Just Emf -> ".emf" + Just Tiff -> ".tiff" Nothing -> "" imgpath = "media/" <> ident <> imgext mbMimeType = mt <|> getMimeType (T.unpack imgpath) diff --git a/src/Text/Pandoc/Writers/Powerpoint/Output.hs b/src/Text/Pandoc/Writers/Powerpoint/Output.hs index f2f54a91c..157810216 100644 --- a/src/Text/Pandoc/Writers/Powerpoint/Output.hs +++ b/src/Text/Pandoc/Writers/Powerpoint/Output.hs @@ -537,6 +537,7 @@ registerMedia fp caption = do Just Eps -> Just ".eps" Just Svg -> Just ".svg" Just Emf -> Just ".emf" + Just Tiff -> Just ".tiff" Nothing -> Nothing let newGlobalId = fromMaybe (maxGlobalId + 1) (M.lookup fp globalIds) -- cgit v1.2.3 From dd098d4e15090d12cc71301f91a159c5bfb29b50 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 28 Jun 2021 11:27:37 -0700 Subject: Markdown writer: put space between Plain and following fenced Div. Closes #4465. --- src/Text/Pandoc/Writers/Markdown.hs | 3 +++ test/command/4465.md | 15 +++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 test/command/4465.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs index b13ab57ee..fda2bbcef 100644 --- a/src/Text/Pandoc/Writers/Markdown.hs +++ b/src/Text/Pandoc/Writers/Markdown.hs @@ -791,6 +791,9 @@ blockListToMarkdown opts blocks = do b1 : commentSep : fixBlocks (b2:bs) fixBlocks (Plain ils : bs@(RawBlock{}:_)) = Plain ils : fixBlocks bs + fixBlocks (Plain ils : bs@(Div{}:_)) + | isEnabled Ext_fenced_divs opts = + Para ils : fixBlocks bs fixBlocks (Plain ils : bs) | inlist = Plain ils : fixBlocks bs fixBlocks (Plain ils : bs) = diff --git a/test/command/4465.md b/test/command/4465.md new file mode 100644 index 000000000..eaffcf7f3 --- /dev/null +++ b/test/command/4465.md @@ -0,0 +1,15 @@ +``` +% pandoc -f html -t markdown +<ol> + <li>An ordered list can contain block-level elements ind html, it means that divs are also allowed.</li> + <li>Let's see the problem! <div class="example">This is an example.</div></li> +</ol> +^D +1. An ordered list can contain block-level elements ind html, it means + that divs are also allowed. +2. Let\'s see the problem! + + ::: example + This is an example. + ::: +``` -- cgit v1.2.3 From 4262898fe9cc71abecffc1ffa165dd714c04407d Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 28 Jun 2021 13:28:02 -0700 Subject: Set proper initial source name in parsing BibTeX. (For better error messages.) --- src/Text/Pandoc/Citeproc/BibTeX.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index b17240557..1fe4a2730 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -71,9 +71,11 @@ readBibtexString variant locale idpred contents = do filter (\item -> idpred (identifier item) && entryType item /= "xdata")) (fromMaybe defaultLang $ localeLanguage locale, Map.empty) - "" (toSources contents) of + (initialSourceName sources) sources of Left err -> Left err Right xs -> return xs + where + sources = toSources contents -- | Write BibTeX or BibLaTeX given given a 'Reference'. writeBibtexString :: WriterOptions -- ^ options (for writing LaTex) -- cgit v1.2.3 From f045e59248116228093bfebf03ce9f7f4f0cc4ce Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 28 Jun 2021 13:28:22 -0700 Subject: Text.Pandoc.Error: fix line calculations in reporting parsec errors. Also remove a spurious initial newline in the error report. --- src/Text/Pandoc/Error.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Error.hs b/src/Text/Pandoc/Error.hs index 81eb41f85..9dee8356b 100644 --- a/src/Text/Pandoc/Error.hs +++ b/src/Text/Pandoc/Error.hs @@ -97,13 +97,13 @@ renderError e = [] -> "" ((pos,txt):_) -> let ls = T.lines txt <> [""] - ln = errLine - sourceLine pos - in if length ls > ln - 1 + ln = (errLine - sourceLine pos) + 1 + in if length ls > ln && ln >= 1 then T.concat ["\n", ls !! (ln - 1) ,"\n", T.replicate (errColumn - 1) " " ,"^"] else "" - in "\nError at " <> tshow err' <> errorInFile + in "Error at " <> tshow err' <> errorInFile PandocMakePDFError s -> s PandocOptionError s -> s PandocSyntaxMapError s -> s -- cgit v1.2.3 From 97b0aa667cce07e6552d0abb4f93469ba14f4eb0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 28 Jun 2021 13:34:12 -0700 Subject: Allow `$` characters in bibtex keys. Closes #7409. --- src/Text/Pandoc/Citeproc/BibTeX.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs index 1fe4a2730..c178de6e9 100644 --- a/src/Text/Pandoc/Citeproc/BibTeX.hs +++ b/src/Text/Pandoc/Citeproc/BibTeX.hs @@ -868,7 +868,7 @@ fieldName = resolveAlias . T.toLower isBibtexKeyChar :: Char -> Bool isBibtexKeyChar c = - isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char]) + isAlphaNum c || c `elem` (".:;?!`'()$/*@_+=-[]*&" :: [Char]) bibItem :: BibParser Item bibItem = do -- cgit v1.2.3 From 851d037b3eee4516fde50b81eb8a0fc9b2f1545b Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 28 Jun 2021 22:41:14 -0700 Subject: Improve punctuation moving with `--citeproc`. Previously, using `--citeproc` could cause punctuation to move in quotes even when there aer no citations. This has been changed; now, punctuation moving is limited to citations. In addition, we only move footnotes around punctuation if the style is a note style, even if `notes-after-punctuation` is `true`. --- src/Text/Pandoc/Citeproc.hs | 29 +++++++++++++++-------------- test/command/6890.md | 4 ++-- test/command/pandoc-citeproc-322.md | 2 +- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index a5b26c9b4..a2fca106a 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -90,21 +90,15 @@ processCitations (Pandoc meta bs) = do walk (convertQuotes locale) . insertSpace $ out) (resultBibliography result) - let moveNotes = maybe True truish $ - lookupMeta "notes-after-punctuation" meta + let moveNotes = styleIsNoteStyle sopts && + maybe True truish (lookupMeta "notes-after-punctuation" meta) let cits = map (walk (convertQuotes locale)) $ resultCitations result - let fixQuotes = case localePunctuationInQuote locale of - Just True -> - B.toList . movePunctuationInsideQuotes . B.fromList - _ -> id - let metanocites = lookupMeta "nocite" meta let Pandoc meta'' bs' = maybe id (setMeta "nocite") metanocites . - walk (map capitalizeNoteCitation . - fixQuotes . mvPunct moveNotes locale) . + walk (map capitalizeNoteCitation . mvPunct moveNotes locale) . walk deNote . evalState (walkM insertResolvedCitations $ Pandoc meta' bs) $ cits @@ -375,7 +369,6 @@ formatFromExtension fp = case dropWhile (== '.') $ takeExtension fp of isNote :: Inline -> Bool -isNote (Note _) = True isNote (Cite _ [Note _]) = True -- the following allows citation styles that are "in-text" but use superscript -- references to be treated as if they are "notes" for the purposes of moving @@ -388,6 +381,12 @@ isSpacy Space = True isSpacy SoftBreak = True isSpacy _ = False +movePunctInsideQuotes :: Locale -> [Inline] -> [Inline] +movePunctInsideQuotes locale + | localePunctuationInQuote locale == Just True + = B.toList . movePunctuationInsideQuotes . B.fromList + | otherwise + = id mvPunct :: Bool -> Locale -> [Inline] -> [Inline] mvPunct moveNotes locale (x : xs) @@ -400,7 +399,8 @@ mvPunct moveNotes locale (q : s : x : ys) in if moveNotes then if T.null spunct then q : x : mvPunct moveNotes locale ys - else q : Str spunct : x : mvPunct moveNotes locale + else movePunctInsideQuotes locale + [q , Str spunct , x] ++ mvPunct moveNotes locale (B.toList (dropTextWhile isPunctuation (B.fromList ys))) else q : x : mvPunct moveNotes locale ys @@ -412,9 +412,10 @@ mvPunct moveNotes locale (Cite cs ils : ys) , moveNotes = let s = stringify ys spunct = T.takeWhile isPunctuation s - in Cite cs (init ils - ++ [Str spunct | not (endWithPunct False (init ils))] - ++ [last ils]) : + in Cite cs (movePunctInsideQuotes locale $ + init ils + ++ [Str spunct | not (endWithPunct False (init ils))] + ++ [last ils]) : mvPunct moveNotes locale (B.toList (dropTextWhile isPunctuation (B.fromList ys))) mvPunct moveNotes locale (s : x : ys) | isSpacy s, isNote x = diff --git a/test/command/6890.md b/test/command/6890.md index e4129e2a9..e36c12771 100644 --- a/test/command/6890.md +++ b/test/command/6890.md @@ -23,12 +23,12 @@ references: @fruchtel-sozialer-2013a -Some text [^1]. +Some text.[^1] [^1]: @fruchtel-sozialer-2013a ^D [Para [Cite [Citation {citationId = "fruchtel-sozialer-2013a", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 1, citationHash = 0}] [Str "Fr\252chtel,",Space,Str "Budde,",Space,Str "and",Space,Str "Cyprian",Space,Str "(2013)"]] -,Para [Str "Some",Space,Str "text",Str ".",Note [Para [Cite [Citation {citationId = "fruchtel-sozialer-2013a", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "Fr\252chtel,",Space,Str "Budde,",Space,Str "and",Space,Str "Cyprian",Space,Str "(2013)"]]],Str ""] +,Para [Str "Some",Space,Str "text.",Note [Para [Cite [Citation {citationId = "fruchtel-sozialer-2013a", citationPrefix = [], citationSuffix = [], citationMode = AuthorInText, citationNoteNum = 2, citationHash = 0}] [Str "Fr\252chtel,",Space,Str "Budde,",Space,Str "and",Space,Str "Cyprian",Space,Str "(2013)"]]]] ,Div ("refs",["references","csl-bib-body","hanging-indent"],[]) [Div ("ref-fruchtel-sozialer-2013a",["csl-entry"],[]) [Para [Str "Fr\252chtel,",Space,Str "Frank,",Space,Str "Wolfgang",Space,Str "Budde,",Space,Str "and",Space,Str "Gudrun",Space,Str "Cyprian.",Space,Str "2013.",Space,Emph [Str "Sozialer",Space,Str "Raum",Space,Str "und",Space,Str "Soziale",Space,Str "Arbeit",Space,Str "Fieldbook:",Space,Str "Methoden",Space,Str "und",Space,Str "Techniken"],Str ".",Space,Str "3rd",Space,Str "ed.",Space,Str "Wiesbaden,",Space,Str "Germany:",Space,Str "Springer",Space,Str "VS."]]]] diff --git a/test/command/pandoc-citeproc-322.md b/test/command/pandoc-citeproc-322.md index 78494f0c4..c70eae755 100644 --- a/test/command/pandoc-citeproc-322.md +++ b/test/command/pandoc-citeproc-322.md @@ -19,7 +19,7 @@ references: type: 'article-journal' --- -Foo[@timmory__justice_1950]. +Foo [@timmory__justice_1950]. ^D Foo.[^1] -- cgit v1.2.3 From f4ef652a4165fefe4b587882cc75d1e24971ef1f Mon Sep 17 00:00:00 2001 From: Aner Lucero <4rgento@gmail.com> Date: Tue, 29 Jun 2021 08:49:36 -0300 Subject: Remove duplicated alt text in HTML output. --- src/Text/Pandoc/Writers/HTML.hs | 5 +++-- test/command/7416.md | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 test/command/7416.md (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index b99b1a413..df01fc35f 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -1433,7 +1433,7 @@ inlineToHtml opts inline = do return $ if T.null tit then link' else link' ! A.title (toValue tit) - (Image attr txt (s,tit)) -> do + (Image attr@(_, _, attrList) txt (s, tit)) -> do let alternate = stringify txt slideVariant <- gets stSlideVariant let isReveal = slideVariant == RevealJsSlides @@ -1446,7 +1446,8 @@ inlineToHtml opts inline = do [A.title $ toValue tit | not (T.null tit)] ++ attrs imageTag = (if html5 then H5.img else H.img - , [A.alt $ toValue alternate | not (null txt)] ) + , [A.alt $ toValue alternate | not (null txt) && + isNothing (lookup "alt" attrList)] ) mediaTag tg fallbackTxt = let linkTxt = if null txt then fallbackTxt diff --git a/test/command/7416.md b/test/command/7416.md new file mode 100644 index 000000000..70a0257fa --- /dev/null +++ b/test/command/7416.md @@ -0,0 +1,19 @@ +``` +% pandoc -f markdown -t html +{alt="alt"} + +^D +<figure> +<img src="../media/rId25.jpg" title="title" alt="alt" /><figcaption aria-hidden="true">caption</figcaption> +</figure> +``` + +``` +% pandoc -f markdown -t html + + +^D +<figure> +<img src="../media/rId25.jpg" title="title" alt="caption" /><figcaption aria-hidden="true">caption</figcaption> +</figure> +``` -- cgit v1.2.3 From a3d745e48560a55d9a9ea9fa43ffdd5a8b84987f Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 29 Jun 2021 09:44:37 -0700 Subject: Docx writer: support figure numbers. These are set up in such a way that they will work with Word's automatic table of figures. Closes #7392. --- src/Text/Pandoc/Writers/Docx.hs | 22 +++++++++++++++++++--- src/Text/Pandoc/Writers/Docx/Types.hs | 2 ++ test/docx/golden/image.docx | Bin 26647 -> 26774 bytes 3 files changed, 21 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index d1065eb7d..b3e008b8a 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -36,7 +36,8 @@ import Data.Time.Clock.POSIX import Data.Digest.Pure.SHA (sha1, showDigest) import Skylighting import Text.Collate.Lang (renderLang) -import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang) +import Text.Pandoc.Class.PandocMonad (PandocMonad, report, toLang, translateTerm) +import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Class.PandocMonad as P import Data.Time import Text.Pandoc.UTF8 (fromTextLazy) @@ -854,14 +855,29 @@ blockToOpenXML' opts (Plain lst) = do -- title beginning with fig: indicates that the image is a figure blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do setFirstPara + fignum <- gets stNextFigureNum + modify $ \st -> st{ stNextFigureNum = fignum + 1 } + let figid = "fig" <> tshow fignum + figname <- translateTerm Term.Figure prop <- pStyleM $ if null alt then "Figure" else "Captioned Figure" paraProps <- local (\env -> env { envParaProperties = EnvProps (Just prop) [] <> envParaProperties env }) (getParaProps False) contents <- inlinesToOpenXML opts [Image attr alt (src,tit)] - captionNode <- withParaPropM (pStyleM "Image Caption") - $ blockToOpenXML opts (Para alt) + captionNode <- if null alt + then return [] + else withParaPropM (pStyleM "Image Caption") + $ blockToOpenXML opts + (Para $ Span (figid,[],[]) + [Str "Figure\160", + RawInline (Format "openxml") + ("<w:fldSimple w:instr=\"SEQ " + <> figname + <> " \\* ARABIC \"><w:r><w:t>" + <> tshow fignum + <> "</w:t></w:r></w:fldSimple>"), + Str ":", Space] : alt) return $ Elem (mknode "w:p" [] (map Elem paraProps ++ contents)) : captionNode diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs index 006584c30..36ac45ad2 100644 --- a/src/Text/Pandoc/Writers/Docx/Types.hs +++ b/src/Text/Pandoc/Writers/Docx/Types.hs @@ -117,6 +117,7 @@ data WriterState = WriterState{ , stDynamicParaProps :: Set.Set ParaStyleName , stDynamicTextProps :: Set.Set CharStyleName , stCurId :: Int + , stNextFigureNum :: Int } defaultWriterState :: WriterState @@ -137,6 +138,7 @@ defaultWriterState = WriterState{ , stDynamicParaProps = Set.empty , stDynamicTextProps = Set.empty , stCurId = 20 + , stNextFigureNum = 1 } setFirstPara :: PandocMonad m => WS m () diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 48b72e283..9fe65326f 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ -- cgit v1.2.3 From a01ba4463f1f0d14a8032f147cddb76dadb4b853 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 29 Jun 2021 11:15:13 -0700 Subject: Docx writer: Fixed a couple bugs in Figure numbering. --- src/Text/Pandoc/Writers/Docx.hs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs index b3e008b8a..a3c4b6be1 100644 --- a/src/Text/Pandoc/Writers/Docx.hs +++ b/src/Text/Pandoc/Writers/Docx.hs @@ -856,7 +856,7 @@ blockToOpenXML' opts (Plain lst) = do blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit)]) = do setFirstPara fignum <- gets stNextFigureNum - modify $ \st -> st{ stNextFigureNum = fignum + 1 } + unless (null alt) $ modify $ \st -> st{ stNextFigureNum = fignum + 1 } let figid = "fig" <> tshow fignum figname <- translateTerm Term.Figure prop <- pStyleM $ @@ -870,10 +870,9 @@ blockToOpenXML' opts (Para [Image attr alt (src,T.stripPrefix "fig:" -> Just tit else withParaPropM (pStyleM "Image Caption") $ blockToOpenXML opts (Para $ Span (figid,[],[]) - [Str "Figure\160", + [Str (figname <> "\160"), RawInline (Format "openxml") - ("<w:fldSimple w:instr=\"SEQ " - <> figname + ("<w:fldSimple w:instr=\"SEQ Figure" <> " \\* ARABIC \"><w:r><w:t>" <> tshow fignum <> "</w:t></w:r></w:fldSimple>"), -- cgit v1.2.3 From 0948af9cc549f0ea3b85fa760aa521b8deaad2c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 29 Jun 2021 11:15:40 -0700 Subject: Docx writer: Add table numbering for captioned tables. The numbers are added using fields, so that Word can create a list of tables that will update automatically. --- src/Text/Pandoc/Writers/Docx/Table.hs | 31 ++++++++++++++++++++++++++++--- src/Text/Pandoc/Writers/Docx/Types.hs | 2 ++ test/Tests/Writers/OOXML.hs | 4 +++- test/docx/golden/image.docx | Bin 26774 -> 26776 bytes 4 files changed, 33 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/Docx/Table.hs b/src/Text/Pandoc/Writers/Docx/Table.hs index 49917e315..7a84c5278 100644 --- a/src/Text/Pandoc/Writers/Docx/Table.hs +++ b/src/Text/Pandoc/Writers/Docx/Table.hs @@ -17,7 +17,7 @@ import Control.Monad.State.Strict import Data.Array import Data.Text (Text) import Text.Pandoc.Definition -import Text.Pandoc.Class.PandocMonad (PandocMonad) +import Text.Pandoc.Class.PandocMonad (PandocMonad, translateTerm) import Text.Pandoc.Writers.Docx.Types import Text.Pandoc.Shared import Text.Printf (printf) @@ -25,6 +25,7 @@ import Text.Pandoc.Writers.GridTable hiding (Table) import Text.Pandoc.Writers.OOXML import Text.Pandoc.XML.Light as XML hiding (Attr) import qualified Data.Text as T +import qualified Text.Pandoc.Translations as Term import qualified Text.Pandoc.Writers.GridTable as Grid tableToOpenXML :: PandocMonad m @@ -33,15 +34,23 @@ tableToOpenXML :: PandocMonad m -> WS m [Content] tableToOpenXML blocksToOpenXML gridTable = do setFirstPara - let (Grid.Table _attr caption colspecs _rowheads thead tbodies tfoot) = + let (Grid.Table (ident,_,_) caption colspecs _rowheads thead tbodies tfoot) = gridTable let (Caption _maybeShortCaption captionBlocks) = caption + tablenum <- gets stNextTableNum + unless (null captionBlocks) $ + modify $ \st -> st{ stNextTableNum = tablenum + 1 } + let tableid = if T.null ident + then "table" <> tshow tablenum + else ident + tablename <- translateTerm Term.Table let captionStr = stringify captionBlocks let aligns = map fst $ elems colspecs captionXml <- if null captionBlocks then return [] else withParaPropM (pStyleM "Table Caption") - $ blocksToOpenXML captionBlocks + $ blocksToOpenXML + $ addLabel tableid tablename tablenum captionBlocks -- We set "in table" after processing the caption, because we don't -- want the "Table Caption" style to be overwritten with "Compact". modify $ \s -> s { stInTable = True } @@ -81,6 +90,22 @@ tableToOpenXML blocksToOpenXML gridTable = do modify $ \s -> s { stInTable = False } return $ captionXml ++ [Elem tbl] +addLabel :: Text -> Text -> Int -> [Block] -> [Block] +addLabel tableid tablename tablenum bs = + case bs of + (Para ils : rest) -> Para (label : Space : ils) : rest + (Plain ils : rest) -> Plain (label : Space : ils) : rest + _ -> Para [label] : bs + where + label = Span (tableid,[],[]) + [Str (tablename <> "\160"), + RawInline (Format "openxml") + ("<w:fldSimple w:instr=\"SEQ Table" + <> " \\* ARABIC \"><w:r><w:t>" + <> tshow tablenum + <> "</w:t></w:r></w:fldSimple>"), + Str ":"] + -- | Parts of a table data RowType = HeadRow | BodyRow | FootRow diff --git a/src/Text/Pandoc/Writers/Docx/Types.hs b/src/Text/Pandoc/Writers/Docx/Types.hs index 36ac45ad2..74b8d2753 100644 --- a/src/Text/Pandoc/Writers/Docx/Types.hs +++ b/src/Text/Pandoc/Writers/Docx/Types.hs @@ -118,6 +118,7 @@ data WriterState = WriterState{ , stDynamicTextProps :: Set.Set CharStyleName , stCurId :: Int , stNextFigureNum :: Int + , stNextTableNum :: Int } defaultWriterState :: WriterState @@ -139,6 +140,7 @@ defaultWriterState = WriterState{ , stDynamicTextProps = Set.empty , stCurId = 20 , stNextFigureNum = 1 + , stNextTableNum = 1 } setFirstPara :: PandocMonad m => WS m () diff --git a/test/Tests/Writers/OOXML.hs b/test/Tests/Writers/OOXML.hs index c1e47622d..83f05cfec 100644 --- a/test/Tests/Writers/OOXML.hs +++ b/test/Tests/Writers/OOXML.hs @@ -55,7 +55,9 @@ testArchive :: (WriterOptions -> Pandoc -> PandocIO BL.ByteString) -> IO Archive testArchive writerFn opts fp = do txt <- T.readFile fp - bs <- runIOorExplode $ readNative def txt >>= writerFn opts + bs <- runIOorExplode $ do + setTranslations "en-US" + readNative def txt >>= writerFn opts return $ toArchive bs compareFileList :: FilePath -> Archive -> Archive -> Maybe String diff --git a/test/docx/golden/image.docx b/test/docx/golden/image.docx index 9fe65326f..7c2d8a9ac 100644 Binary files a/test/docx/golden/image.docx and b/test/docx/golden/image.docx differ -- cgit v1.2.3 From cb038bb3125028e17a5b05c32495219a17fb6537 Mon Sep 17 00:00:00 2001 From: Aner Lucero <4rgento@gmail.com> Date: Fri, 2 Jul 2021 11:17:14 -0300 Subject: HTML5 writer, remove aria-hidden when explicit atl text is provided. --- src/Text/Pandoc/Writers/HTML.hs | 11 +++++++---- test/command/7416.md | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index df01fc35f..7eb8dfe12 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -659,17 +659,20 @@ dimensionsToAttrList attr = consolidateStyles $ go Width ++ go Height figure :: PandocMonad m => WriterOptions -> Attr -> [Inline] -> (Text, Text) -> StateT WriterState m Html -figure opts attr txt (s,tit) = do +figure opts attr@(_, _, attrList) txt (s,tit) = do html5 <- gets stHtml5 -- Screen-readers will normally read the @alt@ text and the figure; we -- want to avoid them reading the same text twice. With HTML5 we can -- use aria-hidden for the caption; with HTML4, we use an empty -- alt-text instead. + -- When the alt text differs from the caption both should be read. let alt = if html5 then txt else [Str ""] let tocapt = if html5 - then H5.figcaption ! - H5.customAttribute (textTag "aria-hidden") - (toValue @Text "true") + then (H5.figcaption !) $ + if isJust (lookup "alt" attrList) + then mempty + else H5.customAttribute (textTag "aria-hidden") + (toValue @Text "true") else H.p ! A.class_ "caption" img <- inlineToHtml opts (Image attr alt (s,tit)) capt <- if null txt diff --git a/test/command/7416.md b/test/command/7416.md index 70a0257fa..2f9577f10 100644 --- a/test/command/7416.md +++ b/test/command/7416.md @@ -4,7 +4,7 @@ ^D <figure> -<img src="../media/rId25.jpg" title="title" alt="alt" /><figcaption aria-hidden="true">caption</figcaption> +<img src="../media/rId25.jpg" title="title" alt="alt" /><figcaption>caption</figcaption> </figure> ``` -- cgit v1.2.3 From ff26af59acda8d57d1f0d5faf24a2da203dc2f4e Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 5 Jul 2021 13:18:25 -0700 Subject: Revamp note citation handling. Use latest citeproc, which uses a Span with a class rather than a Note for notes. This helps us distinguish between user notes and citation notes. Don't put citations at the beginning of a note in parentheses. (Closes #7394.) --- cabal.project | 2 +- src/Text/Pandoc/Citeproc.hs | 44 ++++++++++++++++++++++++++++++-------------- stack.yaml | 2 +- 3 files changed, 32 insertions(+), 16 deletions(-) (limited to 'src') diff --git a/cabal.project b/cabal.project index 99c7b3556..e18577471 100644 --- a/cabal.project +++ b/cabal.project @@ -10,4 +10,4 @@ source-repository-package source-repository-package type: git location: https://github.com/jgm/citeproc - tag: 85277c4baa6c0350baf29247583ae0f438f7c9c5 + tag: 4ce0501cd6f9c86eee087afcf878c256e49b9615 diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index a2fca106a..b19494dc0 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -27,7 +27,7 @@ import Text.Pandoc.Error (PandocError(..)) import Text.Pandoc.Extensions (pandocExtensions) import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options (ReaderOptions(..)) -import Text.Pandoc.Shared (stringify, ordNub, blocksToInlines, tshow) +import Text.Pandoc.Shared (stringify, ordNub, tshow) import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.Walk (query, walk, walkM) import Control.Applicative ((<|>)) @@ -49,6 +49,7 @@ import qualified Data.Text as T import System.FilePath (takeExtension) import Safe (lastMay, initSafe) + processCitations :: PandocMonad m => Pandoc -> m Pandoc processCitations (Pandoc meta bs) = do style <- getStyle (Pandoc meta bs) @@ -99,7 +100,9 @@ processCitations (Pandoc meta bs) = do let Pandoc meta'' bs' = maybe id (setMeta "nocite") metanocites . walk (map capitalizeNoteCitation . mvPunct moveNotes locale) . - walk deNote . + (if styleIsNoteStyle sopts + then walk addNote . walk deNote + else id) . evalState (walkM insertResolvedCitations $ Pandoc meta' bs) $ cits return $ Pandoc meta'' @@ -568,33 +571,46 @@ capitalizeNoteCitation (Cite cs [Note [Para ils]]) = $ B.fromList ils]] capitalizeNoteCitation x = x +addNote :: Inline -> Inline +addNote (Span ("",["csl-note"],[]) ils) = Note [Para ils] +addNote x = x + deNote :: [Inline] -> [Inline] deNote [] = [] deNote (Note bs:rest) = - Note (walk go bs) : deNote rest + case bs of + [Para (cit@(Cite (c:_) _) : ils)] + | citationMode c /= AuthorInText -> + -- if citation is first in note, no need to parenthesize. + Note [Para (walk removeNotes $ cit : walk addParens ils)] + : deNote rest + _ -> Note (walk removeNotes . walk addParens $ bs) : deNote rest where - go [] = [] - go (Cite (c:cs) ils : zs) + addParens [] = [] + addParens (Cite (c:cs) ils : zs) | citationMode c == AuthorInText - = Cite (c:cs) (concatMap (noteAfterComma (needsPeriod zs)) ils) : go zs + = Cite (c:cs) (concatMap (noteAfterComma (needsPeriod zs)) ils) : + addParens zs | otherwise - = Cite (c:cs) (concatMap noteInParens ils) : go zs - go (x:xs) = x : go xs + = Cite (c:cs) (concatMap noteInParens ils) : addParens zs + addParens (x:xs) = x : addParens xs + removeNotes (Span ("",["csl-note"],[]) ils) = Span ("",[],[]) ils + removeNotes x = x needsPeriod [] = True needsPeriod (Str t:_) = case T.uncons t of Nothing -> False Just (c,_) -> isUpper c needsPeriod (Space:zs) = needsPeriod zs needsPeriod _ = False - noteInParens (Note bs') + noteInParens (Span ("",["csl-note"],[]) ils) = Space : Str "(" : - removeFinalPeriod (blocksToInlines bs') ++ [Str ")"] + removeFinalPeriod ils ++ [Str ")"] noteInParens x = [x] - noteAfterComma needsPer (Note bs') + noteAfterComma needsPer (Span ("",["csl-note"],[]) ils) = Str "," : Space : - (if needsPer - then id - else removeFinalPeriod) (blocksToInlines bs') + if needsPer + then ils + else removeFinalPeriod ils noteAfterComma _ x = [x] deNote (x:xs) = x : deNote xs diff --git a/stack.yaml b/stack.yaml index 2db0a626f..48a608db3 100644 --- a/stack.yaml +++ b/stack.yaml @@ -14,7 +14,7 @@ extra-deps: - git: https://github.com/jgm/doctemplates commit: 428c26d5303cf7a2b1051fe1ffd9aafe9ba71c81 - git: https://github.com/jgm/citeproc - commit: 85277c4baa6c0350baf29247583ae0f438f7c9c5 + commit: 4ce0501cd6f9c86eee087afcf878c256e49b9615 ghc-options: "$locals": -fhide-source-paths -Wno-missing-home-modules resolver: lts-18.0 -- cgit v1.2.3 From 77537b17657e29d047a2f8df1feda2ed540c6e08 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 5 Jul 2021 13:28:58 -0700 Subject: Citeproc: cleanup and efficiency improvement in deNote. --- src/Text/Pandoc/Citeproc.hs | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index b19494dc0..6e2d9ce7e 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -99,7 +99,7 @@ processCitations (Pandoc meta bs) = do let metanocites = lookupMeta "nocite" meta let Pandoc meta'' bs' = maybe id (setMeta "nocite") metanocites . - walk (map capitalizeNoteCitation . mvPunct moveNotes locale) . + walk (mvPunct moveNotes locale) . (if styleIsNoteStyle sopts then walk addNote . walk deNote else id) . @@ -564,27 +564,28 @@ extractText (FancyVal x) = toText x extractText (NumVal n) = T.pack (show n) extractText _ = mempty -capitalizeNoteCitation :: Inline -> Inline -capitalizeNoteCitation (Cite cs [Note [Para ils]]) = - Cite cs - [Note [Para $ B.toList $ addTextCase Nothing CapitalizeFirst - $ B.fromList ils]] -capitalizeNoteCitation x = x - +-- Here we take the Spans with class csl-note that are left +-- after deNote has removed nested ones, and convert them +-- into real notes. addNote :: Inline -> Inline -addNote (Span ("",["csl-note"],[]) ils) = Note [Para ils] +addNote (Span ("",["csl-note"],[]) ils) = + Note [Para $ + B.toList . addTextCase Nothing CapitalizeFirst . B.fromList $ ils] addNote x = x -deNote :: [Inline] -> [Inline] -deNote [] = [] -deNote (Note bs:rest) = +-- Here we handle citation notes that occur inside footnotes +-- or other citation notes, in a note style. We don't want +-- notes inside notes, so we convert these to parenthesized +-- or comma-separated citations. +deNote :: Inline -> Inline +deNote (Note bs) = case bs of [Para (cit@(Cite (c:_) _) : ils)] | citationMode c /= AuthorInText -> -- if citation is first in note, no need to parenthesize. Note [Para (walk removeNotes $ cit : walk addParens ils)] - : deNote rest - _ -> Note (walk removeNotes . walk addParens $ bs) : deNote rest + _ -> Note (walk removeNotes . walk addParens $ bs) + where addParens [] = [] addParens (Cite (c:cs) ils : zs) @@ -594,25 +595,30 @@ deNote (Note bs:rest) = | otherwise = Cite (c:cs) (concatMap noteInParens ils) : addParens zs addParens (x:xs) = x : addParens xs + removeNotes (Span ("",["csl-note"],[]) ils) = Span ("",[],[]) ils removeNotes x = x + needsPeriod [] = True needsPeriod (Str t:_) = case T.uncons t of Nothing -> False Just (c,_) -> isUpper c needsPeriod (Space:zs) = needsPeriod zs needsPeriod _ = False + noteInParens (Span ("",["csl-note"],[]) ils) = Space : Str "(" : removeFinalPeriod ils ++ [Str ")"] noteInParens x = [x] + noteAfterComma needsPer (Span ("",["csl-note"],[]) ils) = Str "," : Space : if needsPer then ils else removeFinalPeriod ils noteAfterComma _ x = [x] -deNote (x:xs) = x : deNote xs + +deNote x = x -- Note: we can't use dropTextWhileEnd indiscriminately, -- because this would remove the final period on abbreviations like Ibid. -- cgit v1.2.3 From 3a31fe68efcf80888294a1d3d33751d266d4de07 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Mon, 5 Jul 2021 14:45:07 -0700 Subject: Add command test for #7394. And fix a small bug in handling of citations in notes, which led to commas at the end of sentences in some cases. --- src/Text/Pandoc/Citeproc.hs | 1 + test/command/7394.md | 85 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 test/command/7394.md (limited to 'src') diff --git a/src/Text/Pandoc/Citeproc.hs b/src/Text/Pandoc/Citeproc.hs index 6e2d9ce7e..246f54516 100644 --- a/src/Text/Pandoc/Citeproc.hs +++ b/src/Text/Pandoc/Citeproc.hs @@ -612,6 +612,7 @@ deNote (Note bs) = noteInParens x = [x] noteAfterComma needsPer (Span ("",["csl-note"],[]) ils) + | not (null ils) = Str "," : Space : if needsPer then ils diff --git a/test/command/7394.md b/test/command/7394.md new file mode 100644 index 000000000..d6eb769b9 --- /dev/null +++ b/test/command/7394.md @@ -0,0 +1,85 @@ +``` +% pandoc -f markdown -t plain --citeproc +--- +csl: command/chicago-fullnote-bibliography.csl +references: +- author: + - family: Wandt + given: Manfred + edition: 6 + id: wandt2014ges-sv + issued: 2014 + publisher: Franz Vahlen + publisher-place: München + title: Gesetzliche schuldverhältnisse + title-short: Gesetzl SV + type: book +- author: + - family: Smith + given: Zenda + edition: 6 + id: smith2015 + issued: 2015 + publisher: Macmillan + publisher-place: New York + title: A verb and a noun + type: book +--- + +Hi^[@wandt2014ges-sv.]. + +Hi^[[@wandt2014ges-sv].]. + +Hi^[[See also @wandt2014ges-sv].]. + +Hi^[See also @wandt2014ges-sv.]. + +Hi^[@wandt2014ges-sv [@smith2015].]. + +Hi^[[@wandt2014ges-sv; @smith2015].]. + +Hi [@wandt2014ges-sv]. + +Hi [see also @wandt2014ges-sv]. + +^D +Hi[1]. + +Hi[2]. + +Hi[3]. + +Hi[4]. + +Hi[5]. + +Hi[6]. + +Hi.[7] + +Hi.[8] + +Smith, Zenda. A Verb and a Noun. 6th ed. New York: Macmillan, 2015. + +Wandt, Manfred. Gesetzliche Schuldverhältnisse. 6th ed. München: Franz +Vahlen, 2014. + +[1] Manfred Wandt, Gesetzliche Schuldverhältnisse, 6th ed. (München: +Franz Vahlen, 2014). + +[2] Wandt. + +[3] See also Wandt. + +[4] See also Wandt. + +[5] Wandt, Zenda Smith, A Verb and a Noun, 6th ed. (New York: Macmillan, +2015). + +[6] Wandt, Gesetzl SV; Smith, A Verb and a Noun. + +[7] Wandt, Gesetzl SV. + +[8] See also Wandt. +``` + -- cgit v1.2.3 From 3ed37f00771d20a1b7516f2a37b7b424b3b2f1d8 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 6 Jul 2021 10:21:59 -0700 Subject: HTML reader: add col, colgroup to 'closes' definitions --- src/Text/Pandoc/Readers/HTML/Parsing.hs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML/Parsing.hs b/src/Text/Pandoc/Readers/HTML/Parsing.hs index 4c069e2c3..bd8d7c96c 100644 --- a/src/Text/Pandoc/Readers/HTML/Parsing.hs +++ b/src/Text/Pandoc/Readers/HTML/Parsing.hs @@ -161,10 +161,12 @@ _ `closes` "html" = False "li" `closes` "li" = True "th" `closes` t | t `elem` ["th","td"] = True "td" `closes` t | t `elem` ["th","td"] = True -"tr" `closes` t | t `elem` ["th","td","tr"] = True +"tr" `closes` t | t `elem` ["th","td","tr","colgroup"] = True "dd" `closes` t | t `elem` ["dt", "dd"] = True "dt" `closes` t | t `elem` ["dt","dd"] = True "rt" `closes` t | t `elem` ["rb", "rt", "rtc"] = True +"col" `closes` "col" = True +"colgroup" `closes` "col" = True "optgroup" `closes` "optgroup" = True "optgroup" `closes` "option" = True "option" `closes` "option" = True -- cgit v1.2.3 From f88ebf3ebf49e00ffa12778caf6817cc34459e6a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 6 Jul 2021 10:22:07 -0700 Subject: Markdown reader: don't try to read contents in self-closing HTML tag. Previously we had problems parsing raw HTML with self-closing tags like `<col/>`. The problem was that pandoc would look for a closing tag to close the markdown contents, but the closing tag had, in effect, already been parsed by `htmlTag`. This fixes the issue described in <https://groups.google.com/d/msgid/pandoc-discuss/297bc662-7841-4423-bcbb-534e99bbba09n%40googlegroups.com>. --- src/Text/Pandoc/Readers/Markdown.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 1e9867d07..2dc7ddf52 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1121,6 +1121,7 @@ rawTeXBlock = do rawHtmlBlocks :: PandocMonad m => MarkdownParser m (F Blocks) rawHtmlBlocks = do (TagOpen tagtype _, raw) <- htmlTag isBlockTag + let selfClosing = "/>" `T.isSuffixOf` raw -- we don't want '<td> text' to be a code block: skipMany spaceChar indentlevel <- (blankline >> length <$> many (char ' ')) <|> return 0 @@ -1134,7 +1135,9 @@ rawHtmlBlocks = do gobbleAtMostSpaces indentlevel notFollowedBy' closer block - contents <- mconcat <$> many block' + contents <- if selfClosing + then return mempty + else mconcat <$> many block' result <- try (do gobbleAtMostSpaces indentlevel -- cgit v1.2.3 From e7f8cc57866b61ed354c4c3812aaced33832a0e0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Tue, 6 Jul 2021 10:39:47 -0700 Subject: T.P.PDF, convertImage: normalize paths. This will avoid paths on Windows with mixed path separators, which may cause problems with SVG conversion. See #7431. --- src/Text/Pandoc/PDF.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index f85ef5b1f..aae3f9806 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -215,9 +215,9 @@ convertImage opts tmpdir fname = do E.catch (Right pngOut <$ JP.savePngImage pngOut img) $ \(e :: E.SomeException) -> return (Left (tshow e)) where - pngOut = replaceDirectory (replaceExtension fname ".png") tmpdir - pdfOut = replaceDirectory (replaceExtension fname ".pdf") tmpdir - svgIn = tmpdir </> fname + pngOut = normalise $ replaceDirectory (replaceExtension fname ".png") tmpdir + pdfOut = normalise $ replaceDirectory (replaceExtension fname ".pdf") tmpdir + svgIn = normalise $ tmpdir </> fname mime = getMimeType fname doNothing = return (Right fname) -- cgit v1.2.3 From e56e2b0e0be9256ddef798d28f5d2af6e756508d Mon Sep 17 00:00:00 2001 From: Michael Hoffmann <brennan.brisad@gmail.com> Date: Wed, 7 Jul 2021 01:06:29 +0200 Subject: Recognize data-external when reading HTML img tags (#7429) Preserve all attributes in img tags. If attributes have a `data-` prefix, it will be stripped. In particular, this preserves a `data-external` attribute as an `external` attribute in the pandoc AST. --- src/Text/Pandoc/Readers/HTML.hs | 11 +++-------- test/Tests/Readers/HTML.hs | 6 ++++++ 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index fc4575f2d..fdf4f28e0 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -722,17 +722,12 @@ pLink = try $ do pImage :: PandocMonad m => TagParser m Inlines pImage = do - tag <- pSelfClosing (=="img") (isJust . lookup "src") + tag@(TagOpen _ attr') <- pSelfClosing (=="img") (isJust . lookup "src") url <- canonicalizeUrl $ fromAttrib "src" tag let title = fromAttrib "title" tag let alt = fromAttrib "alt" tag - let uid = fromAttrib "id" tag - let cls = T.words $ fromAttrib "class" tag - let getAtt k = case fromAttrib k tag of - "" -> [] - v -> [(k, v)] - let kvs = concatMap getAtt ["width", "height", "sizes", "srcset"] - return $ B.imageWith (uid, cls, kvs) (escapeURI url) title (B.text alt) + let attr = toAttr $ filter (\(k,_) -> k /= "alt" && k /= "title" && k /= "src") attr' + return $ B.imageWith attr (escapeURI url) title (B.text alt) pSvg :: PandocMonad m => TagParser m Inlines pSvg = do diff --git a/test/Tests/Readers/HTML.hs b/test/Tests/Readers/HTML.hs index 9bf567194..4ed1e44af 100644 --- a/test/Tests/Readers/HTML.hs +++ b/test/Tests/Readers/HTML.hs @@ -74,6 +74,12 @@ tests = [ testGroup "base tag" [ test html "anchor without href" $ "<a name=\"anchor\"/>" =?> plain (spanWith ("anchor",[],[]) mempty) ] + , testGroup "img" + [ test html "data-external attribute" $ "<img data-external=\"1\" src=\"http://example.com/stickman.gif\">" =?> + plain (imageWith ("", [], [("external", "1")]) "http://example.com/stickman.gif" "" "") + , test html "title" $ "<img title=\"The title\" src=\"http://example.com/stickman.gif\">" =?> + plain (imageWith ("", [], []) "http://example.com/stickman.gif" "The title" "") + ] , testGroup "lang" [ test html "lang on <html>" $ "<html lang=\"es\">hola" =?> setMeta "lang" (text "es") (doc (plain (text "hola"))) -- cgit v1.2.3 From 565330033a623ed7bf4d0a3b57dd14710cf27703 Mon Sep 17 00:00:00 2001 From: Michael Hoffmann <brennan.brisad@gmail.com> Date: Wed, 7 Jul 2021 18:26:37 +0200 Subject: Don't incorporate externally linked images in EPUB documents (#7430) Just like it is possible to avoid incorporating an image in EPUB by passing `data-external="1"` to a raw HTML snippet, this makes the same possible for native Images, by looking for an associated `external` attribute. --- MANUAL.txt | 6 ++++++ src/Text/Pandoc/Writers/EPUB.hs | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/MANUAL.txt b/MANUAL.txt index 52563a8fa..c9dc9e62b 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -6177,6 +6177,12 @@ with the `src` attribute. For example: </source> </audio> +If the input format already is HTML then `data-external="1"` will work +as expected for `<img>` elements. Similarly, for Markdown, external +images can be declared with `{external=1}`. Note that this +only works for images; the other media elements have no native +representation in pandoc's AST and requires the use of raw HTML. + # Jupyter notebooks When creating a [Jupyter notebook], pandoc will try to infer the diff --git a/src/Text/Pandoc/Writers/EPUB.hs b/src/Text/Pandoc/Writers/EPUB.hs index 3c092a2c1..508fb6a98 100644 --- a/src/Text/Pandoc/Writers/EPUB.hs +++ b/src/Text/Pandoc/Writers/EPUB.hs @@ -1131,7 +1131,8 @@ transformInline :: PandocMonad m => WriterOptions -> Inline -> E m Inline -transformInline _opts (Image attr lab (src,tit)) = do +transformInline _opts (Image attr@(_,_,kvs) lab (src,tit)) + | isNothing (lookup "external" kvs) = do newsrc <- modifyMediaRef $ T.unpack src return $ Image attr lab ("../" <> newsrc, tit) transformInline opts x@(Math t m) -- cgit v1.2.3 From ae22b1e977cfb1357bb21fabc227e76a6adb0599 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 9 Jul 2021 12:27:41 -0700 Subject: RST reader: fix regression with code includes. With the recent changes to include infrastructure, included code blocks were getting an extra newline. Closes #7436. Added regression test. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/RST.hs | 6 +++++- test/command/7436.md | 14 ++++++++++++++ test/command/three.txt | 3 +++ 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 test/command/7436.md create mode 100644 test/command/three.txt (limited to 'src') diff --git a/pandoc.cabal b/pandoc.cabal index 84a04c6b6..e3cd7e54f 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -213,6 +213,7 @@ extra-source-files: test/command/B.txt test/command/C.txt test/command/D.txt + test/command/three.txt test/command/01.csv test/command/chap1/spider.png test/command/chap2/spider.png diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 35292d949..3990f0cb5 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -474,6 +474,7 @@ includeDirective top fields body = do case lookup "literal" fields of Just _ -> B.rawBlock "rst" . sourcesToText <$> getInput Nothing -> parseBlocks + let isLiteral = isJust (lookup "code" fields `mplus` lookup "literal" fields) let selectLines = (case trim <$> lookup "end-before" fields of Just patt -> takeWhile (not . (patt `T.isInfixOf`)) @@ -482,8 +483,11 @@ includeDirective top fields body = do Just patt -> drop 1 . dropWhile (not . (patt `T.isInfixOf`)) Nothing -> id) + let toStream t = - toSources [(f, T.unlines . selectLines . T.lines $ t)] + Sources [(initialPos f, + (T.unlines . selectLines . T.lines $ t) <> + if isLiteral then mempty else "\n")] -- see #7436 currentDir <- takeDirectory . sourceName <$> getPosition insertIncludedFile parser toStream [currentDir] f startLine endLine diff --git a/test/command/7436.md b/test/command/7436.md new file mode 100644 index 000000000..ad4cb8c2f --- /dev/null +++ b/test/command/7436.md @@ -0,0 +1,14 @@ +``` +% pandoc -f rst -t native +.. include:: command/three.txt + :code: + +.. include:: command/three.txt + :literal: + +.. include:: command/three.txt +^D +[CodeBlock ("",[""],[("code","")]) "1st line.\n2nd line.\n3rd line.\n" +,RawBlock (Format "rst") "1st line.\n2nd line.\n3rd line.\n" +,Para [Str "1st",Space,Str "line.",SoftBreak,Str "2nd",Space,Str "line.",SoftBreak,Str "3rd",Space,Str "line."]] +``` diff --git a/test/command/three.txt b/test/command/three.txt new file mode 100644 index 000000000..3ca3fdd4e --- /dev/null +++ b/test/command/three.txt @@ -0,0 +1,3 @@ +1st line. +2nd line. +3rd line. -- cgit v1.2.3 From 477a67061f06827b7e807319404cc277a417e9d0 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 9 Jul 2021 14:14:19 -0700 Subject: Always use / when adding directory to image path with extractMedia. Even on Windows. May help with #7431. --- src/Text/Pandoc/Class/IO.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Class/IO.hs b/src/Text/Pandoc/Class/IO.hs index 169074860..f4cfc8682 100644 --- a/src/Text/Pandoc/Class/IO.hs +++ b/src/Text/Pandoc/Class/IO.hs @@ -226,6 +226,6 @@ adjustImagePath dir mediabag (Image attr lab (src, tit)) = case lookupMedia (T.unpack src) mediabag of Nothing -> Image attr lab (src, tit) Just item -> - let fullpath = dir </> mediaPath item + let fullpath = dir <> "/" <> mediaPath item in Image attr lab (T.pack fullpath, tit) adjustImagePath _ _ x = x -- cgit v1.2.3 From ac0a9da6d85e9b7a73973a20019caa324b2c1aff Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Sun, 11 Jul 2021 13:01:45 -0700 Subject: Improved parsing of raw LaTeX from Text streams (rawLaTeXParser). We now use source positions from the token stream to tell us how much of the text stream to consume. Getting this to work required a few other changes to make token source positions accurate. Closes #7434. --- src/Text/Pandoc/Readers/LaTeX.hs | 9 +++----- src/Text/Pandoc/Readers/LaTeX/Parsing.hs | 39 ++++++++++++++++++++++++++++---- test/command/7434.md | 15 ++++++++++++ 3 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 test/command/7434.md (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 9e14c159a..31c8d9095 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -136,8 +136,7 @@ rawLaTeXBlock :: (PandocMonad m, HasMacros s, HasReaderOptions s) => ParserT Sources s m Text rawLaTeXBlock = do lookAhead (try (char '\\' >> letter)) - inp <- getInput - let toks = tokenizeSources inp + toks <- getInputTokens snd <$> (rawLaTeXParser toks False (macroDef (const mempty)) blocks <|> rawLaTeXParser toks True (do choice (map controlSeq @@ -167,8 +166,7 @@ rawLaTeXInline :: (PandocMonad m, HasMacros s, HasReaderOptions s) => ParserT Sources s m Text rawLaTeXInline = do lookAhead (try (char '\\' >> letter)) - inp <- getInput - let toks = tokenizeSources inp + toks <- getInputTokens raw <- snd <$> ( rawLaTeXParser toks True (mempty <$ (controlSeq "input" >> skipMany rawopt >> braced)) @@ -182,8 +180,7 @@ rawLaTeXInline = do inlineCommand :: PandocMonad m => ParserT Sources ParserState m Inlines inlineCommand = do lookAhead (try (char '\\' >> letter)) - inp <- getInput - let toks = tokenizeSources inp + toks <- getInputTokens fst <$> rawLaTeXParser toks True (inlineEnvironment <|> inlineCommand') inlines diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs index a17b1f324..9dac4d6ef 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs @@ -1,4 +1,5 @@ {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} @@ -28,6 +29,7 @@ module Text.Pandoc.Readers.LaTeX.Parsing , applyMacros , tokenize , tokenizeSources + , getInputTokens , untokenize , untoken , totoks @@ -246,18 +248,23 @@ withVerbatimMode parser = do updateState $ \st -> st{ sVerbatimMode = False } return result -rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s) +rawLaTeXParser :: (PandocMonad m, HasMacros s, HasReaderOptions s, Show a) => [Tok] -> Bool -> LP m a -> LP m a -> ParserT Sources s m (a, Text) rawLaTeXParser toks retokenize parser valParser = do pstate <- getState let lstate = def{ sOptions = extractReaderOptions pstate } let lstate' = lstate { sMacros = extractMacros pstate } + let setStartPos = case toks of + Tok pos _ _ : _ -> setPosition pos + _ -> return () + let preparser = setStartPos >> parser let rawparser = (,) <$> withRaw valParser <*> getState - res' <- lift $ runParserT (snd <$> withRaw parser) lstate "chunk" toks + res' <- lift $ runParserT (withRaw (preparser >> getPosition)) + lstate "chunk" toks case res' of Left _ -> mzero - Right toks' -> do + Right (endpos, toks') -> do res <- lift $ runParserT (do when retokenize $ do -- retokenize, applying macros ts <- many (satisfyTok (const True)) @@ -268,7 +275,13 @@ rawLaTeXParser toks retokenize parser valParser = do Left _ -> mzero Right ((val, raw), st) -> do updateState (updateMacros (sMacros st <>)) - void $ count (T.length (untokenize toks')) anyChar + let skipTilPos stopPos = do + anyChar + pos <- getPosition + if pos >= stopPos + then return () + else skipTilPos stopPos + skipTilPos endpos let result = untokenize raw -- ensure we end with space if input did, see #4442 let result' = @@ -306,6 +319,17 @@ tokenizeSources = concatMap tokenizeSource . unSources where tokenizeSource (pos, t) = totoks pos t +-- Return tokens from input sources. Ensure that starting position is +-- correct. +getInputTokens :: PandocMonad m => ParserT Sources s m [Tok] +getInputTokens = do + pos <- getPosition + ss <- getInput + return $ + case ss of + Sources [] -> [] + Sources ((_,t):rest) -> tokenizeSources $ Sources ((pos,t):rest) + tokenize :: SourceName -> Text -> [Tok] tokenize sourcename = totoks (initialPos sourcename) @@ -433,8 +457,13 @@ parseFromToks :: PandocMonad m => LP m a -> [Tok] -> LP m a parseFromToks parser toks = do oldInput <- getInput setInput toks + oldpos <- getPosition + case toks of + Tok pos _ _ : _ -> setPosition pos + _ -> return () result <- disablingWithRaw parser setInput oldInput + setPosition oldpos return result disablingWithRaw :: PandocMonad m => LP m a -> LP m a @@ -458,7 +487,7 @@ satisfyTok f = do | otherwise = Nothing updatePos :: SourcePos -> Tok -> [Tok] -> SourcePos updatePos _spos _ (Tok pos _ _ : _) = pos - updatePos spos _ [] = incSourceColumn spos 1 + updatePos spos (Tok _ _ t) [] = incSourceColumn spos (T.length t) doMacros :: PandocMonad m => LP m () doMacros = do diff --git a/test/command/7434.md b/test/command/7434.md new file mode 100644 index 000000000..be8732bc2 --- /dev/null +++ b/test/command/7434.md @@ -0,0 +1,15 @@ +``` +% pandoc -f markdown -t native +\begin{proof} +\newcommand{\x}{\left.\right.} +\x +\end{proof} + +1234567890abcdefghi + +[\*\a](x) +^D +[RawBlock (Format "tex") "\\begin{proof}\n\\newcommand{\\x}{\\left.\\right.}\n\\left.\\right.\n\\end{proof}" +,Para [Str "1234567890abcdefghi"] +,Para [Link ("",[],[]) [Str "*",RawInline (Format "tex") "\\a"] ("x","")]] +``` -- cgit v1.2.3 From 06408d08e5ccf06a6a04c9b77470e6a67d98e52c Mon Sep 17 00:00:00 2001 From: Jan Tojnar <jtojnar@gmail.com> Date: Mon, 12 Jul 2021 00:28:52 +0200 Subject: DocBook reader: add support for citerefentry (#7437) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally intended for referring to UNIX manual pages, either part of the same DocBook document as refentry element, or external – hence the manvolnum element. These days, refentry is more general, for example the element documentation pages linked below are each a refentry. As per the *Processing expectations* section of citerefentry, the element is supposed to be a hyperlink to a refentry (when in the same document) but pandoc does not support refentry tag at the moment so that is moot. https://tdg.docbook.org/tdg/5.1/citerefentry.html https://tdg.docbook.org/tdg/5.1/manvolnum.html https://tdg.docbook.org/tdg/5.1/refentry.html This roughly corresponds to a `manpage` role in rST syntax, which produces a `Code` AST node with attributes `.interpreted-text role=manpage` but that does not fit DocBook parser. https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html#role-manpage --- src/Text/Pandoc/Readers/DocBook.hs | 6 +++++- test/docbook-reader.docbook | 3 +++ test/docbook-reader.native | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index 6ac1c99f9..a31819ece 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -97,7 +97,7 @@ List of all DocBook tags, with [x] indicating implemented, [x] chapterinfo - Meta-information for a Chapter [ ] citation - An inline bibliographic reference to another published work [ ] citebiblioid - A citation of a bibliographic identifier -[ ] citerefentry - A citation to a reference page +[x] citerefentry - A citation to a reference page [ ] citetitle - The title of a cited work [ ] city - The name of a city in an address [x] classname - The name of a class, in the object-oriented programming sense @@ -1112,6 +1112,10 @@ parseInline (Elem e) = "segmentedlist" -> segmentedList "classname" -> codeWithLang "code" -> codeWithLang + "citerefentry" -> do + let title = maybe mempty strContent $ filterChild (named "refentrytitle") e + let manvolnum = maybe mempty (\el -> "(" <> strContent el <> ")") $ filterChild (named "manvolnum") e + return $ codeWith ("",["citerefentry"],[]) (title <> manvolnum) "filename" -> codeWithLang "envar" -> codeWithLang "literal" -> codeWithLang diff --git a/test/docbook-reader.docbook b/test/docbook-reader.docbook index 51e62942b..f021dc8be 100644 --- a/test/docbook-reader.docbook +++ b/test/docbook-reader.docbook @@ -725,6 +725,9 @@ These should not be escaped: \$ \\ \> \[ \{ <para> More code: <classname>Class</classname> and <type>Type</type> </para> + <para> + Referencing a man page: <citerefentry><refentrytitle>nix.conf</refentrytitle><manvolnum>5</manvolnum></citerefentry> + </para> <para> <emphasis role="strikethrough">This is <emphasis>strikeout</emphasis>.</emphasis> diff --git a/test/docbook-reader.native b/test/docbook-reader.native index 1961949d9..b1f5fd085 100644 --- a/test/docbook-reader.native +++ b/test/docbook-reader.native @@ -188,6 +188,7 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Sof ,Para [Str "So",Space,Str "is",Space,Strong [Emph [Str "this"]],Space,Str "word."] ,Para [Str "This",Space,Str "is",Space,Str "code:",Space,Code ("",[],[]) ">",Str ",",Space,Code ("",[],[]) "$",Str ",",SoftBreak,Code ("",[],[]) "\\",Str ",",Space,Code ("",[],[]) "\\$",Str ",",SoftBreak,Code ("",[],[]) "<html>",Str "."] ,Para [Str "More",Space,Str "code:",Space,Code ("",[],[]) "Class",Space,Str "and",Space,Code ("",[],[]) "Type"] +,Para [Str "Referencing",Space,Str "a",Space,Str "man",Space,Str "page:",Space,Code ("",["citerefentry"],[]) "nix.conf(5)"] ,Para [Strikeout [Str "This",Space,Str "is",SoftBreak,Emph [Str "strikeout"],Str "."]] ,Para [Str "Superscripts:",Space,Str "a",Superscript [Str "bc"],Str "d",SoftBreak,Str "a",Superscript [Emph [Str "hello"]],SoftBreak,Str "a",Superscript [Str "hello\160there"],Str "."] ,Para [Str "Subscripts:",Space,Str "H",Subscript [Str "2"],Str "O,",Space,Str "H",Subscript [Str "23"],Str "O,",SoftBreak,Str "H",Subscript [Str "many\160of\160them"],Str "O."] -- cgit v1.2.3 From 18270c7a390b5cb101e16790ccb5e50dd9f34a7a Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 16 Jul 2021 11:39:02 -0700 Subject: PDF: Fix svgIn path error. We were duplicating the temp directory; this didn't show up on macOS or linux because there we use absolute paths for the temp directory. Closes #7431. --- src/Text/Pandoc/PDF.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index aae3f9806..c4e30af34 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -217,7 +217,7 @@ convertImage opts tmpdir fname = do where pngOut = normalise $ replaceDirectory (replaceExtension fname ".png") tmpdir pdfOut = normalise $ replaceDirectory (replaceExtension fname ".pdf") tmpdir - svgIn = normalise $ tmpdir </> fname + svgIn = normalise fname mime = getMimeType fname doNothing = return (Right fname) -- cgit v1.2.3 From 493522c5626464fb7a27216ccee381af4a199eef Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 16 Jul 2021 12:04:43 -0700 Subject: LaTeX reader: Support `\cline` in LaTeX tables. Closes #7442. --- src/Text/Pandoc/Readers/LaTeX/Table.hs | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/LaTeX/Table.hs b/src/Text/Pandoc/Readers/LaTeX/Table.hs index 7833da081..f56728fe1 100644 --- a/src/Text/Pandoc/Readers/LaTeX/Table.hs +++ b/src/Text/Pandoc/Readers/LaTeX/Table.hs @@ -41,6 +41,7 @@ hline :: PandocMonad m => LP m () hline = try $ do spaces controlSeq "hline" <|> + (controlSeq "cline" <* braced) <|> -- booktabs rules: controlSeq "toprule" <|> controlSeq "bottomrule" <|> -- cgit v1.2.3 From 46099e79defe662e541b12548200caf29063c1c6 Mon Sep 17 00:00:00 2001 From: John MacFarlane <jgm@berkeley.edu> Date: Fri, 16 Jul 2021 13:10:45 -0700 Subject: DocBook reader: handle images with imageobjectco elements. Closes #7440. --- src/Text/Pandoc/Readers/DocBook.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/Text/Pandoc/Readers/DocBook.hs b/src/Text/Pandoc/Readers/DocBook.hs index a31819ece..c49b82ccf 100644 --- a/src/Text/Pandoc/Readers/DocBook.hs +++ b/src/Text/Pandoc/Readers/DocBook.hs @@ -741,9 +741,9 @@ getMediaobject e = do figTitle <- gets dbFigureTitle ident <- gets dbFigureId (imageUrl, attr) <- - case filterChild (named "imageobject") e of - Nothing -> return (mempty, nullAttr) - Just z -> case filterChild (named "imagedata") z of + case filterElements (named "imageobject") e of + [] -> return (mempty, nullAttr) + (z:_) -> case filterChild (named "imagedata") z of Nothing -> return (mempty, nullAttr) Just i -> let atVal a = attrValue a i w = case atVal "width" of -- cgit v1.2.3