aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Writers
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2020-09-06 16:25:16 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2020-09-21 10:15:50 -0700
commite0984a43a99231e72c02a0a716c8d0315de9abdf (patch)
tree8531ef58c2470d372ff2427a6ae09a6284461471 /src/Text/Pandoc/Writers
parent89c577befb78b32a0884b6092e0415c0dcadab72 (diff)
downloadpandoc-e0984a43a99231e72c02a0a716c8d0315de9abdf.tar.gz
Add built-in citation support using new citeproc library.
This deprecates the use of the external pandoc-citeproc filter; citation processing is now built in to pandoc. * Add dependency on citeproc library. * Add Text.Pandoc.Citeproc module (and some associated unexported modules under Text.Pandoc.Citeproc). Exports `processCitations`. [API change] * Add data files needed for Text.Pandoc.Citeproc: default.csl in the data directory, and a citeproc directory that is just used at compile-time. Note that we've added file-embed as a mandatory rather than a conditional depedency, because of the biblatex localization files. We might eventually want to use readDataFile for this, but it would take some code reorganization. * Text.Pandoc.Loging: Add `CiteprocWarning` to `LogMessage` and use it in `processCitations`. [API change] * Add tests from the pandoc-citeproc package as command tests (including some tests pandoc-citeproc did not pass). * Remove instructions for building pandoc-citeproc from CI and release binary build instructions. We will no longer distribute pandoc-citeproc. * Markdown reader: tweak abbreviation support. Don't insert a nonbreaking space after a potential abbreviation if it comes right before a note or citation. This messes up several things, including citeproc's moving of note citations. * Add `csljson` as and input and output format. This allows pandoc to convert between `csljson` and other bibliography formats, and to generate formatted versions of CSL JSON bibliographies. * Add module Text.Pandoc.Writers.CslJson, exporting `writeCslJson`. [API change] * Add module Text.Pandoc.Readers.CslJson, exporting `readCslJson`. [API change] * Added `bibtex`, `biblatex` as input formats. This allows pandoc to convert between BibLaTeX and BibTeX and other bibliography formats, and to generated formatted versions of BibTeX/BibLaTeX bibliographies. * Add module Text.Pandoc.Readers.BibTeX, exporting `readBibTeX` and `readBibLaTeX`. [API change] * Make "standalone" implicit if output format is a bibliography format. This is needed because pandoc readers for bibliography formats put the bibliographic information in the `references` field of metadata; and unless standalone is specified, metadata gets ignored. (TODO: This needs improvement. We should trigger standalone for the reader when the input format is bibliographic, and for the writer when the output format is markdown.) * Carry over `citationNoteNum` to `citationNoteNumber`. This was just ignored in pandoc-citeproc. * Text.Pandoc.Filter: Add `CiteprocFilter` constructor to Filter. [API change] This runs the processCitations transformation. We need to treat it like a filter so it can be placed in the sequence of filter runs (after some, before others). In FromYAML, this is parsed from `citeproc` or `{type: citeproc}`, so this special filter may be specified either way in a defaults file (or by `citeproc: true`, though this gives no control of positioning relative to other filters). TODO: we need to add something to the manual section on defaults files for this. * Add deprecation warning if `upandoc-citeproc` filter is used. * Add `--citeproc/-C` option to trigger citation processing. This behaves like a filter and will be positioned relative to filters as they appear on the command line. * Rewrote the manual on citatations, adding a dedicated Citations section which also includes some information formerly found in the pandoc-citeproc man page. * Look for CSL styles in the `csl` subdirectory of the pandoc user data directory. This changes the old pandoc-citeproc behavior, which looked in `~/.csl`. Users can simply symlink `~/.csl` to the `csl` subdirectory of their pandoc user data directory if they want the old behavior. * Add support for CSL bibliography entry formatting to LaTeX, HTML, Ms writers. Added CSL-related CSS to styles.html.
Diffstat (limited to 'src/Text/Pandoc/Writers')
-rw-r--r--src/Text/Pandoc/Writers/CslJson.hs87
-rw-r--r--src/Text/Pandoc/Writers/Docx.hs12
-rw-r--r--src/Text/Pandoc/Writers/HTML.hs79
-rw-r--r--src/Text/Pandoc/Writers/LaTeX.hs43
-rw-r--r--src/Text/Pandoc/Writers/Ms.hs64
5 files changed, 248 insertions, 37 deletions
diff --git a/src/Text/Pandoc/Writers/CslJson.hs b/src/Text/Pandoc/Writers/CslJson.hs
new file mode 100644
index 000000000..9f6f2f8ea
--- /dev/null
+++ b/src/Text/Pandoc/Writers/CslJson.hs
@@ -0,0 +1,87 @@
+{-# LANGUAGE OverloadedStrings #-}
+{- |
+ Module : Text.Pandoc.Writers.CslJson
+ Copyright : Copyright (C) 2020 John MacFarlane
+ License : GNU GPL, version 2 or above
+
+ Maintainer : John MacFarlane <jgm@berkeley.edu>
+ Stability : alpha
+ Portability : portable
+
+Conversion of references from 'Pandoc' metadata to CSL JSON:
+<https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html>.
+
+Note that this writer ignores everything in the body of the
+document and everything in the metadata except `references`.
+It assumes that the `references` field is a list with the structure
+of a CSL JSON bibliography.
+-}
+module Text.Pandoc.Writers.CslJson ( writeCslJson )
+where
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Text.Pandoc.UTF8 as UTF8
+import Text.Pandoc.Error
+import Text.Pandoc.Class
+import Control.Monad.Except (throwError)
+import Data.ByteString.Lazy (toStrict)
+import Data.ByteString (ByteString)
+import Text.Pandoc.Definition
+import Text.Pandoc.Builder as B
+import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
+import Citeproc (parseLang, Locale, Reference(..), Lang(..))
+import Control.Monad.Identity
+import Citeproc.Locale (getLocale)
+import Citeproc.CslJson
+import Text.Pandoc.Options (WriterOptions)
+import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Aeson.Encode.Pretty (Config (..), Indent (Spaces),
+ NumberFormat (Generic),
+ defConfig, encodePretty')
+
+writeCslJson :: PandocMonad m => WriterOptions -> Pandoc -> m Text
+writeCslJson _opts (Pandoc meta _) = do
+ let lang = fromMaybe (Lang "en" (Just "US")) $
+ parseLang <$> (lookupMeta "lang" meta >>= metaValueToText)
+ locale <- case getLocale lang of
+ Left e -> throwError $ PandocCiteprocError e
+ Right l -> return l
+ case lookupMeta "references" meta of
+ Just (MetaList rs) -> return $ (UTF8.toText $
+ toCslJson locale (mapMaybe metaValueToReference rs)) <> "\n"
+ _ -> throwError $ PandocAppError "No references field"
+
+fromInlines :: [Inline] -> CslJson Text
+fromInlines = foldMap fromInline . B.fromList
+
+fromInline :: Inline -> CslJson Text
+fromInline (Str t) = CslText t
+fromInline (Emph ils) = CslItalic (fromInlines ils)
+fromInline (Strong ils) = CslBold (fromInlines ils)
+fromInline (Underline ils) = CslUnderline (fromInlines ils)
+fromInline (Strikeout ils) = fromInlines ils
+fromInline (Superscript ils) = CslSup (fromInlines ils)
+fromInline (Subscript ils) = CslSub (fromInlines ils)
+fromInline (SmallCaps ils) = CslSmallCaps (fromInlines ils)
+fromInline (Quoted _ ils) = CslQuoted (fromInlines ils)
+fromInline (Cite _ ils) = fromInlines ils
+fromInline (Code _ t) = CslText t
+fromInline Space = CslText " "
+fromInline SoftBreak = CslText " "
+fromInline LineBreak = CslText "\n"
+fromInline (Math _ t) = CslText t
+fromInline (RawInline _ _) = CslEmpty
+fromInline (Link _ ils _) = fromInlines ils
+fromInline (Image _ ils _) = fromInlines ils
+fromInline (Note _) = CslEmpty
+fromInline (Span (_,[cl],_) ils)
+ | "csl-" `T.isPrefixOf` cl = CslDiv cl (fromInlines ils)
+fromInline (Span _ ils) = fromInlines ils
+
+toCslJson :: Locale -> [Reference Inlines] -> ByteString
+toCslJson locale = toStrict .
+ encodePretty' defConfig{ confIndent = Spaces 2
+ , confCompare = compare
+ , confNumFormat = Generic }
+ . map (runIdentity . traverse (return . renderCslJson locale . foldMap fromInline))
+
diff --git a/src/Text/Pandoc/Writers/Docx.hs b/src/Text/Pandoc/Writers/Docx.hs
index fa7e2ceea..89a50125b 100644
--- a/src/Text/Pandoc/Writers/Docx.hs
+++ b/src/Text/Pandoc/Writers/Docx.hs
@@ -1184,6 +1184,18 @@ inlineToOpenXML' _ (Str str) =
formattedString str
inlineToOpenXML' opts Space = inlineToOpenXML opts (Str " ")
inlineToOpenXML' opts SoftBreak = inlineToOpenXML opts (Str " ")
+inlineToOpenXML' opts (Span ("",["csl-block"],[]) ils) =
+ inlinesToOpenXML opts ils
+inlineToOpenXML' opts (Span ("",["csl-left-margin"],[]) ils) =
+ inlinesToOpenXML opts ils
+inlineToOpenXML' opts (Span ("",["csl-right-inline"],[]) ils) =
+ ([mknode "w:r" []
+ (mknode "w:t"
+ [("xml:space","preserve")]
+ ("\t" :: String))] ++)
+ <$> inlinesToOpenXML opts ils
+inlineToOpenXML' opts (Span ("",["csl-indent"],[]) ils) =
+ inlinesToOpenXML opts ils
inlineToOpenXML' _ (Span (ident,["comment-start"],kvs) ils) = do
-- prefer the "id" in kvs, since that is the one produced by the docx
-- reader.
diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs
index b40765145..b6bde7f8f 100644
--- a/src/Text/Pandoc/Writers/HTML.hs
+++ b/src/Text/Pandoc/Writers/HTML.hs
@@ -86,6 +86,8 @@ data WriterState = WriterState
, stSlideLevel :: Int -- ^ Slide level
, stInSection :: Bool -- ^ Content is in a section (revealjs)
, stCodeBlockNum :: Int -- ^ Number of code block
+ , stCsl :: Bool -- ^ Has CSL references
+ , stCslEntrySpacing :: Maybe Int -- ^ CSL entry spacing
}
defaultWriterState :: WriterState
@@ -96,7 +98,9 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stQuotes = False,
stSlideVariant = NoSlides,
stSlideLevel = 1,
stInSection = False,
- stCodeBlockNum = 0}
+ stCodeBlockNum = 0,
+ stCsl = False,
+ stCslEntrySpacing = Nothing}
-- Helpers to render HTML with the appropriate function.
@@ -316,39 +320,48 @@ pandocToHtml opts (Pandoc meta blocks) = do
Just sty -> defField "highlighting-css"
(T.pack $ styleToCss sty)
Nothing -> id
- else id) $
+ else id) .
+ (if stCsl st
+ then defField "csl-css" True .
+ (case stCslEntrySpacing st of
+ Nothing -> id
+ Just 0 -> id
+ Just n ->
+ defField "csl-entry-spacing"
+ (tshow n <> "em"))
+ else id) .
(if stMath st
then defField "math" (renderHtml' math)
- else id) $
+ else id) .
(case writerHTMLMathMethod opts of
MathJax u -> defField "mathjax" True .
defField "mathjaxurl"
(T.takeWhile (/='?') u)
- _ -> defField "mathjax" False) $
+ _ -> defField "mathjax" False) .
(case writerHTMLMathMethod opts of
PlainMath -> defField "displaymath-css" True
WebTeX _ -> defField "displaymath-css" True
- _ -> id) $
- defField "document-css" (isNothing mCss && slideVariant == NoSlides) $
- defField "quotes" (stQuotes st) $
+ _ -> id) .
+ defField "document-css" (isNothing mCss && slideVariant == NoSlides) .
+ defField "quotes" (stQuotes st) .
-- for backwards compatibility we populate toc
-- with the contents of the toc, rather than a
-- boolean:
- maybe id (defField "toc") toc $
- maybe id (defField "table-of-contents") toc $
- defField "author-meta" authsMeta $
+ maybe id (defField "toc") toc .
+ maybe id (defField "table-of-contents") toc .
+ defField "author-meta" authsMeta .
maybe id (defField "date-meta")
- (normalizeDate dateMeta) $
+ (normalizeDate dateMeta) .
defField "pagetitle"
- (stringifyHTML . docTitle $ meta) $
- defField "idprefix" (writerIdentifierPrefix opts) $
+ (stringifyHTML . docTitle $ meta) .
+ defField "idprefix" (writerIdentifierPrefix opts) .
-- these should maybe be set in pandoc.hs
defField "slidy-url"
- ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) $
- defField "slideous-url" ("slideous" :: Text) $
+ ("https://www.w3.org/Talks/Tools/Slidy2" :: Text) .
+ defField "slideous-url" ("slideous" :: Text) .
defField "revealjs-url" ("https://unpkg.com/reveal.js@^4/" :: Text) $
- defField "s5-url" ("s5/default" :: Text) $
- defField "html5" (stHtml5 st)
+ defField "s5-url" ("s5/default" :: Text) .
+ defField "html5" (stHtml5 st) $
metadata
return (thebody, context)
@@ -743,12 +756,17 @@ blockToHtml opts (Div (ident, "section":dclasses, dkvs)
blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
html5 <- gets stHtml5
slideVariant <- gets stSlideVariant
+ let isCslBibBody = ident == "refs" || "csl-bib-body" `elem` classes
+ when isCslBibBody $ modify $ \st -> st{ stCsl = True
+ , stCslEntrySpacing =
+ lookup "entry-spacing" kvs' >>=
+ safeRead }
+ let isCslBibEntry = "csl-entry" `elem` classes
let kvs = [(k,v) | (k,v) <- kvs', k /= "width"] ++
[("style", "width:" <> w <> ";") | "column" `elem` classes,
("width", w) <- kvs'] ++
- [("role", "doc-bibliography") | ident == "refs" && html5] ++
- [("role", "doc-biblioentry")
- | "ref-" `T.isPrefixOf` ident && html5]
+ [("role", "doc-bibliography") | isCslBibBody && html5] ++
+ [("role", "doc-biblioentry") | isCslBibEntry && html5]
let speakerNotes = "notes" `elem` classes
-- we don't want incremental output inside speaker notes, see #1394
let opts' = if | speakerNotes -> opts{ writerIncremental = False }
@@ -765,7 +783,9 @@ blockToHtml opts (Div attr@(ident, classes, kvs') bs) = do
-- a newline between the column divs, which throws
-- off widths! see #4028
mconcat <$> mapM (blockToHtml opts) bs
- else blockListToHtml opts' bs
+ else if isCslBibEntry
+ then mconcat <$> mapM (cslEntryToHtml opts') bs
+ else blockListToHtml opts' bs
let contents' = nl opts >> contents >> nl opts
let (divtag, classes'') = if html5 && "section" `elem` classes'
then (H5.section, filter (/= "section") classes')
@@ -1439,6 +1459,23 @@ blockListToNote opts ref blocks = do
_ -> noteItem
return $ nl opts >> noteItem'
+cslEntryToHtml :: PandocMonad m
+ => WriterOptions
+ -> Block
+ -> StateT WriterState m Html
+cslEntryToHtml opts (Para xs) = do
+ html5 <- gets stHtml5
+ let inDiv :: Text -> Html -> Html
+ inDiv cls x = ((if html5 then H5.div else H.div)
+ x ! A.class_ (toValue cls))
+ let go (Span ("",[cls],[]) ils)
+ | cls == "csl-block" || cls == "csl-left-margin" ||
+ cls == "csl-right-inline" || cls == "csl-indent"
+ = inDiv cls <$> inlineListToHtml opts ils
+ go il = inlineToHtml opts il
+ mconcat <$> mapM go xs
+cslEntryToHtml opts x = blockToHtml opts x
+
isMathEnvironment :: Text -> Bool
isMathEnvironment s = "\\begin{" `T.isPrefixOf` s &&
envName `elem` mathmlenvs
diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs
index 228b34d09..a4003b672 100644
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@@ -71,7 +71,6 @@ data WriterState =
, stBeamer :: Bool -- produce beamer
, stEmptyLine :: Bool -- true if no content on line
, stHasCslRefs :: Bool -- has a Div with class refs
- , stCslHangingIndent :: Bool -- use hanging indent for bib
, stIsFirstInDefinition :: Bool -- first block in a defn list
}
@@ -103,7 +102,6 @@ startingState options = WriterState {
, stBeamer = False
, stEmptyLine = True
, stHasCslRefs = False
- , stCslHangingIndent = False
, stIsFirstInDefinition = False }
-- | Convert Pandoc to LaTeX.
@@ -243,7 +241,6 @@ pandocToLaTeX options (Pandoc meta blocks) = do
else defField "dir" ("ltr" :: Text)) $
defField "section-titles" True $
defField "csl-refs" (stHasCslRefs st) $
- defField "csl-hanging-indent" (stCslHangingIndent st) $
defField "geometry" geometryFromMargins $
(case T.uncons . render Nothing <$>
getField "papersize" metadata of
@@ -541,16 +538,23 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
then modify $ \st -> st{ stIncremental = True }
else when (beamer && "nonincremental" `elem` classes) $
modify $ \st -> st { stIncremental = False }
- result <- if identifier == "refs"
+ result <- if identifier == "refs" || -- <- for backwards compatibility
+ "csl-bib-body" `elem` classes
then do
+ modify $ \st -> st{ stHasCslRefs = True }
inner <- blockListToLaTeX bs
- modify $ \st -> st{ stHasCslRefs = True
- , stCslHangingIndent =
- "hanging-indent" `elem` classes }
- return $ "\\begin{cslreferences}" $$
- inner $$
- "\\end{cslreferences}"
- else blockListToLaTeX bs
+ return $ "\\begin{CSLReferences}" <>
+ (if "hanging-indent" `elem` classes
+ then braces "1"
+ else braces "0") <>
+ (case lookup "entry-spacing" kvs of
+ Nothing -> braces "0"
+ Just s -> braces (literal s))
+ $$ inner
+ $+$ "\\end{CSLReferences}"
+ else if "csl-entry" `elem` classes
+ then vcat <$> mapM cslEntryToLaTeX bs
+ else blockListToLaTeX bs
modify $ \st -> st{ stIncremental = oldIncremental }
linkAnchor' <- hypertarget True identifier empty
-- see #2704 for the motivation for adding \leavevmode:
@@ -1151,6 +1155,23 @@ isQuoted :: Inline -> Bool
isQuoted (Quoted _ _) = True
isQuoted _ = False
+cslEntryToLaTeX :: PandocMonad m
+ => Block
+ -> LW m (Doc Text)
+cslEntryToLaTeX (Para xs) =
+ mconcat <$> mapM go xs
+ where
+ go (Span ("",["csl-block"],[]) ils) =
+ (cr <>) . inCmd "CSLBlock" <$> inlineListToLaTeX ils
+ go (Span ("",["csl-left-margin"],[]) ils) =
+ inCmd "CSLLeftMargin" <$> inlineListToLaTeX ils
+ go (Span ("",["csl-right-inline"],[]) ils) =
+ (cr <>) . inCmd "CSLRightInline" <$> inlineListToLaTeX ils
+ go (Span ("",["csl-indent"],[]) ils) =
+ (cr <>) . inCmd "CSLIndent" <$> inlineListToLaTeX ils
+ go il = inlineToLaTeX il
+cslEntryToLaTeX x = blockToLaTeX x
+
-- | Convert inline element to LaTeX
inlineToLaTeX :: PandocMonad m
=> Inline -- ^ Inline to convert
diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs
index f3aadde59..dbf7a3d79 100644
--- a/src/Text/Pandoc/Writers/Ms.hs
+++ b/src/Text/Pandoc/Writers/Ms.hs
@@ -110,16 +110,37 @@ blockToMs :: PandocMonad m
-> Block -- ^ Block element
-> MS m (Doc Text)
blockToMs _ Null = return empty
-blockToMs opts (Div (ident,_,_) bs) = do
+blockToMs opts (Div (ident,cls,kvs) bs) = do
let anchor = if T.null ident
then empty
else nowrap $
literal ".pdfhref M "
<> doubleQuotes (literal (toAscii ident))
- setFirstPara
- res <- blockListToMs opts bs
- setFirstPara
- return $ anchor $$ res
+ case cls of
+ _ | "csl-entry" `elem` cls ->
+ (".CSLENTRY" $$) . vcat <$> mapM (cslEntryToMs True opts) bs
+ | "csl-bib-body" `elem` cls -> do
+ res <- blockListToMs opts bs
+ return $ anchor $$
+ -- so that XP paragraphs are indented:
+ ".nr PI 3n" $$
+ -- space between entries
+ ".de CSLENTRY" $$
+ (case lookup "entry-spacing" kvs >>= safeRead of
+ Just n | n > (0 :: Int) -> ".sp"
+ _ -> mempty) $$
+ ".." $$
+ ".de CSLP" $$
+ (if "hanging-indent" `elem` cls
+ then ".XP"
+ else ".LP") $$
+ ".." $$
+ res
+ _ -> do
+ setFirstPara
+ res <- blockListToMs opts bs
+ setFirstPara
+ return $ anchor $$ res
blockToMs opts (Plain inlines) =
liftM vcat $ mapM (inlineListToMs' opts) $ splitSentences inlines
blockToMs opts (Para [Image attr alt (src,_tit)])
@@ -440,6 +461,39 @@ inlineToMs _ (Note contents) = do
modify $ \st -> st{ stNotes = contents : stNotes st }
return $ literal "\\**"
+cslEntryToMs :: PandocMonad m
+ => Bool
+ -> WriterOptions
+ -> Block
+ -> MS m (Doc Text)
+cslEntryToMs atStart opts (Para xs) =
+ case xs of
+ (Span ("",["csl-left-margin"],[]) lils :
+ rest@(Span ("",["csl-right-inline"],[]) _ : _))
+ -> do lils' <- inlineListToMs' opts lils
+ ((cr <> literal ".IP " <>
+ doubleQuotes (nowrap lils') <>
+ literal " 5") $$)
+ <$> cslEntryToMs False opts (Para rest)
+ (Span ("",["csl-block"],[]) ils : rest)
+ -> ((cr <> literal ".LP") $$)
+ <$> cslEntryToMs False opts (Para (ils ++ rest))
+ (Span ("",["csl-left-margin"],[]) ils : rest)
+ -> ((cr <> literal ".LP") $$)
+ <$> cslEntryToMs False opts (Para (ils ++ rest))
+ (Span ("",["csl-indented"],[]) ils : rest)
+ -> ((cr <> literal ".LP") $$)
+ <$> cslEntryToMs False opts (Para (ils ++ rest))
+ _ | atStart
+ -> (".CSLP" $$) <$> cslEntryToMs False opts (Para xs)
+ | otherwise
+ -> case xs of
+ [] -> return mempty
+ (x:rest) -> (<>) <$> (inlineToMs opts x)
+ <*> (cslEntryToMs False opts (Para rest))
+cslEntryToMs _ opts x = blockToMs opts x
+
+
handleNotes :: PandocMonad m => WriterOptions -> Doc Text -> MS m (Doc Text)
handleNotes opts fallback = do
notes <- gets stNotes