author John MacFarlane <jgm@berkeley.edu> 2021-01-26 22:45:57 -0800
committer John MacFarlane <jgm@berkeley.edu> 2021-01-26 22:45:57 -0800
commit 98c2a52b4ee6c833c0a2f2652386cec024e377eb (patch)
tree 0ab472a108a7ea001eb322e690dc5a2449caa554 /src/Text/Pandoc
parent 12bc6625352aaece955f9f0700f88e9280721ced (diff)
Clean up BibTeX parsing.
Previously there was a messy code path that gave strange results in some cases: instead of passing raw TeX through, it tried to extract string content from it. This was an artefact of trying to handle some special bibtex-specific commands in the BibTeX reader. Now we just handle these in the LaTeX reader and simplify parsing in the BibTeX reader. This does mean that more raw TeX will be passed through (and currently this is not sensitive to the `raw_tex` extension; this should be fixed). Closes #7049.
diff --git a/src/Text/Pandoc/Citeproc/BibTeX.hs b/src/Text/Pandoc/Citeproc/BibTeX.hs
index 10730a1e9..5b9068378 100644
--- a/src/Text/Pandoc/Citeproc/BibTeX.hs
+++ b/src/Text/Pandoc/Citeproc/BibTeX.hs
@@ -750,41 +750,10 @@ blocksToInlines bs =
_ -> B.fromList $ Walk.query (:[]) bs
adjustSpans :: Lang -> Inline -> Inline
-adjustSpans lang (RawInline (Format "latex") s)
- | s == "\\hyphen" || s == "\\hyphen " = Str "-"
- | otherwise = parseRawLaTeX lang s
+adjustSpans lang (Span ("",[],[("bibstring",s)]) _) = Str $ resolveKey' lang s
adjustSpans _ SoftBreak = Space
adjustSpans _ x = x
-parseRawLaTeX :: Lang -> Text -> Inline
-parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
- case parseLaTeX lang contents of
- Right [Para ys] -> f command ys
- Right [Plain ys] -> f command ys
- Right [] -> f command []
- _ -> RawInline (Format "latex") t
- where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
- command = T.strip command'
- contents = T.drop 1 $ T.dropEnd 1 contents'
- f "mkbibquote" ils = Span nullAttr [Quoted DoubleQuote ils]
- f "mkbibemph" ils = Span nullAttr [Emph ils]
- f "mkbibitalic" ils = Span nullAttr [Emph ils]
- f "mkbibbold" ils = Span nullAttr [Strong ils]
- f "mkbibparens" ils = Span nullAttr $
- [Str "("] ++ ils ++ [Str ")"]
- f "mkbibbrackets" ils = Span nullAttr $
- [Str "["] ++ ils ++ [Str "]"]
- -- ... both should be nestable & should work in year fields
- f "autocap" ils = Span nullAttr ils
- -- TODO: should work in year fields
- f "textnormal" ils = Span ("",["nodecor"],[]) ils
- f "bibstring" [Str s] = Str $ resolveKey' lang s
- f "adddot" [] = Str "."
- f "adddotspace" [] = Span nullAttr [Str ".", Space]
- f "addabbrvspace" [] = Space
- f _ ils = Span nullAttr ils
-parseRawLaTeX _ t = RawInline (Format "latex") t
latex' :: Text -> Bib [Block]
latex' t = do
lang <- gets localeLang
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index f49323996..91c71c000 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -940,6 +940,24 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("uline", underline <$> tok)
-- plain tex stuff that should just be passed through as raw tex
, ("ifdim", ifdim)
+ -- bibtex
+ , ("mkbibquote", spanWith nullAttr . doubleQuoted <$> tok)
+ , ("mkbibemph", spanWith nullAttr . emph <$> tok)
+ , ("mkbibitalic", spanWith nullAttr . emph <$> tok)
+ , ("mkbibbold", spanWith nullAttr . strong <$> tok)
+ , ("mkbibparens",
+ spanWith nullAttr . (\x -> str "(" <> x <> str ")") <$> tok)
+ , ("mkbibbrackets",
+ spanWith nullAttr . (\x -> str "[" <> x <> str "]") <$> tok)
+ , ("autocap", spanWith nullAttr <$> tok)
+ , ("textnormal", spanWith ("",["nodecor"],[]) <$> tok)
+ , ("bibstring",
+ (\x -> spanWith ("",[],[("bibstring",x)]) (str x)) . untokenize
+ <$> braced)
+ , ("adddot", pure (str "."))
+ , ("adddotspace", pure (spanWith nullAttr (str "." <> space)))
+ , ("addabbrvspace", pure space)
+ , ("hyphen", pure (str "-"))
accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines