diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-09-30 22:32:00 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-09-30 22:32:00 -0700 |
commit | 36f1846cc3130dbe4168789cc03f916ebf5828c8 (patch) | |
tree | 8b28a2616f8948fcdb0e8d42a176234e9e3784aa /src/Text/Pandoc/Writers | |
parent | 0a8d212a097267cafc4cd5a64691b8e85aadb5c3 (diff) | |
download | pandoc-36f1846cc3130dbe4168789cc03f916ebf5828c8.tar.gz |
Implement `--ascii` (`writerPreferAscii`) in writers, not App.
Now the `write*` functions for Docbook, HTML, ICML, JATS,
Man, Ms, OPML are sensitive to `writerPreferAscii`. Previously
the to-ascii translation was done in Text.Pandoc.App, and
thus not available to those using the writer functions
directly.
In addition, the LaTeX writer is now sensitive to
`writerPreferAscii` and to `--ascii`. 100% ASCII
output can't be guaranteed, but the writer will use
commands like `\"{a}` and `\l` whenever possible,
to avoid emiting a non-ASCII character.
A new unexported module, Text.Pandoc.Groff, has been
added to store functions used in the different groff-based
writers.
Diffstat (limited to 'src/Text/Pandoc/Writers')
-rw-r--r-- | src/Text/Pandoc/Writers/Docbook.hs | 7 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 18 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/ICML.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/JATS.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/LaTeX.hs | 144 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Man.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/Ms.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/OPML.hs | 3 |
8 files changed, 132 insertions, 54 deletions
diff --git a/src/Text/Pandoc/Writers/Docbook.hs b/src/Text/Pandoc/Writers/Docbook.hs index f6e814095..3306e4f31 100644 --- a/src/Text/Pandoc/Writers/Docbook.hs +++ b/src/Text/Pandoc/Writers/Docbook.hs @@ -126,9 +126,10 @@ writeDocbook opts (Pandoc meta blocks) = do defField "mathml" (case writerHTMLMathMethod opts of MathML -> True _ -> False) metadata - case writerTemplate opts of - Nothing -> return main - Just tpl -> renderTemplate' tpl context + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of + Nothing -> return main + Just tpl -> renderTemplate' tpl context -- | Convert an Element to Docbook. elementToDocbook :: PandocMonad m => WriterOptions -> Int -> Element -> DB m Doc diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index c7f25197f..19ec4692e 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -75,7 +75,7 @@ import Text.Pandoc.Templates import Text.Pandoc.Walk import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared -import Text.Pandoc.XML (escapeStringForXML, fromEntities) +import Text.Pandoc.XML (escapeStringForXML, fromEntities, toEntities) #if MIN_VERSION_blaze_markup(0,6,3) #else import Text.Blaze.Internal (preEscapedString, preEscapedText) @@ -206,7 +206,8 @@ writeHtmlString' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Text writeHtmlString' st opts d = do (body, context) <- evalStateT (pandocToHtml opts d) st - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return $ renderHtml' body Just tpl -> do -- warn if empty lang @@ -221,16 +222,19 @@ writeHtmlString' st opts d = do lookup "sourcefile" (writerVariables opts) report $ NoTitleElement fallback return $ resetField "pagetitle" fallback context - renderTemplate' tpl $ - defField "body" (renderHtml' body) context' + renderTemplate' tpl + (defField "body" (renderHtml' body) context') writeHtml' :: PandocMonad m => WriterState -> WriterOptions -> Pandoc -> m Html writeHtml' st opts d = case writerTemplate opts of Just _ -> preEscapedText <$> writeHtmlString' st opts d - Nothing -> do - (body, _) <- evalStateT (pandocToHtml opts d) st - return body + Nothing + | writerPreferAscii opts + -> preEscapedText <$> writeHtmlString' st opts d + | otherwise -> do + (body, _) <- evalStateT (pandocToHtml opts d) st + return body -- result is (title, authors, date, toc, body, new variables) pandocToHtml :: PandocMonad m diff --git a/src/Text/Pandoc/Writers/ICML.hs b/src/Text/Pandoc/Writers/ICML.hs index b8fc0dc94..ef1e2af0a 100644 --- a/src/Text/Pandoc/Writers/ICML.hs +++ b/src/Text/Pandoc/Writers/ICML.hs @@ -149,7 +149,8 @@ writeICML opts (Pandoc meta blocks) = do $ defField "charStyles" (render' $ charStylesToDoc st) $ defField "parStyles" (render' $ parStylesToDoc st) $ defField "hyperlinks" (render' $ hyperlinksToDoc $ links st) metadata - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context diff --git a/src/Text/Pandoc/Writers/JATS.hs b/src/Text/Pandoc/Writers/JATS.hs index f55a49d4e..4e78a4cce 100644 --- a/src/Text/Pandoc/Writers/JATS.hs +++ b/src/Text/Pandoc/Writers/JATS.hs @@ -102,7 +102,8 @@ docToJATS opts (Pandoc meta blocks) = do $ defField "mathml" (case writerHTMLMathMethod opts of MathML -> True _ -> False) metadata - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context diff --git a/src/Text/Pandoc/Writers/LaTeX.hs b/src/Text/Pandoc/Writers/LaTeX.hs index 7be3fce28..c1b5d0fa4 100644 --- a/src/Text/Pandoc/Writers/LaTeX.hs +++ b/src/Text/Pandoc/Writers/LaTeX.hs @@ -42,8 +42,9 @@ import Data.Aeson (FromJSON, object, (.=)) import Data.Char (isAlphaNum, isAscii, isDigit, isLetter, isPunctuation, ord, toLower) import Data.List (foldl', intercalate, intersperse, isInfixOf, nubBy, - stripPrefix, (\\)) + stripPrefix, (\\), uncons) import Data.Maybe (catMaybes, fromMaybe, isJust, mapMaybe, isNothing) +import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T import Network.URI (unEscapeString) @@ -63,6 +64,7 @@ import Text.Pandoc.Walk import Text.Pandoc.Writers.Shared import qualified Text.Parsec as P import Text.Printf (printf) +import qualified Data.Text.Normalize as Normalize data WriterState = WriterState { stInNote :: Bool -- true if we're in a note @@ -318,46 +320,110 @@ data StringContext = TextString -- escape things as needed for LaTeX stringToLaTeX :: PandocMonad m => StringContext -> String -> LW m String -stringToLaTeX _ [] = return "" -stringToLaTeX ctx (x:xs) = do +stringToLaTeX context zs = do opts <- gets stOptions - rest <- stringToLaTeX ctx xs - let ligatures = isEnabled Ext_smart opts && ctx == TextString - let isUrl = ctx == URLString - return $ + go opts context $ + if writerPreferAscii opts + then T.unpack $ Normalize.normalize Normalize.NFD $ T.pack zs + else zs + where + go _ _ [] = return "" + go opts ctx (x:xs) = do + let ligatures = isEnabled Ext_smart opts && ctx == TextString + let isUrl = ctx == URLString + let mbAccentCmd = + if writerPreferAscii opts && ctx == TextString + then uncons xs >>= \(c,_) -> M.lookup c accents + else Nothing + let emits s = + case mbAccentCmd of + Just cmd -> ((cmd ++ "{" ++ s ++ "}") ++) + <$> go opts ctx (drop 1 xs) -- drop combining accent + Nothing -> (s++) <$> go opts ctx xs + let emitc c = + case mbAccentCmd of + Just cmd -> ((cmd ++ "{" ++ [c] ++ "}") ++) + <$> go opts ctx (drop 1 xs) -- drop combining accent + Nothing -> (c:) <$> go opts ctx xs case x of - '{' -> "\\{" ++ rest - '}' -> "\\}" ++ rest - '`' | ctx == CodeString -> "\\textasciigrave{}" ++ rest - '$' | not isUrl -> "\\$" ++ rest - '%' -> "\\%" ++ rest - '&' -> "\\&" ++ rest - '_' | not isUrl -> "\\_" ++ rest - '#' -> "\\#" ++ rest - '-' | not isUrl -> case xs of - -- prevent adjacent hyphens from forming ligatures - ('-':_) -> "-\\/" ++ rest - _ -> '-' : rest - '~' | not isUrl -> "\\textasciitilde{}" ++ rest - '^' -> "\\^{}" ++ rest - '\\'| isUrl -> '/' : rest -- NB. / works as path sep even on Windows - | otherwise -> "\\textbackslash{}" ++ rest - '|' | not isUrl -> "\\textbar{}" ++ rest - '<' -> "\\textless{}" ++ rest - '>' -> "\\textgreater{}" ++ rest - '[' -> "{[}" ++ rest -- to avoid interpretation as - ']' -> "{]}" ++ rest -- optional arguments - '\'' | ctx == CodeString -> "\\textquotesingle{}" ++ rest - '\160' -> "~" ++ rest - '\x202F' -> "\\," ++ rest - '\x2026' -> "\\ldots{}" ++ rest - '\x2018' | ligatures -> "`" ++ rest - '\x2019' | ligatures -> "'" ++ rest - '\x201C' | ligatures -> "``" ++ rest - '\x201D' | ligatures -> "''" ++ rest - '\x2014' | ligatures -> "---" ++ rest - '\x2013' | ligatures -> "--" ++ rest - _ -> x : rest + '{' -> emits "\\{" + '}' -> emits "\\}" + '`' | ctx == CodeString -> emits "\\textasciigrave{}" + '$' | not isUrl -> emits "\\$" + '%' -> emits "\\%" + '&' -> emits "\\&" + '_' | not isUrl -> emits "\\_" + '#' -> emits "\\#" + '-' | not isUrl -> case xs of + -- prevent adjacent hyphens from forming ligatures + ('-':_) -> emits "-\\/" + _ -> emitc '-' + '~' | not isUrl -> emits "\\textasciitilde{}" + '^' -> emits "\\^{}" + '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows + | otherwise -> emits "\\textbackslash{}" + '|' | not isUrl -> emits "\\textbar{}" + '<' -> emits "\\textless{}" + '>' -> emits "\\textgreater{}" + '[' -> emits "{[}" -- to avoid interpretation as + ']' -> emits "{]}" -- optional arguments + '\'' | ctx == CodeString -> emits "\\textquotesingle{}" + '\160' -> emits "~" + '\x202F' -> emits "\\," + '\x2026' -> emits "\\ldots{}" + '\x2018' | ligatures -> emits "`" + '\x2019' | ligatures -> emits "'" + '\x201C' | ligatures -> emits "``" + '\x201D' | ligatures -> emits "''" + '\x2014' | ligatures -> emits "---" + '\x2013' | ligatures -> emits "--" + _ | writerPreferAscii opts + -> case x of + 'ı' -> emits "\\i " + 'ȷ' -> emits "\\j " + 'å' -> emits "\\aa " + 'Å' -> emits "\\AA " + 'ß' -> emits "\\ss " + 'ø' -> emits "\\o " + 'Ø' -> emits "\\O " + 'Ł' -> emits "\\L " + 'ł' -> emits "\\l " + 'æ' -> emits "\\ae " + 'Æ' -> emits "\\AE " + 'œ' -> emits "\\oe " + 'Œ' -> emits "\\OE " + '£' -> emits "\\pounds " + '€' -> emits "\\euro " + '©' -> emits "\\copyright " + _ -> emitc x + | otherwise -> emitc x + +accents :: M.Map Char String +accents = M.fromList + [ ('\779' , "\\H") + , ('\768' , "\\`") + , ('\769' , "\\'") + , ('\770' , "\\^") + , ('\771' , "\\~") + , ('\776' , "\\\"") + , ('\775' , "\\.") + , ('\772' , "\\=") + , ('\781' , "\\|") + , ('\817' , "\\b") + , ('\807' , "\\c") + , ('\783' , "\\G") + , ('\777' , "\\h") + , ('\803' , "\\d") + , ('\785' , "\\f") + , ('\778' , "\\r") + , ('\865' , "\\t") + , ('\782' , "\\U") + , ('\780' , "\\v") + , ('\774' , "\\u") + , ('\808' , "\\k") + , ('\785' , "\\newtie") + , ('\8413', "\\textcircled") + ] toLabel :: PandocMonad m => String -> LW m String toLabel z = go `fmap` stringToLaTeX URLString z diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index be490bf22..b6b72d07f 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -45,6 +45,7 @@ import Text.Pandoc.Logging import Text.Pandoc.Options import Text.Pandoc.Pretty import Text.Pandoc.Shared +import Text.Pandoc.Groff (groffEscape) import Text.Pandoc.Templates import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared @@ -107,7 +108,8 @@ pandocToMan opts (Pandoc meta blocks) = do $ defField "has-tables" hasTables $ defField "hyphenate" True $ defField "pandoc-version" pandocVersion metadata - case writerTemplate opts of + (if writerPreferAscii opts then groffEscape else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context diff --git a/src/Text/Pandoc/Writers/Ms.hs b/src/Text/Pandoc/Writers/Ms.hs index 3dcf816b8..a29524bbb 100644 --- a/src/Text/Pandoc/Writers/Ms.hs +++ b/src/Text/Pandoc/Writers/Ms.hs @@ -60,6 +60,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Templates import Text.Pandoc.Writers.Math import Text.Pandoc.Writers.Shared +import Text.Pandoc.Groff (groffEscape) import Text.Printf (printf) import Text.TeXMath (writeEqn) @@ -127,7 +128,8 @@ pandocToMs opts (Pandoc meta blocks) = do $ defField "title-meta" titleMeta $ defField "author-meta" (intercalate "; " authorsMeta) $ defField "highlighting-macros" highlightingMacros metadata - case writerTemplate opts of + (if writerPreferAscii opts then groffEscape else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context diff --git a/src/Text/Pandoc/Writers/OPML.hs b/src/Text/Pandoc/Writers/OPML.hs index 6c48046a2..716c5cbad 100644 --- a/src/Text/Pandoc/Writers/OPML.hs +++ b/src/Text/Pandoc/Writers/OPML.hs @@ -62,7 +62,8 @@ writeOPML opts (Pandoc meta blocks) = do meta' main <- (render colwidth . vcat) <$> mapM (elementToOPML opts) elements let context = defField "body" main metadata - case writerTemplate opts of + (if writerPreferAscii opts then toEntities else id) <$> + case writerTemplate opts of Nothing -> return main Just tpl -> renderTemplate' tpl context |