From 8f408e4b7b24bfcf0f6a48522a2d77b0b878d34d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 6 Feb 2011 09:27:03 -0800 Subject: Added --ascii option. Currently supported only in HTML writer. --- README | 5 +++++ src/Text/Pandoc/Shared.hs | 2 ++ src/Text/Pandoc/Writers/HTML.hs | 37 ++++++++++++++++++++----------------- src/pandoc.hs | 13 +++++++++++-- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/README b/README index 897d8b000..d14fd8938 100644 --- a/README +++ b/README @@ -292,6 +292,11 @@ Options `--columns`=*NUMBER* : Specify length of lines in characters (for text wrapping). +`--ascii` +: Use only ASCII characters in output. Currently supported only + for HTML output (which uses numerical entities instead of + UTF-8 when this option is selected). + `--email-obfuscation=`*none|javascript|references* : Specify a method for obfuscating `mailto:` links in HTML documents. *none* leaves `mailto:` links as they are. *javascript* obfuscates diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index fec04a6c5..c4bc66830 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -490,6 +490,7 @@ data WriterOptions = WriterOptions , writerHtml5 :: Bool -- ^ Produce HTML5 , writerChapters :: Bool -- ^ Use "chapter" for top-level sects , writerListings :: Bool -- ^ Use listings package for code + , writerAscii :: Bool -- ^ Avoid non-ascii characters } deriving Show -- | Default writer options. 
@@ -522,6 +523,7 @@ defaultWriterOptions = , writerHtml5 = False , writerChapters = False , writerListings = False + , writerAscii = False } -- diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 1faeeea80..f9a10f355 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -43,6 +43,7 @@ import Data.List ( isPrefixOf, intersperse ) import Data.Maybe ( catMaybes ) import Control.Monad.State import Text.XHtml.Transitional hiding ( stringToHtml, unordList, ordList ) +import qualified Text.XHtml.Transitional as XHtml import Text.TeXMath import Text.XML.Light.Output @@ -60,8 +61,10 @@ defaultWriterState = WriterState {stNotes= [], stMath = False, stHighlighting = -- | Modified version of Text.XHtml's stringToHtml. -- Use unicode characters wherever possible. -stringToHtml :: String -> Html -stringToHtml = primHtml . escapeStringForXML +stringToHtml :: WriterOptions -> String -> Html +stringToHtml opts = if writerAscii opts + then XHtml.stringToHtml + else primHtml . escapeStringForXML -- | Hard linebreak. nl :: WriterOptions -> Html @@ -219,7 +222,7 @@ elementToListItem _ (Blk _) = return Nothing elementToListItem opts (Sec _ num id' headerText subsecs) = do let sectnum = if writerNumberSections opts then (thespan ! [theclass "toc-section-number"] << showSecNum num) +++ - stringToHtml " " + stringToHtml opts" " else noHtml txt <- liftM (sectnum +++) $ inlineListToHtml opts headerText subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes @@ -301,7 +304,7 @@ obfuscateLink opts txt s = linkText ++ "+'<\\/'+'a'+'>');\n// -->\n")) +++ noscript (primHtml $ obfuscateString altText) _ -> error $ "Unknown obfuscation method: " ++ show meth - _ -> anchor ! [href s] $ stringToHtml txt -- malformed email + _ -> anchor ! [href s] $ stringToHtml opts txt -- malformed email -- | Obfuscate character as entity. 
obfuscateChar :: Char -> String @@ -353,7 +356,7 @@ blockToHtml opts (CodeBlock (id',classes,keyvals) rawCode) = do else unlines . lines in return $ pre ! attrs $ thecode << (replicate (length leadingBreaks) br +++ - [stringToHtml $ addBird rawCode']) + [stringToHtml opts $ addBird rawCode']) Right h -> modify (\st -> st{ stHighlighting = True }) >> return h blockToHtml opts (BlockQuote blocks) = @@ -379,7 +382,7 @@ blockToHtml opts (Header level lst) = do secnum <- liftM stSecNum get let contents' = if writerNumberSections opts then (thespan ! [theclass "header-section-number"] << showSecNum secnum) +++ - stringToHtml " " +++ contents + stringToHtml opts " " +++ contents else contents let contents'' = if writerTableOfContents opts then anchor ! [href $ "#" ++ writerIdentifierPrefix opts ++ "TOC"] $ contents' @@ -512,19 +515,19 @@ inlineListToHtml opts lst = inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = case inline of - (Str str) -> return $ stringToHtml str - (Space) -> return $ stringToHtml " " + (Str str) -> return $ stringToHtml opts str + (Space) -> return $ stringToHtml opts " " (LineBreak) -> return br - (EmDash) -> return $ stringToHtml "—" - (EnDash) -> return $ stringToHtml "–" - (Ellipses) -> return $ stringToHtml "…" - (Apostrophe) -> return $ stringToHtml "’" + (EmDash) -> return $ stringToHtml opts "—" + (EnDash) -> return $ stringToHtml opts "–" + (Ellipses) -> return $ stringToHtml opts "…" + (Apostrophe) -> return $ stringToHtml opts "’" (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize (Strong lst) -> inlineListToHtml opts lst >>= return . strong (Code attr str) -> case highlightHtml True attr str of Left _ -> return $ thecode ! (attrsToHtml opts attr) - $ stringToHtml str + $ stringToHtml opts str Right h -> return h (Strikeout lst) -> inlineListToHtml opts lst >>= return . (thespan ! 
[thestyle "text-decoration: line-through;"]) @@ -534,10 +537,10 @@ inlineToHtml opts inline = (Subscript lst) -> inlineListToHtml opts lst >>= return . sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of - SingleQuote -> (stringToHtml "‘", - stringToHtml "’") - DoubleQuote -> (stringToHtml "“", - stringToHtml "”") + SingleQuote -> (stringToHtml opts "‘", + stringToHtml opts "’") + DoubleQuote -> (stringToHtml opts "“", + stringToHtml opts "”") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote (Math t str) -> modify (\st -> st {stMath = True}) >> diff --git a/src/pandoc.hs b/src/pandoc.hs index c0f457449..e73f05965 100644 --- a/src/pandoc.hs +++ b/src/pandoc.hs @@ -123,6 +123,7 @@ data Opt = Opt , optBibliography :: [String] , optCslFile :: FilePath , optListings :: Bool -- ^ Use listings package for code blocks + , optAscii :: Bool -- ^ Avoid using nonascii characters } -- | Defaults for command-line options. @@ -166,6 +167,7 @@ defaultOpts = Opt , optBibliography = [] , optCslFile = "" , optListings = False + , optAscii = False } -- | A list of functions, each transforming the options data structure @@ -347,6 +349,11 @@ options = "NUMBER") "" -- "Length of line in characters" + , Option "" ["ascii"] + (NoArg + (\opt -> return opt { optAscii = True })) + "" -- "Avoid using non-ascii characters in output" + , Option "" ["email-obfuscation"] (ReqArg (\arg opt -> do @@ -681,7 +688,8 @@ main = do , optBibliography = reffiles , optCslFile = cslfile , optCiteMethod = citeMethod - , optListings = listings + , optListings = listings + , optAscii = ascii } = opts when dumpArgs $ @@ -803,7 +811,8 @@ main = do writerHtml5 = html5 && "html" `isPrefixOf` writerName', writerChapters = chapters, - writerListings = listings } + writerListings = listings, + writerAscii = ascii } when (isNonTextOutput writerName' && outputFile == "-") $ do UTF8.hPutStrLn stderr ("Error: Cannot write " ++ writerName ++ " output to 
stdout.\n" ++ -- cgit v1.2.3