From 81881ce4708f06ca53088d95ff249b48f2b2e88d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 9 May 2018 10:00:34 -0700 Subject: Parsing: Lookahead for non-whitespace after single/double quote start. Closes #4637. --- src/Text/Pandoc/Parsing.hs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index fa6baf1c7..05f4f7d36 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -1366,7 +1366,9 @@ singleQuoteStart = do failIfInQuoteContext InSingleQuote -- single quote start can't be right after str guard =<< notAfterString - () <$ charOrRef "'\8216\145" + try $ do + charOrRef "'\8216\145" + notFollowedBy (oneOf [' ', '\t', '\n']) singleQuoteEnd :: Stream s m Char => ParserT s st m () @@ -1379,7 +1381,7 @@ doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char) doubleQuoteStart = do failIfInQuoteContext InDoubleQuote try $ do charOrRef "\"\8220\147" - notFollowedBy . satisfy $ flip elem [' ', '\t', '\n'] + notFollowedBy (oneOf [' ', '\t', '\n']) doubleQuoteEnd :: Stream s m Char => ParserT s st m () -- cgit v1.2.3 From d3be567a73478298485f66b2d2af5ca066eae052 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 9 May 2018 10:37:04 -0700 Subject: Fix regression with tex math environments in HTML + MathJax. Closes #4639. --- src/Text/Pandoc/Writers/HTML.hs | 16 +++++----------- test/command/4639.md | 10 ++++++++++ 2 files changed, 15 insertions(+), 11 deletions(-) create mode 100644 test/command/4639.md (limited to 'src/Text') diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 646168c72..535071ae2 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -665,16 +665,11 @@ blockToHtml opts (Para [Image attr@(_,classes,_) txt (src,tit)]) -- title beginning with fig: indicates that the image is a figure blockToHtml opts (Para [Image attr txt (s,'f':'i':'g':':':tit)]) = figure opts attr txt (s,tit) -blockToHtml opts (Para lst) - | isEmptyRaw lst = return mempty - | null lst && not (isEnabled Ext_empty_paragraphs opts) = return mempty - | otherwise = do - contents <- inlineListToHtml opts lst - return $ H.p contents - where - isEmptyRaw [RawInline f _] = f `notElem` [Format "html", - Format "html4", Format "html5"] - isEmptyRaw _ = False +blockToHtml opts (Para lst) = do + contents <- inlineListToHtml opts lst + case contents of + Empty _ | not (isEnabled Ext_empty_paragraphs opts) -> return mempty + _ -> return $ H.p contents blockToHtml opts (LineBlock lns) = if writerWrapText opts == WrapNone then blockToHtml opts $ linesToPara lns @@ -1063,7 +1058,6 @@ inlineToHtml opts inline = do if ishtml then return $ preEscapedString str else if (f == Format "latex" || f == Format "tex") && - "\\begin" `isPrefixOf` str && allowsMathEnvironments (writerHTMLMathMethod opts) && isMathEnvironment str then inlineToHtml opts $ Math DisplayMath str diff --git a/test/command/4639.md b/test/command/4639.md new file mode 100644 index 000000000..c35df1749 --- /dev/null +++ b/test/command/4639.md @@ -0,0 +1,10 @@ +``` +% pandoc -t html --mathjax +\begin{equation} + E=mc^2 +\end{equation} +^D +

\[\begin{equation} + E=mc^2 +\end{equation}\]

+``` -- cgit v1.2.3 From b7356d3ddf788d83b41b1100d718087bd83a7506 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 9 May 2018 11:29:20 -0700 Subject: Restored and undeprecated gladtex for HTML math. - Added `GladTeX` constructor to `Text.Pandoc.Options.HTMLMathMethod` [API change, reverts removal in v2.2] - Restored and undeprecated `--gladtex` option, removed in v2.2. Closes #4607. --- MANUAL.txt | 12 +++++++++++ man/pandoc.1 | 46 +++++++++++++++++++++++++++++++++++------ src/Text/Pandoc/App.hs | 6 ++++++ src/Text/Pandoc/Options.hs | 1 + src/Text/Pandoc/Writers/HTML.hs | 9 +++++++- 5 files changed, 67 insertions(+), 7 deletions(-) (limited to 'src/Text') diff --git a/MANUAL.txt b/MANUAL.txt index 42b658af9..7a9871d8f 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1217,9 +1217,21 @@ of the following options. not specified, a link to the KaTeX CDN will be inserted. Note that this option does not imply `--katex`. +`--gladtex` + +: Enclose TeX math in `` tags in HTML output. The resulting HTML + can then be processed by [GladTeX] to produce images of the typeset + formulas and an HTML file with links to these images. + So, the procedure is: + + pandoc -s --gladtex input.md -o myfile.htex + gladtex -d myfile-images myfile.htex + # produces myfile.html and images in myfile-images + [MathML]: http://www.w3.org/Math/ [MathJax]: https://www.mathjax.org [KaTeX]: https://github.com/Khan/KaTeX +[GladTeX]: http://humenda.github.io/GladTeX/ Options for wrapper scripts --------------------------- diff --git a/man/pandoc.1 b/man/pandoc.1 index ed9fdb009..5c560f11b 100644 --- a/man/pandoc.1 +++ b/man/pandoc.1 @@ -647,8 +647,7 @@ a specified full or relative path (executable or non\-executable) .B \f[C]\-M\f[] \f[I]KEY\f[][\f[C]=\f[]\f[I]VAL\f[]], \f[C]\-\-metadata=\f[]\f[I]KEY\f[][\f[C]:\f[]\f[I]VAL\f[]] Set the metadata field \f[I]KEY\f[] to the value \f[I]VAL\f[]. A value specified on the command line overrides a value specified in the -document using [YAML metadata -blocks][Extension:\f[C]yaml_metadata_block\f[]]. +document using YAML metadata blocks. Values will be parsed as YAML boolean or string values. If no value is specified, the value will be treated as Boolean true. Like \f[C]\-\-variable\f[], \f[C]\-\-metadata\f[] causes template @@ -727,6 +726,8 @@ Produce output with an appropriate header and footer (e.g. a standalone HTML, LaTeX, TEI, or RTF file, not a fragment). This option is set automatically for \f[C]pdf\f[], \f[C]epub\f[], \f[C]epub3\f[], \f[C]fb2\f[], \f[C]docx\f[], and \f[C]odt\f[] output. +For \f[C]native\f[] output, this option causes metadata to be included; +otherwise, metadata is suppressed. .RS .RE .TP @@ -1413,6 +1414,22 @@ inserted. Note that this option does not imply \f[C]\-\-katex\f[]. .RS .RE +.TP +.B \f[C]\-\-gladtex\f[] +Enclose TeX math in \f[C]\f[] tags in HTML output. +The resulting HTML can then be processed by GladTeX to produce images of +the typeset formulas and an HTML file with links to these images. +So, the procedure is: +.RS +.IP +.nf +\f[C] +pandoc\ \-s\ \-\-gladtex\ input.md\ \-o\ myfile.htex +gladtex\ \-d\ myfile\-images\ myfile.htex +#\ produces\ myfile.html\ and\ images\ in\ myfile\-images +\f[] +.fi +.RE .SS Options for wrapper scripts .TP .B \f[C]\-\-dump\-args\f[] @@ -1489,9 +1506,8 @@ arbitrary information at any point in the file. They may be set at the command line using the \f[C]\-V/\-\-variable\f[] option. If a variable is not set, pandoc will look for the key in the -document\[aq]s metadata \[en] which can be set using either [YAML -metadata blocks][Extension:\f[C]yaml_metadata_block\f[]] or with the -\f[C]\-\-metadata\f[] option. +document\[aq]s metadata \[en] which can be set using either YAML +metadata blocks or with the \f[C]\-\-metadata\f[] option. .SS Variables set by pandoc .PP Some variables are set automatically by pandoc. @@ -2081,7 +2097,25 @@ $endif$ .fi .PP This will include \f[C]X\f[] in the template if \f[C]variable\f[] has a -non\-null value; otherwise it will include \f[C]Y\f[]. +truthy value; otherwise it will include \f[C]Y\f[]. +Here a truthy value is any of the following: +.IP \[bu] 2 +a string that is not entirely white space, +.IP \[bu] 2 +a non\-empty array where the first value is truthy, +.IP \[bu] 2 +any number (including zero), +.IP \[bu] 2 +any object, +.IP \[bu] 2 +the boolean \f[C]true\f[] (to specify the boolean \f[C]true\f[] value +using YAML metadata or the \f[C]\-\-metadata\f[] flag, use \f[C]y\f[], +\f[C]Y\f[], \f[C]yes\f[], \f[C]Yes\f[], \f[C]YES\f[], \f[C]true\f[], +\f[C]True\f[], \f[C]TRUE\f[], \f[C]on\f[], \f[C]On\f[], or \f[C]ON\f[]; +with the \f[C]\-\-variable\f[] flag, simply omit a value for the +variable, e.g. +\f[C]\-\-variable\ draft\f[]). +.PP \f[C]X\f[] and \f[C]Y\f[] are placeholders for any valid template text, and may include interpolated variables or other conditionals. The \f[C]$else$\f[] section may be omitted. diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 920462d48..a59fd9bbe 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -1403,6 +1403,12 @@ options = "URL") "" -- Use KaTeX for HTML Math + , Option "" ["gladtex"] + (NoArg + (\opt -> + return opt { optHTMLMathMethod = GladTeX })) + "" -- "Use gladtex for HTML math" + , Option "" ["abbreviations"] (ReqArg (\arg opt -> return opt { optAbbreviations = Just arg }) diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index 4797a3094..e5ca1764c 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -107,6 +107,7 @@ data EPUBVersion = EPUB2 | EPUB3 deriving (Eq, Show, Read, Data, Typeable, Gener data HTMLMathMethod = PlainMath | WebTeX String -- url of TeX->image script. + | GladTeX | MathML | MathJax String -- url of MathJax.js | KaTeX String -- url of KaTeX files diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 535071ae2..a09ad2fda 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -58,7 +58,7 @@ import qualified Data.Text.Lazy as TL import Network.HTTP (urlEncode) import Network.URI (URI (..), parseURIReference, unEscapeString) import Numeric (showHex) -import Text.Blaze.Internal (customLeaf, MarkupM(Empty)) +import Text.Blaze.Internal (customLeaf, customParent, MarkupM(Empty)) #if MIN_VERSION_blaze_markup(0,6,3) #else import Text.Blaze.Internal (preEscapedString, preEscapedText) @@ -1029,6 +1029,13 @@ inlineToHtml opts inline = do return $ case t of InlineMath -> m DisplayMath -> brtag >> m >> brtag + GladTeX -> + return $ + customParent (textTag "eq") ! + customAttribute "env" + (toValue $ if t == InlineMath + then ("math" :: Text) + else "displaymath") $ strToHtml str MathML -> do let conf = useShortEmptyTags (const False) defaultConfigPP -- cgit v1.2.3 From d30fbc2879f58be7951eeb1cf8f9acfc83f1ebe6 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Wed, 9 May 2018 23:03:09 +0200 Subject: Org reader: fix image filename recognition Use a function from the *filepath* library to check whether a string is a valid file name. The custom validity checker that was used before gave wrong results, e.g. for absolute file paths on Windows (kawabata/ox-pandoc#52). --- src/Text/Pandoc/Readers/Org/Shared.hs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/Text') diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 07dbeca2a..17fe34738 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -36,17 +36,18 @@ module Text.Pandoc.Readers.Org.Shared import Prelude import Data.Char (isAlphaNum) -import Data.List (isPrefixOf, isSuffixOf) +import Data.List (isPrefixOf) +import System.FilePath (isValid, takeExtension) -- | Check whether the given string looks like the path to of URL of an image. isImageFilename :: String -> Bool -isImageFilename filename = - any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions && - (any (\x -> (x ++ "://") `isPrefixOf` filename) protocols || - ':' `notElem` filename) +isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri) where - imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ] + hasImageExtension = takeExtension fp `elem` imageExtensions + isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols + + imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ] protocols = [ "file", "http", "https" ] -- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if -- cgit v1.2.3