From a5eb02f6a7feb4b3dee913fd395c98736638c127 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Mon, 20 Oct 2014 19:54:21 +0200 Subject: Org reader: parse LaTeX-style MathML entities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Org supports special symbols which can be included using LaTeX syntax, but are actually MathML entities. Examples for this are `\nbsp` (non-breaking space), `\Aacute` (the letter A with accent acute) or `\copy` (the copyright sign ©). This fixes #1657. --- src/Text/Pandoc/Readers/Org.hs | 9 ++++++++- tests/Tests/Readers/Org.hs | 12 ++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs index a6ebf65dc..1ddfeab87 100644 --- a/src/Text/Pandoc/Readers/Org.hs +++ b/src/Text/Pandoc/Readers/Org.hs @@ -42,6 +42,7 @@ import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.Pandoc.Shared (compactify', compactify'DL) import Text.TeXMath (readTeX, writePandoc, DisplayType(..)) +import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap import Control.Applicative ( Applicative, pure , (<$>), (<$), (<*>), (<*), (*>) ) @@ -1431,7 +1432,8 @@ simpleSubOrSuperString = try $ inlineLaTeX :: OrgParser (F Inlines) inlineLaTeX = try $ do cmd <- inlineLaTeXCommand - maybe mzero returnF $ parseAsMath cmd `mplus` parseAsInlineLaTeX cmd + maybe mzero returnF $ + parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` parseAsInlineLaTeX cmd where parseAsMath :: String -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs @@ -1439,6 +1441,11 @@ inlineLaTeX = try $ do parseAsInlineLaTeX :: String -> Maybe Inlines parseAsInlineLaTeX cs = maybeRight $ runParser inlineCommand state "" cs + parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) + -- dropWhileEnd would be nice here, but it's not available before base 4.5 + where clean = reverse . dropWhile (`elem` "{}") . reverse . drop 1 + state :: ParserState state = def{ stateOptions = def{ readerParseRaw = True }} diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index ef2f2d1ae..392388ec0 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -276,6 +276,18 @@ tests = "\\notacommand{foo}" =?> para (rawInline "latex" "\\notacommand{foo}") + , "MathML symbol in LaTeX-style" =: + "There is a hackerspace in Lübeck, Germany, called nbsp (unicode symbol: '\\nbsp')." =?> + para ("There is a hackerspace in Lübeck, Germany, called nbsp (unicode symbol: ' ').") + + , "MathML symbol in LaTeX-style, including braces" =: + "\\Aacute{}stor" =?> + para "Ástor" + + , "MathML copy sign" =: + "\\copy" =?> + para "©" + , "LaTeX citation" =: "\\cite{Coffee}" =?> let citation = Citation -- cgit v1.2.3