aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-02-27 17:02:44 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2021-02-27 17:02:44 -0800
commit925815bb33b462e1a4c19a8e2c617d403dec0ce7 (patch)
tree870501cafb8473198796257db15ba903cd0f2320 /src
parent63123db22ffc242589bda4203d13a7ca8c6bd3a6 (diff)
downloadpandoc-925815bb33b462e1a4c19a8e2c617d403dec0ce7.tar.gz
Split off T.P.Readers.LaTeX.Accent.
To help reduce memory demands compiling the main LaTeX reader.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/LaTeX.hs68
-rw-r--r--src/Text/Pandoc/Readers/LaTeX/Accent.hs78
2 files changed, 86 insertions, 60 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 7d8dfab0e..51c031f78 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -57,6 +57,7 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline,
import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..),
ArgSpec (..), Tok (..), TokType (..))
import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Readers.LaTeX.Accent (accentCommands)
import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47,
babelLangToBCP47)
import Text.Pandoc.Readers.LaTeX.SIunitx
@@ -64,7 +65,6 @@ import Text.Pandoc.Shared
import qualified Text.Pandoc.Translations as Translations
import Text.Pandoc.Walk
import qualified Text.Pandoc.Builder as B
-import qualified Data.Text.Normalize as Normalize
import Safe
-- for debugging:
@@ -247,9 +247,6 @@ doxspace =
startsWithLetter _ = False
-lit :: Text -> LP m Inlines
-lit = pure . str
-
removeDoubleQuotes :: Text -> Text
removeDoubleQuotes t =
Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\""
@@ -296,6 +293,9 @@ quoted' f starter ender = do
cs -> cs)
else lit startchs
+lit :: Text -> LP m Inlines
+lit = pure . str
+
enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines
enquote starred mblang = do
skipopts
@@ -631,7 +631,10 @@ inlineEnvironments = M.fromList [
]
inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines)
-inlineCommands = M.union inlineLanguageCommands $ M.fromList
+inlineCommands =
+ M.union inlineLanguageCommands $
+ M.union (accentCommands tok) $
+ M.fromList
[ ("emph", extractSpaces emph <$> tok)
, ("textit", extractSpaces emph <$> tok)
, ("textsl", extractSpaces emph <$> tok)
@@ -703,48 +706,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("MakeTextLowercase", makeLowercase <$> tok)
, ("lowercase", makeLowercase <$> tok)
, ("/", pure mempty) -- italic correction
- , ("aa", lit "å")
- , ("AA", lit "Å")
- , ("ss", lit "ß")
- , ("o", lit "ø")
- , ("O", lit "Ø")
- , ("L", lit "Ł")
- , ("l", lit "ł")
- , ("ae", lit "æ")
- , ("AE", lit "Æ")
- , ("oe", lit "œ")
- , ("OE", lit "Œ")
- , ("pounds", lit "£")
- , ("euro", lit "€")
- , ("copyright", lit "©")
- , ("textasciicircum", lit "^")
- , ("textasciitilde", lit "~")
- , ("H", accent '\779' Nothing) -- hungarumlaut
- , ("`", accent '\768' (Just '`')) -- grave
- , ("'", accent '\769' (Just '\'')) -- acute
- , ("^", accent '\770' (Just '^')) -- circ
- , ("~", accent '\771' (Just '~')) -- tilde
- , ("\"", accent '\776' Nothing) -- umlaut
- , (".", accent '\775' Nothing) -- dot
- , ("=", accent '\772' Nothing) -- macron
- , ("|", accent '\781' Nothing) -- vertical line above
- , ("b", accent '\817' Nothing) -- macron below
- , ("c", accent '\807' Nothing) -- cedilla
- , ("G", accent '\783' Nothing) -- doublegrave
- , ("h", accent '\777' Nothing) -- hookabove
- , ("d", accent '\803' Nothing) -- dotbelow
- , ("f", accent '\785' Nothing) -- inverted breve
- , ("r", accent '\778' Nothing) -- ringabove
- , ("t", accent '\865' Nothing) -- double inverted breve
- , ("U", accent '\782' Nothing) -- double vertical line above
- , ("v", accent '\780' Nothing) -- hacek
- , ("u", accent '\774' Nothing) -- breve
- , ("k", accent '\808' Nothing) -- ogonek
- , ("textogonekcentered", accent '\808' Nothing) -- ogonek
- , ("i", lit "ı") -- dotless i
- , ("j", lit "ȷ") -- dotless j
- , ("newtie", accent '\785' Nothing) -- inverted breve
- , ("textcircled", accent '\8413' Nothing) -- combining circle
, ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState
guard $ not inTableCell
optional opt
@@ -960,19 +921,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
, ("hyphen", pure (str "-"))
]
-accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines
-accent combiningAccent fallBack = try $ do
- ils <- tok
- case toList ils of
- (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
- -- try to normalize to the combined character:
- Str (Normalize.normalize Normalize.NFC
- (T.pack [x, combiningAccent]) <> xs) : ys
- [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
- [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
- _ -> return ils
-
-
lettrine :: PandocMonad m => LP m Inlines
lettrine = do
optional opt
diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
new file mode 100644
index 000000000..f8c53491c
--- /dev/null
+++ b/src/Text/Pandoc/Readers/LaTeX/Accent.hs
@@ -0,0 +1,78 @@
+{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+module Text.Pandoc.Readers.LaTeX.Accent
+ ( accentCommands )
+where
+
+import Text.Pandoc.Class
+import Text.Pandoc.Readers.LaTeX.Parsing
+import Text.Pandoc.Builder as B
+import qualified Data.Map as M
+import Data.Text (Text)
+import Data.Maybe (fromMaybe)
+import Text.Pandoc.Parsing
+import qualified Data.Text as T
+import qualified Data.Text.Normalize as Normalize
+
+accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines)
+accentCommands tok =
+ let accent = accentWith tok
+ lit = pure . str
+ in M.fromList
+ [ ("aa", lit "å")
+ , ("AA", lit "Å")
+ , ("ss", lit "ß")
+ , ("o", lit "ø")
+ , ("O", lit "Ø")
+ , ("L", lit "Ł")
+ , ("l", lit "ł")
+ , ("ae", lit "æ")
+ , ("AE", lit "Æ")
+ , ("oe", lit "œ")
+ , ("OE", lit "Œ")
+ , ("pounds", lit "£")
+ , ("euro", lit "€")
+ , ("copyright", lit "©")
+ , ("textasciicircum", lit "^")
+ , ("textasciitilde", lit "~")
+ , ("H", accent '\779' Nothing) -- hungarumlaut
+ , ("`", accent '\768' (Just '`')) -- grave
+ , ("'", accent '\769' (Just '\'')) -- acute
+ , ("^", accent '\770' (Just '^')) -- circ
+ , ("~", accent '\771' (Just '~')) -- tilde
+ , ("\"", accent '\776' Nothing) -- umlaut
+ , (".", accent '\775' Nothing) -- dot
+ , ("=", accent '\772' Nothing) -- macron
+ , ("|", accent '\781' Nothing) -- vertical line above
+ , ("b", accent '\817' Nothing) -- macron below
+ , ("c", accent '\807' Nothing) -- cedilla
+ , ("G", accent '\783' Nothing) -- doublegrave
+ , ("h", accent '\777' Nothing) -- hookabove
+ , ("d", accent '\803' Nothing) -- dotbelow
+ , ("f", accent '\785' Nothing) -- inverted breve
+ , ("r", accent '\778' Nothing) -- ringabove
+ , ("t", accent '\865' Nothing) -- double inverted breve
+ , ("U", accent '\782' Nothing) -- double vertical line above
+ , ("v", accent '\780' Nothing) -- hacek
+ , ("u", accent '\774' Nothing) -- breve
+ , ("k", accent '\808' Nothing) -- ogonek
+ , ("textogonekcentered", accent '\808' Nothing) -- ogonek
+ , ("i", lit "ı") -- dotless i
+ , ("j", lit "ȷ") -- dotless j
+ , ("newtie", accent '\785' Nothing) -- inverted breve
+ , ("textcircled", accent '\8413' Nothing) -- combining circle
+ ]
+
+accentWith :: PandocMonad m
+ => LP m Inlines -> Char -> Maybe Char -> LP m Inlines
+accentWith tok combiningAccent fallBack = try $ do
+ ils <- tok
+ case toList ils of
+ (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $
+ -- try to normalize to the combined character:
+ Str (Normalize.normalize Normalize.NFC
+ (T.pack [x, combiningAccent]) <> xs) : ys
+ [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+ [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack
+ _ -> return ils
+