diff options
author | John MacFarlane <jgm@berkeley.edu> | 2021-02-27 17:02:44 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2021-02-27 17:02:44 -0800 |
commit | 925815bb33b462e1a4c19a8e2c617d403dec0ce7 (patch) | |
tree | 870501cafb8473198796257db15ba903cd0f2320 /src/Text | |
parent | 63123db22ffc242589bda4203d13a7ca8c6bd3a6 (diff) | |
download | pandoc-925815bb33b462e1a4c19a8e2c617d403dec0ce7.tar.gz |
Split off Text.Pandoc.Readers.LaTeX.Accent.
This helps reduce memory demands when compiling the main LaTeX reader.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 68 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX/Accent.hs | 78 |
2 files changed, 86 insertions, 60 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 7d8dfab0e..51c031f78 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -57,6 +57,7 @@ import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), ArgSpec (..), Tok (..), TokType (..)) import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Accent (accentCommands) import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47) import Text.Pandoc.Readers.LaTeX.SIunitx @@ -64,7 +65,6 @@ import Text.Pandoc.Shared import qualified Text.Pandoc.Translations as Translations import Text.Pandoc.Walk import qualified Text.Pandoc.Builder as B -import qualified Data.Text.Normalize as Normalize import Safe -- for debugging: @@ -247,9 +247,6 @@ doxspace = startsWithLetter _ = False -lit :: Text -> LP m Inlines -lit = pure . str - removeDoubleQuotes :: Text -> Text removeDoubleQuotes t = Data.Maybe.fromMaybe t $ T.stripPrefix "\"" t >>= T.stripSuffix "\"" @@ -296,6 +293,9 @@ quoted' f starter ender = do cs -> cs) else lit startchs +lit :: Text -> LP m Inlines +lit = pure . 
str + enquote :: PandocMonad m => Bool -> Maybe Text -> LP m Inlines enquote starred mblang = do skipopts @@ -631,7 +631,10 @@ inlineEnvironments = M.fromList [ ] inlineCommands :: PandocMonad m => M.Map Text (LP m Inlines) -inlineCommands = M.union inlineLanguageCommands $ M.fromList +inlineCommands = + M.union inlineLanguageCommands $ + M.union (accentCommands tok) $ + M.fromList [ ("emph", extractSpaces emph <$> tok) , ("textit", extractSpaces emph <$> tok) , ("textsl", extractSpaces emph <$> tok) @@ -703,48 +706,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("MakeTextLowercase", makeLowercase <$> tok) , ("lowercase", makeLowercase <$> tok) , ("/", pure mempty) -- italic correction - , ("aa", lit "å") - , ("AA", lit "Å") - , ("ss", lit "ß") - , ("o", lit "ø") - , ("O", lit "Ø") - , ("L", lit "Ł") - , ("l", lit "ł") - , ("ae", lit "æ") - , ("AE", lit "Æ") - , ("oe", lit "œ") - , ("OE", lit "Œ") - , ("pounds", lit "£") - , ("euro", lit "€") - , ("copyright", lit "©") - , ("textasciicircum", lit "^") - , ("textasciitilde", lit "~") - , ("H", accent '\779' Nothing) -- hungarumlaut - , ("`", accent '\768' (Just '`')) -- grave - , ("'", accent '\769' (Just '\'')) -- acute - , ("^", accent '\770' (Just '^')) -- circ - , ("~", accent '\771' (Just '~')) -- tilde - , ("\"", accent '\776' Nothing) -- umlaut - , (".", accent '\775' Nothing) -- dot - , ("=", accent '\772' Nothing) -- macron - , ("|", accent '\781' Nothing) -- vertical line above - , ("b", accent '\817' Nothing) -- macron below - , ("c", accent '\807' Nothing) -- cedilla - , ("G", accent '\783' Nothing) -- doublegrave - , ("h", accent '\777' Nothing) -- hookabove - , ("d", accent '\803' Nothing) -- dotbelow - , ("f", accent '\785' Nothing) -- inverted breve - , ("r", accent '\778' Nothing) -- ringabove - , ("t", accent '\865' Nothing) -- double inverted breve - , ("U", accent '\782' Nothing) -- double vertical line above - , ("v", accent '\780' Nothing) -- hacek - , ("u", accent '\774' 
Nothing) -- breve - , ("k", accent '\808' Nothing) -- ogonek - , ("textogonekcentered", accent '\808' Nothing) -- ogonek - , ("i", lit "ı") -- dotless i - , ("j", lit "ȷ") -- dotless j - , ("newtie", accent '\785' Nothing) -- inverted breve - , ("textcircled", accent '\8413' Nothing) -- combining circle , ("\\", linebreak <$ (do inTableCell <- sInTableCell <$> getState guard $ not inTableCell optional opt @@ -960,19 +921,6 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("hyphen", pure (str "-")) ] -accent :: PandocMonad m => Char -> Maybe Char -> LP m Inlines -accent combiningAccent fallBack = try $ do - ils <- tok - case toList ils of - (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ - -- try to normalize to the combined character: - Str (Normalize.normalize Normalize.NFC - (T.pack [x, combiningAccent]) <> xs) : ys - [Space] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack - _ -> return ils - - lettrine :: PandocMonad m => LP m Inlines lettrine = do optional opt diff --git a/src/Text/Pandoc/Readers/LaTeX/Accent.hs b/src/Text/Pandoc/Readers/LaTeX/Accent.hs new file mode 100644 index 000000000..f8c53491c --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/Accent.hs @@ -0,0 +1,78 @@ +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.Accent + ( accentCommands ) +where + +import Text.Pandoc.Class +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Builder as B +import qualified Data.Map as M +import Data.Text (Text) +import Data.Maybe (fromMaybe) +import Text.Pandoc.Parsing +import qualified Data.Text as T +import qualified Data.Text.Normalize as Normalize + +accentCommands :: PandocMonad m => LP m Inlines -> M.Map Text (LP m Inlines) +accentCommands tok = + let accent = accentWith tok + lit = pure . 
str + in M.fromList + [ ("aa", lit "å") + , ("AA", lit "Å") + , ("ss", lit "ß") + , ("o", lit "ø") + , ("O", lit "Ø") + , ("L", lit "Ł") + , ("l", lit "ł") + , ("ae", lit "æ") + , ("AE", lit "Æ") + , ("oe", lit "œ") + , ("OE", lit "Œ") + , ("pounds", lit "£") + , ("euro", lit "€") + , ("copyright", lit "©") + , ("textasciicircum", lit "^") + , ("textasciitilde", lit "~") + , ("H", accent '\779' Nothing) -- hungarumlaut + , ("`", accent '\768' (Just '`')) -- grave + , ("'", accent '\769' (Just '\'')) -- acute + , ("^", accent '\770' (Just '^')) -- circ + , ("~", accent '\771' (Just '~')) -- tilde + , ("\"", accent '\776' Nothing) -- umlaut + , (".", accent '\775' Nothing) -- dot + , ("=", accent '\772' Nothing) -- macron + , ("|", accent '\781' Nothing) -- vertical line above + , ("b", accent '\817' Nothing) -- macron below + , ("c", accent '\807' Nothing) -- cedilla + , ("G", accent '\783' Nothing) -- doublegrave + , ("h", accent '\777' Nothing) -- hookabove + , ("d", accent '\803' Nothing) -- dotbelow + , ("f", accent '\785' Nothing) -- inverted breve + , ("r", accent '\778' Nothing) -- ringabove + , ("t", accent '\865' Nothing) -- double inverted breve + , ("U", accent '\782' Nothing) -- double vertical line above + , ("v", accent '\780' Nothing) -- hacek + , ("u", accent '\774' Nothing) -- breve + , ("k", accent '\808' Nothing) -- ogonek + , ("textogonekcentered", accent '\808' Nothing) -- ogonek + , ("i", lit "ı") -- dotless i + , ("j", lit "ȷ") -- dotless j + , ("newtie", accent '\785' Nothing) -- inverted breve + , ("textcircled", accent '\8413' Nothing) -- combining circle + ] + +accentWith :: PandocMonad m + => LP m Inlines -> Char -> Maybe Char -> LP m Inlines +accentWith tok combiningAccent fallBack = try $ do + ils <- tok + case toList ils of + (Str (T.uncons -> Just (x, xs)) : ys) -> return $ fromList $ + -- try to normalize to the combined character: + Str (Normalize.normalize Normalize.NFC + (T.pack [x, combiningAccent]) <> xs) : ys + [Space] -> 
return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + [] -> return $ str $ T.singleton $ fromMaybe combiningAccent fallBack + _ -> return ils + |