diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-08-12 16:45:44 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-08-12 16:46:48 -0700 |
commit | 6d14f53bd96f123acb5d8030cf9402ddb2e41f01 (patch) | |
tree | 1535641fcf990d33526372dd03f86dfce27ba071 /src/Text/Pandoc/Readers | |
parent | 81131ef5d19052948b4ac12a727e2ceef8a98186 (diff) | |
download | pandoc-6d14f53bd96f123acb5d8030cf9402ddb2e41f01.tar.gz |
LaTeX reader: Allow `%` characters in URLs.
This affects `\href` and `\url`. Closes #4832.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/LaTeX.hs | 44 |
1 files changed, 31 insertions, 13 deletions
diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 1dd31d402..e9869290f 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -615,21 +615,28 @@ grouped parser = try $ do
   -- {{a,b}} should be parsed the same as {a,b}
   try (grouped parser <* egroup) <|> (mconcat <$> manyTill parser egroup)

-braced :: PandocMonad m => LP m [Tok]
-braced = bgroup *> braced' 1
-  where braced' (n :: Int) =
-          handleEgroup n <|> handleBgroup n <|> handleOther n
-        handleEgroup n = do
+braced' :: PandocMonad m => LP m Tok -> Int -> LP m [Tok]
+braced' getTok n =
+  handleEgroup <|> handleBgroup <|> handleOther
+  where handleEgroup = do
           t <- egroup
           if n == 1
              then return []
-             else (t:) <$> braced' (n - 1)
-        handleBgroup n = do
+             else (t:) <$> braced' getTok (n - 1)
+        handleBgroup = do
           t <- bgroup
-          (t:) <$> braced' (n + 1)
-        handleOther n = do
-          t <- anyTok
-          (t:) <$> braced' n
+          (t:) <$> braced' getTok (n + 1)
+        handleOther = do
+          t <- getTok
+          (t:) <$> braced' getTok n
+
+braced :: PandocMonad m => LP m [Tok]
+braced = bgroup *> braced' anyTok 1
+
+-- URLs require special handling, because they can contain %
+-- characters. So we retonenize comments as we go...
+bracedUrl :: PandocMonad m => LP m [Tok]
+bracedUrl = bgroup *> braced' (retokenizeComment >> anyTok) 1

 bracketed :: PandocMonad m => Monoid a => LP m a -> LP m a
 bracketed parser = try $ do
@@ -1290,6 +1297,17 @@ unescapeURL ('\\':x:xs) | isEscapable x = x:unescapeURL xs
 unescapeURL (x:xs) = x:unescapeURL xs
 unescapeURL [] = ""

+-- For handling URLs, which allow literal % characters...
+retokenizeComment :: PandocMonad m => LP m ()
+retokenizeComment = (do
+  Tok pos Comment txt <- satisfyTok isCommentTok
+  let updPos (Tok pos' toktype' txt') =
+        Tok (incSourceColumn (incSourceLine pos' (sourceLine pos - 1))
+             (sourceColumn pos)) toktype' txt'
+  let newtoks = map updPos $ tokenize (sourceName pos) $ T.tail txt
+  getInput >>= setInput . ((Tok pos Symbol "%" : newtoks) ++))
+    <|> return ()
+
 mathEnvWith :: PandocMonad m => (Inlines -> a) -> Maybe Text -> Text -> LP m a
 mathEnvWith f innerEnv name = f . mathDisplay . inner <$> mathEnv name
@@ -1445,10 +1463,10 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList
   , ("verb", doverb)
   , ("lstinline", dolstinline)
   , ("Verb", doverb)
-  , ("url", ((unescapeURL . T.unpack . untokenize) <$> braced) >>= \url ->
+  , ("url", ((unescapeURL . T.unpack . untokenize) <$> bracedUrl) >>= \url ->
                   pure (link url "" (str url)))
   , ("href", (unescapeURL . toksToString <$>
-                 braced <* optional sp) >>= \url ->
+                 bracedUrl <* optional sp) >>= \url ->
                    tok >>= \lab -> pure (link url "" lab))
   , ("includegraphics", do options <- option [] keyvals
                            src <- unescapeURL . T.unpack .