aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYan Pas <yanp.bugz@gmail.com>2018-05-26 20:12:41 +0300
committerYan Pas <yanp.bugz@gmail.com>2018-05-26 20:12:41 +0300
commit1d7c71189a5760d6674b02c13d0db6dae8eff10d (patch)
treeddab02fa8a8f1934aa7c7fca7aa9c72eee55d473
parent1ce067fc2a4f70963c2c879eac74807bb4fa9e7c (diff)
downloadpandoc-1d7c71189a5760d6674b02c13d0db6dae8eff10d.tar.gz
escape chars in macros
-rw-r--r--src/Text/Pandoc/Readers/Man.hs115
1 files changed, 66 insertions, 49 deletions
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 7b752373f..8977c9df4 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE FlexibleContexts #-}
{-
Copyright (C) 2018 Yan Pashkovsky <yanp.bugz@gmail.com>
@@ -65,6 +66,9 @@ data MacroKind = KTitle
| KTabEnd
deriving (Show, Eq)
+-- TODO header strings
+-- TODO remove MStr
+-- TODO filter skipped content
data ManToken = MStr String FontKind
| MLine [(String, FontKind)]
| MMaybeLink String
@@ -159,14 +163,59 @@ parseMan = do
isNull Null = True
isNull _ = False
-eofline :: PandocMonad m => ManLexer m ()
+eofline :: Stream s m Char => ParsecT s u m ()
eofline = (newline >> return ()) <|> eof
--- TODO escape characters in arguments
+spacetab :: Stream s m Char => ParsecT s u m Char
+spacetab = char ' ' <|> char '\t'
+
+-- TODO handle more cases
+escapeLexer :: PandocMonad m => ManLexer m EscapeThing
+escapeLexer = do
+ char '\\'
+ choice [escChar, escFont]
+ where
+
+ escChar :: PandocMonad m => ManLexer m EscapeThing
+ escChar =
+ let skipSeqs = ["%", "{", "}", "&"]
+ subsSeqs = [ ("-", '-'), (" ", ' '), ("\\", '\\'), ("[lq]", '“'), ("[rq]", '”')
+ , ("[em]", '—'), ("[en]", '–') ]
+ substitute :: PandocMonad m => (String,Char) -> ManLexer m EscapeThing
+ substitute (from,to) = try $ string from >> return (EChar to)
+ skip :: PandocMonad m => String -> ManLexer m EscapeThing
+ skip seq' = try $ string seq' >> return ENothing
+ in choice $ (substitute <$> subsSeqs) ++
+ (skip <$> skipSeqs) ++
+ [ char '(' >> anyChar >> return ENothing
+ , char '[' >> many alphaNum >> char ']' >> return ENothing
+ ]
+
+ escFont :: PandocMonad m => ManLexer m EscapeThing
+ escFont = do
+ char 'f'
+ font <- choice [ letterFont
+ , char '(' >> anyChar >> anyChar >> return Regular
+ , try (char '[' >> letterFont >>= \f -> char ']' >> return f)
+ , try $ string "[BI]" >> return ItalicBold
+ , char '[' >> many letter >> char ']' >> return Regular
+ ]
+ modifyState (\r -> r {fontKind = font})
+ return $ EFont font
+
+ where
+
+ letterFont :: PandocMonad m => ManLexer m FontKind
+ letterFont = choice [
+ char 'B' >> return Bold
+ , char 'I' >> return Italic
+ , char 'P' >> return Regular
+ ]
+
lexMacro :: PandocMonad m => ManLexer m ManToken
lexMacro = do
char '.' <|> char '\''
- many space
+ many spacetab
macroName <- many1 (letter <|> oneOf ['\\', '"'])
args <- lexArgs
let joinedArgs = unwords args
@@ -175,6 +224,7 @@ lexMacro = do
let tok = case macroName of
x | x `elem` ["\\\"", "\\#"] -> MComment joinedArgs
"TH" -> knownMacro KTitle
+ "IP" -> knownMacro KTab
"TP" -> knownMacro KTab
"RE" -> knownMacro KTabEnd
"nf" -> knownMacro KCodeBlStart
@@ -201,61 +251,28 @@ lexMacro = do
oneArg :: PandocMonad m => ManLexer m String
oneArg = do
- many1 $ char ' '
- try quotedArg <|> plainArg
+ many1 spacetab
+ quotedArg <|> plainArg
plainArg :: PandocMonad m => ManLexer m String
- plainArg = many1 $ noneOf " \t\n"
+ plainArg = fmap catMaybes . many1 $ escChar <|> (Just <$> noneOf " \t\n")
quotedArg :: PandocMonad m => ManLexer m String
quotedArg = do
char '"'
- val <- many1 quotedChar
+ val <- catMaybes <$> many quotedChar
char '"'
return val
- quotedChar :: PandocMonad m => ManLexer m Char
- quotedChar = noneOf "\"\n" <|> try (string "\"\"" >> return '"')
-
--- TODO handle more cases
-escapeLexer :: PandocMonad m => ManLexer m EscapeThing
-escapeLexer = do
- char '\\'
- choice [escChar, escFont]
- where
-
- escChar :: PandocMonad m => ManLexer m EscapeThing
- escChar =
- let skipChars = ['%', '{', '}', '&']
- subsChars = [ ("-", '-'), (" ", ' '), ("\\", '\\'), ("[lq]", '“'), ("[rq]", '”')
- , ("[em]", '—'), ("[en]", '–') ]
- substitute :: PandocMonad m => (String,Char) -> ManLexer m EscapeThing
- substitute (from,to) = try $ string from >> return (EChar to)
- others = [ oneOf skipChars >> return ENothing
- , char '[' >> many alphaNum >> char ']' >> return ENothing
- ]
- in choice $ (substitute <$> subsChars) ++ others
-
- escFont :: PandocMonad m => ManLexer m EscapeThing
- escFont = do
- char 'f'
- font <- choice [ letterFont
- , char '(' >> anyChar >> anyChar >> return Regular
- , try (char '[' >> letterFont >>= \f -> char ']' >> return f)
- , try $ string "[BI]" >> return ItalicBold
- , char '[' >> many letter >> char ']' >> return Regular
- ]
- modifyState (\r -> r {fontKind = font})
- return $ EFont font
-
- where
+ quotedChar :: PandocMonad m => ManLexer m (Maybe Char)
+ quotedChar = escChar <|> (Just <$> noneOf "\"\n") <|> (Just <$> try (string "\"\"" >> return '"'))
- letterFont :: PandocMonad m => ManLexer m FontKind
- letterFont = choice [
- char 'B' >> return Bold
- , char 'I' >> return Italic
- , char 'P' >> return Regular
- ]
+ escChar :: PandocMonad m => ManLexer m (Maybe Char)
+ escChar = do
+ ec <- escapeLexer
+ case ec of
+ (EChar c) -> return $ Just c
+ _ -> return Nothing
lexLine :: PandocMonad m => ManLexer m ManToken
lexLine = do
@@ -416,7 +433,7 @@ parsePara = do
linkParser :: Parsec String () [Inline]
linkParser = do
mpage <- many1 (alphaNum <|> char '_')
- space
+ spacetab
char '('
mansect <- digit
char ')'