diff options
author | John MacFarlane <jgm@berkeley.edu> | 2018-10-20 10:37:58 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2018-10-20 10:46:08 -0700 |
commit | 08179695fa9741b9d617f1b7729f2dd2476b4f27 (patch) | |
tree | 2de7f7d2ee12c2be14da68a8d15685e780f3fe02 | |
parent | 02b33a7d63ed2c930deda358c226031a42b10249 (diff) | |
download | pandoc-08179695fa9741b9d617f1b7729f2dd2476b4f27.tar.gz |
Man reader: support '..' (end macro).
Also give feedback for unknown character codes,
and return a replacement character U+FFFD.
-rw-r--r-- | src/Text/Pandoc/Readers/Man.hs | 18 |
1 file changed, 10 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 21325eb4f..7c54944e3 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA {- | Module : Text.Pandoc.Readers.Man - Copyright : Copyright (C) 2018 Yan Pashkovsky + Copyright : Copyright (C) 2018 Yan Pashkovsky and John MacFarlane License : GNU GPL, version 2 or above Maintainer : Yan Pashkovsky <yanp.bugz@gmail.com> @@ -74,6 +74,7 @@ data ManToken = MStr RoffStr | MEmptyLine | MMacro MacroKind [RoffStr] | MComment String + | MEndMacro deriving Show data RoffState = RoffState { fontKind :: Font @@ -159,7 +160,7 @@ escapeLexer = do cs <- count 2 anyChar case M.lookup cs characterCodeMap of Just c -> return [c] - Nothing -> escUnknown ('(':cs) + Nothing -> escUnknown ('\\':'(':cs) "\xFFFD" bracketedGlyph = char '[' *> @@ -177,7 +178,7 @@ escapeLexer = do charCode = do cs <- many1 (noneOf ['[',']',' ','\t','\n']) case M.lookup cs characterCodeMap of - Nothing -> mzero + Nothing -> escUnknown ("\\[" ++ cs ++ "]") '\xFFFD' Just c -> return c escStar = do @@ -216,7 +217,7 @@ escapeLexer = do '|' -> return " " '\'' -> return "`" '.' -> return "`" - _ -> escUnknown [c] + _ -> escUnknown [c] "\xFFFD" escFont :: PandocMonad m => ManLexer m String escFont = do @@ -245,11 +246,11 @@ escapeLexer = do , (char 'P' <|> char 'R') >> return Regular ] - escUnknown :: PandocMonad m => String -> ManLexer m String - escUnknown s = do + escUnknown :: PandocMonad m => String -> a -> ManLexer m a + escUnknown s x = do pos <- getPosition report $ SkippedContent ("Unknown escape sequence " ++ s) pos - return mempty + return x currentFont :: PandocMonad m => ManLexer m Font currentFont = fontKind <$> getState @@ -267,12 +268,13 @@ lexMacro :: PandocMonad m => ManLexer m ManToken lexMacro = do char '.' 
<|> char '\'' many spacetab - macroName <- many (letter <|> oneOf ['\\', '"', '&']) + macroName <- many (letter <|> oneOf ['\\', '"', '&', '.']) args <- lexArgs let joinedArgs = unwords $ fst <$> args tok = case macroName of "" -> MComment "" + "." -> MEndMacro x | x `elem` ["\\\"", "\\#"] -> MComment joinedArgs "B" -> MStr (joinedArgs, singleton Bold) "BR" -> MMaybeLink joinedArgs |