aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2018-10-20 10:37:58 -0700
committer John MacFarlane <jgm@berkeley.edu> 2018-10-20 10:46:08 -0700
commit 08179695fa9741b9d617f1b7729f2dd2476b4f27 (patch)
tree 2de7f7d2ee12c2be14da68a8d15685e780f3fe02
parent 02b33a7d63ed2c930deda358c226031a42b10249 (diff)
download pandoc-08179695fa9741b9d617f1b7729f2dd2476b4f27.tar.gz
Man reader: support '..' (end macro).
Also give feedback for unknown character codes, and return a replacement character U+FFFD.
-rw-r--r-- src/Text/Pandoc/Readers/Man.hs 18
1 files changed, 10 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 21325eb4f..7c54944e3 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -22,7 +22,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Readers.Man
- Copyright : Copyright (C) 2018 Yan Pashkovsky
+ Copyright : Copyright (C) 2018 Yan Pashkovsky and John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : Yan Pashkovsky <yanp.bugz@gmail.com>
@@ -74,6 +74,7 @@ data ManToken = MStr RoffStr
| MEmptyLine
| MMacro MacroKind [RoffStr]
| MComment String
+ | MEndMacro
deriving Show
data RoffState = RoffState { fontKind :: Font
@@ -159,7 +160,7 @@ escapeLexer = do
cs <- count 2 anyChar
case M.lookup cs characterCodeMap of
Just c -> return [c]
- Nothing -> escUnknown ('(':cs)
+ Nothing -> escUnknown ('\\':'(':cs) "\xFFFD"
bracketedGlyph =
char '[' *>
@@ -177,7 +178,7 @@ escapeLexer = do
charCode = do
cs <- many1 (noneOf ['[',']',' ','\t','\n'])
case M.lookup cs characterCodeMap of
- Nothing -> mzero
+ Nothing -> escUnknown ("\\[" ++ cs ++ "]") '\xFFFD'
Just c -> return c
escStar = do
@@ -216,7 +217,7 @@ escapeLexer = do
'|' -> return " "
'\'' -> return "`"
'.' -> return "`"
- _ -> escUnknown [c]
+ _ -> escUnknown [c] "\xFFFD"
escFont :: PandocMonad m => ManLexer m String
escFont = do
@@ -245,11 +246,11 @@ escapeLexer = do
, (char 'P' <|> char 'R') >> return Regular
]
- escUnknown :: PandocMonad m => String -> ManLexer m String
- escUnknown s = do
+ escUnknown :: PandocMonad m => String -> a -> ManLexer m a
+ escUnknown s x = do
pos <- getPosition
report $ SkippedContent ("Unknown escape sequence " ++ s) pos
- return mempty
+ return x
currentFont :: PandocMonad m => ManLexer m Font
currentFont = fontKind <$> getState
@@ -267,12 +268,13 @@ lexMacro :: PandocMonad m => ManLexer m ManToken
lexMacro = do
char '.' <|> char '\''
many spacetab
- macroName <- many (letter <|> oneOf ['\\', '"', '&'])
+ macroName <- many (letter <|> oneOf ['\\', '"', '&', '.'])
args <- lexArgs
let joinedArgs = unwords $ fst <$> args
tok = case macroName of
"" -> MComment ""
+ "." -> MEndMacro
x | x `elem` ["\\\"", "\\#"] -> MComment joinedArgs
"B" -> MStr (joinedArgs, singleton Bold)
"BR" -> MMaybeLink joinedArgs