aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-10-28 21:37:57 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-10-28 21:37:57 -0700
commit8d55dc10cd61ba6ddc9067eaff6246a41557632b (patch)
tree55c0cb1bb55d9e47388715369ae22e84f809fec3
parent7a30eae6935057a9395e5346a1635230389004f5 (diff)
downloadpandoc-8d55dc10cd61ba6ddc9067eaff6246a41557632b.tar.gz
Roff tokenizer: better handling of escapes.
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs49
-rw-r--r--test/Tests/Readers/Man.hs4
2 files changed, 41 insertions, 12 deletions
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 0568f777d..72c97e22f 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -215,20 +215,24 @@ escapeNormal :: PandocMonad m => RoffLexer m [LinePart]
escapeNormal = do
c <- anyChar
case c of
+ 'A' -> quoteArg >>= checkDefined
'C' -> quoteArg >>= resolveGlyph '\''
'f' -> escFont
's' -> escFontSize
'*' -> escString
'"' -> mempty <$ skipMany (satisfy (/='\n')) -- line comment
'#' -> mempty <$ manyTill anyChar newline
- '%' -> return mempty
+ '%' -> return mempty -- optional hyphenation
+ ':' -> return mempty -- zero-width break
'{' -> return mempty
'}' -> return mempty
- '&' -> return mempty
- '\n' -> return mempty
- ':' -> return mempty
- '0' -> return mempty
- 'c' -> return mempty
+ '&' -> return mempty -- nonprintable zero-width
+ ')' -> return mempty -- nonprintable zero-width
+ '/' -> return mempty -- to fix spacing before roman
+ ',' -> return mempty -- to fix spacing after roman
+ '\n' -> return mempty -- line continuation
+ 'c' -> return mempty -- interrupt text processing
+ 'a' -> return mempty -- "non-interpreted leader character"
'-' -> return [RoffStr "-"]
'_' -> return [RoffStr "_"]
' ' -> return [RoffStr " "]
@@ -240,13 +244,31 @@ escapeNormal = do
return [RoffStr "\\"]
't' -> return [RoffStr "\t"]
'e' -> return [RoffStr "\\"]
+ 'E' -> do
+ mode <- roffMode <$> getState
+ case mode of
+ CopyMode -> return mempty
+ NormalMode -> return [RoffStr "\\"]
'`' -> return [RoffStr "`"]
- '^' -> return [RoffStr " "]
- '|' -> return [RoffStr " "]
+ '^' -> return [RoffStr "\x200A"] -- 1/12 em space
+ '|' -> return [RoffStr "\x2006"] --1/6 em space
'\'' -> return [RoffStr "`"]
'.' -> return [RoffStr "`"]
'~' -> return [RoffStr "\160"] -- nonbreaking space
- _ -> escUnknown ['\\',c]
+ '0' -> return [RoffStr "\x2007"] -- digit-width space
+ _ -> escIgnore c
+
+escIgnore :: PandocMonad m => Char -> RoffLexer m [LinePart]
+escIgnore c = do
+ pos <- getPosition
+ nextc <- lookAhead anyChar
+ arg <- case nextc of
+ '[' -> (\x -> "[" ++ x ++ "]") <$> escapeArg
+ '(' -> ('(':) <$> escapeArg
+ '\'' -> (\x -> "'" ++ x ++ "'") <$> quoteArg
+ _ -> count 1 anyChar
+ report $ SkippedContent ('\\':c:arg) pos
+ return mempty
escUnknown :: PandocMonad m => String -> RoffLexer m [LinePart]
escUnknown s = do
@@ -291,7 +313,7 @@ quoteArg = char '\'' *> manyTill (noneOf ['\n','\'']) (char '\'')
escFont :: PandocMonad m => RoffLexer m [LinePart]
escFont = do
font <- escapeArg <|> count 1 alphaNum
- font' <- if null font
+ font' <- if null font || font == "P"
then prevFont <$> getState
else return $ foldr processFontLetter defaultFontSpec font
modifyState $ \st -> st{ prevFont = currentFont st
@@ -581,6 +603,13 @@ lexArgs = do
char '"'
return [RoffStr "\""]
+checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart]
+checkDefined name = do
+ macros <- customMacros <$> getState
+ case M.lookup name macros of
+ Just _ -> return [RoffStr "1"]
+ Nothing -> return [RoffStr "0"]
+
escString :: PandocMonad m => RoffLexer m [LinePart]
escString = try $ do
pos <- getPosition
diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs
index d48919e54..c1bf10c80 100644
--- a/test/Tests/Readers/Man.hs
+++ b/test/Tests/Readers/Man.hs
@@ -63,12 +63,12 @@ tests = [
text " ok")
, "skip" =:
"a\\%\\{\\}\\\n\\:b\\0"
- =?> (para $ str "ab")
+ =?> (para $ str "ab\8199")
, "replace" =:
"\\-\\ \\\\\\[lq]\\[rq]\\[em]\\[en]\\*(lq\\*(rq"
=?> (para $ text "- \\“”—–“”")
, "replace2" =:
- "\\t\\e\\`\\^\\|\\'" =?> (para $ text "\\` `")
+ "\\t\\e\\`\\^\\|\\'" =?> (para $ text "\\`\8202\8198`")
, "comment with \\\"" =:
"Foo \\\" bar\n" =?> (para $ text "Foo")
, "comment with \\#" =: