aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Roff.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-11-02 17:23:11 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-11-02 17:23:11 -0700
commit9e369e90164fe832d4e09ba51fbdf4ba4d2f9ba1 (patch)
tree8d0e33fbffed2669fb0e639bfdd5fc8a0a6240f0 /src/Text/Pandoc/Readers/Roff.hs
parente01ca7731394b135672e94e31d38a493fc17da24 (diff)
downloadpandoc-9e369e90164fe832d4e09ba51fbdf4ba4d2f9ba1.tar.gz
Roff reader: improve lexing of conditionals.
Partially addreses #5039.
Diffstat (limited to 'src/Text/Pandoc/Readers/Roff.hs')
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs77
1 files changed, 49 insertions, 28 deletions
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index cd8ec94bd..4919c5bc0 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -48,7 +48,7 @@ where
import Prelude
import Safe (lastDef)
-import Control.Monad (void, mzero, guard, when, mplus)
+import Control.Monad (void, mzero, mplus)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class
(getResourcePath, readFileFromDirs, PandocMonad(..), report)
@@ -56,7 +56,7 @@ import Data.Char (isLower, toLower, toUpper, chr,
isAscii, isAlphaNum, isSpace)
import Data.Default (Default)
import qualified Data.Map as M
-import Data.List (intercalate, isSuffixOf)
+import Data.List (intercalate)
import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
@@ -117,11 +117,12 @@ data RoffMode = NormalMode
| CopyMode
deriving Show
-data RoffState = RoffState { customMacros :: M.Map String RoffTokens
- , prevFont :: FontSpec
- , currentFont :: FontSpec
- , tableTabChar :: Char
- , roffMode :: RoffMode
+data RoffState = RoffState { customMacros :: M.Map String RoffTokens
+ , prevFont :: FontSpec
+ , currentFont :: FontSpec
+ , tableTabChar :: Char
+ , roffMode :: RoffMode
+ , lastExpression :: Maybe Bool
} deriving Show
instance Default RoffState where
@@ -137,6 +138,7 @@ instance Default RoffState where
, currentFont = defaultFontSpec
, tableTabChar = '\t'
, roffMode = NormalMode
+ , lastExpression = Nothing
}
type RoffLexer m = ParserT [Char] RoffState m
@@ -349,15 +351,16 @@ lexComment = do
lexMacro :: PandocMonad m => RoffLexer m RoffTokens
lexMacro = do
pos <- getPosition
- guard $ sourceColumn pos == 1
+ -- we don't want this because of '.ie .B foo':
+ -- guard $ sourceColumn pos == 1
char '.' <|> char '\''
skipMany spacetab
macroName <- many (satisfy (not . isSpace))
case macroName of
"nop" -> return mempty
- "ie" -> lexConditional
- "if" -> lexConditional
- "el" -> skipConditional
+ "ie" -> lexConditional "ie"
+ "if" -> lexConditional "if"
+ "el" -> lexConditional "el"
"TS" -> lexTable pos
_ -> do
@@ -484,18 +487,43 @@ tableColFormat = do
-- We don't fully handle the conditional. But we do
-- include everything under '.ie n', which occurs commonly
--- in man pages. We always skip the '.el' part.
-lexConditional :: PandocMonad m => RoffLexer m RoffTokens
-lexConditional = do
+-- in man pages.
+lexConditional :: PandocMonad m => String -> RoffLexer m RoffTokens
+lexConditional mname = do
+ pos <- getPosition
+ skipMany spacetab
+ mbtest <- if mname == "el"
+ then fmap not . lastExpression <$> getState
+ else expression
skipMany spacetab
- lexNCond <|> skipConditional
+ st <- getState -- save state, so we can reset it
+ ifPart <- lexGroup
+ <|> (char '\\' >> newline >> manToken)
+ <|> manToken
+ case mbtest of
+ Nothing -> do
+ putState st -- reset state, so we don't record macros in skipped section
+ report $ SkippedContent ('.':mname) pos
+ return mempty
+ Just True -> return ifPart
+ Just False -> do
+ putState st
+ return mempty
--- n means nroff mode
-lexNCond :: PandocMonad m => RoffLexer m RoffTokens
-lexNCond = do
- newline
- many1 spacetab
- lexGroup <|> manToken
+expression :: PandocMonad m => RoffLexer m (Maybe Bool)
+expression = do
+ raw <- charsInBalanced '(' ')' (satisfy (/= '\n'))
+ <|> many1 nonspaceChar
+ returnValue $
+ case raw of
+ "1" -> Just True
+ "n" -> Just True -- nroff mode
+ "t" -> Just False -- troff mode
+ _ -> Nothing
+ where
+ returnValue v = do
+ modifyState $ \st -> st{ lastExpression = v }
+ return v
lexGroup :: PandocMonad m => RoffLexer m RoffTokens
lexGroup = do
@@ -505,13 +533,6 @@ lexGroup = do
groupstart = try $ string "\\{\\" >> newline
groupend = try $ string "\\}" >> eofline
-skipConditional :: PandocMonad m => RoffLexer m RoffTokens
-skipConditional = do
- rest <- anyLine
- when ("\\{\\" `isSuffixOf` rest) $
- void $ manyTill anyChar (try (string "\\}"))
- return mempty
-
lexIncludeFile :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens
lexIncludeFile args = do
pos <- getPosition