diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Roff.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Roff.hs | 78 |
1 files changed, 27 insertions, 51 deletions
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index 632578da7..4919c5bc0 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -1,7 +1,4 @@ {-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {- Copyright (C) 2018 Yan Pashkovsky <yanp.bugz@gmail.com> @@ -51,9 +48,8 @@ where import Prelude import Safe (lastDef) -import Control.Monad (void, mzero, mplus, guard) +import Control.Monad (void, mzero, mplus) import Control.Monad.Except (throwError) -import Control.Monad.State (StateT(..), evalStateT, get, modify, put) import Text.Pandoc.Class (getResourcePath, readFileFromDirs, PandocMonad(..), report) import Data.Char (isLower, toLower, toUpper, chr, @@ -121,7 +117,8 @@ data RoffMode = NormalMode | CopyMode deriving Show -data RoffState = RoffState { prevFont :: FontSpec +data RoffState = RoffState { customMacros :: M.Map String RoffTokens + , prevFont :: FontSpec , currentFont :: FontSpec , tableTabChar :: Char , roffMode :: RoffMode @@ -129,17 +126,7 @@ data RoffState = RoffState { prevFont :: FontSpec } deriving Show instance Default RoffState where - def = RoffState { prevFont = defaultFontSpec - , currentFont = defaultFontSpec - , tableTabChar = '\t' - , roffMode = NormalMode - , lastExpression = Nothing - } - -type MacroState = M.Map String RoffTokens - -initialMacroState :: MacroState -initialMacroState = M.fromList + def = RoffState { customMacros = M.fromList $ map (\(n, s) -> (n, singleTok (TextLine [RoffStr s]))) @@ -147,19 +134,14 @@ initialMacroState = M.fromList , ("lq", "\x201C") , ("rq", "\x201D") , ("R", "\x00AE") ] + , prevFont = defaultFontSpec + , currentFont = defaultFontSpec + , tableTabChar = '\t' + , roffMode = NormalMode + , lastExpression = Nothing + } -newtype RoffStream = RoffStream{ unRoffStream :: [Char] } - deriving (Show) - -deriving instance Semigroup RoffStream -deriving instance Monoid RoffStream - -instance Monad m => Stream RoffStream (StateT MacroState m) Char - where - uncons (RoffStream []) = return Nothing - uncons (RoffStream (c:cs)) = return (Just (c, RoffStream cs)) - -type RoffLexer m = ParserT RoffStream RoffState (StateT MacroState m) +type RoffLexer m = ParserT [Char] RoffState m -- -- Lexer: String -> RoffToken @@ -230,14 +212,14 @@ readUnicodeChar _ = Nothing escapeNormal :: PandocMonad m => RoffLexer m [LinePart] escapeNormal = do c <- anyChar - (case c of + case c of ' ' -> return [RoffStr " "] '"' -> mempty <$ skipMany (satisfy (/='\n')) -- line comment '#' -> mempty <$ manyTill anyChar newline '%' -> return mempty -- optional hyphenation '&' -> return mempty -- nonprintable zero-width ')' -> return mempty -- nonprintable zero-width - '*' -> escString <|> escIgnore '*' [] + '*' -> escString ',' -> return mempty -- to fix spacing after roman '-' -> return [RoffStr "-"] '.' -> return [RoffStr "`"] @@ -301,7 +283,7 @@ escapeNormal = do CopyMode -> char '\\' NormalMode -> return '\\' return [RoffStr "\\"] - _ -> return [RoffStr [c]]) <|> escIgnore c [] + _ -> return [RoffStr [c]] -- man 7 groff: "If a backslash is followed by a character that -- does not constitute a defined escape sequence, the backslash -- is silently ignored and the character maps to itself." @@ -312,8 +294,7 @@ escIgnore :: PandocMonad m -> RoffLexer m [LinePart] escIgnore c argparsers = do pos <- getPosition - pos' <- (optional (choice argparsers) >> getPosition) - arg <- manyTill anyChar (getPosition >>= guard . (== pos')) + arg <- snd <$> withRaw (choice argparsers) <|> return "" report $ SkippedContent ('\\':c:arg) pos return mempty @@ -515,20 +496,17 @@ lexConditional mname = do then fmap not . lastExpression <$> getState else expression skipMany spacetab - macros <- get -- save macro state, so we can reset it - st <- getState + st <- getState -- save state, so we can reset it ifPart <- lexGroup <|> (char '\\' >> newline >> manToken) <|> manToken case mbtest of Nothing -> do - put macros -- reset state, so we don't record macros in skipped section - putState st + putState st -- reset state, so we don't record macros in skipped section report $ SkippedContent ('.':mname) pos return mempty Just True -> return ifPart Just False -> do - put macros putState st return mempty @@ -565,14 +543,14 @@ lexIncludeFile args = do result <- readFileFromDirs dirs fp case result of Nothing -> report $ CouldNotLoadIncludeFile fp pos - Just s -> getInput >>= setInput . (RoffStream s <>) -- TODO sourcepos! + Just s -> getInput >>= setInput . (s ++) return mempty [] -> return mempty resolveMacro :: PandocMonad m => String -> [Arg] -> SourcePos -> RoffLexer m RoffTokens resolveMacro macroName args pos = do - macros <- get + macros <- customMacros <$> getState case M.lookup macroName macros of Nothing -> return $ singleTok $ ControlLine macroName args pos Just ts -> do @@ -593,7 +571,8 @@ lexStringDef args = do -- string definition (x:ys) -> do let ts = singleTok $ TextLine (intercalate [RoffStr " " ] ys) let stringName = linePartsToString x - modify (M.insert stringName ts) + modifyState $ \st -> + st{ customMacros = M.insert stringName ts (customMacros st) } return mempty lexMacroDef :: PandocMonad m => [Arg] -> RoffLexer m RoffTokens @@ -612,8 +591,9 @@ lexMacroDef args = do -- macro definition _ <- lexArgs return () ts <- mconcat <$> manyTill manToken stop - modify (M.insert macroName ts) - modifyState $ \st -> st{ roffMode = NormalMode } + modifyState $ \st -> + st{ customMacros = M.insert macroName ts (customMacros st) + , roffMode = NormalMode } return mempty lexArgs :: PandocMonad m => RoffLexer m [Arg] @@ -655,7 +635,7 @@ lexArgs = do checkDefined :: PandocMonad m => String -> RoffLexer m [LinePart] checkDefined name = do - macros <- get + macros <- customMacros <$> getState case M.lookup name macros of Just _ -> return [RoffStr "1"] Nothing -> return [RoffStr "0"] @@ -749,12 +729,8 @@ linePartsToString = mconcat . map go -- | Tokenize a string as a sequence of roff tokens. lexRoff :: PandocMonad m => SourcePos -> T.Text -> m RoffTokens lexRoff pos txt = do - eithertokens <- evalStateT - (readWithM - (do setPosition pos - mconcat <$> many manToken) def - (RoffStream (T.unpack txt))) - initialMacroState + eithertokens <- readWithM (do setPosition pos + mconcat <$> many manToken) def (T.unpack txt) case eithertokens of Left e -> throwError e Right tokenz -> return tokenz |