From cd93faddbf7def1df5a61b71481a3f399e8cece9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Oct 2018 12:29:54 -0700 Subject: Revert "Roff reader: use LineParts abstraction." This reverts commit 42ba3c0a0b15fddd51e6a4b79882ddaeccf0eb3d. --- src/Text/Pandoc/Readers/Man.hs | 16 +++--- src/Text/Pandoc/Readers/Roff.hs | 122 +++++++++++++++++++--------------------- 2 files changed, 64 insertions(+), 74 deletions(-) (limited to 'src/Text/Pandoc/Readers') diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs index 2c5d10b93..3644050c7 100644 --- a/src/Text/Pandoc/Readers/Man.hs +++ b/src/Text/Pandoc/Readers/Man.hs @@ -53,7 +53,6 @@ import Text.Pandoc.Readers.Roff -- TODO explicit imports import Text.Parsec hiding (tokenPrim) import qualified Text.Parsec as Parsec import Text.Parsec.Pos (updatePosString, initialPos) -import qualified Data.Sequence as Seq import qualified Data.Foldable as Foldable data ManState = ManState { readerOptions :: ReaderOptions @@ -150,9 +149,8 @@ parseTable = do isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','='] isHrule (_, [RoffTokens ss]) = case Foldable.toList ss of - [MLine (LineParts (RoffStr [c] Seq.:<| Seq.Empty))] - -> c `elem` ['_','-','='] - _ -> False + [MLine [RoffStr [c]]] -> c `elem` ['_','-','='] + _ -> False isHrule _ = False fallback pos = do @@ -231,8 +229,8 @@ parseTitle = do modifyState $ \st -> st{ metadata = adjustMeta $ metadata st } return mempty -linePartsToInlines :: LineParts -> Inlines -linePartsToInlines = go False . Foldable.toList . unLineParts +linePartsToInlines :: [LinePart] -> Inlines +linePartsToInlines = go False where go :: Bool -> [LinePart] -> Inlines @@ -368,10 +366,10 @@ parseCodeBlock = try $ do where extractText :: RoffToken -> Maybe String - extractText (MLine (LineParts ss)) - | not (Seq.null ss) + extractText (MLine ss) + | not (null ss) , all isFontToken ss = Nothing - | otherwise = Just $ linePartsToString (LineParts ss) + | otherwise = Just $ linePartsToString ss where isFontToken (FontSize{}) = True isFontToken (Font{}) = True isFontToken _ = False diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs index cfc9ae980..1f8ec1c33 100644 --- a/src/Text/Pandoc/Readers/Roff.hs +++ b/src/Text/Pandoc/Readers/Roff.hs @@ -36,7 +36,6 @@ module Text.Pandoc.Readers.Roff , FontSpec(..) , defaultFontSpec , LinePart(..) - , LineParts(..) , Arg , TableOption , CellFormat(..) @@ -57,7 +56,7 @@ import Text.Pandoc.Class import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace) import Data.Default (Default) import qualified Data.Map as M -import Data.List (intersperse, isSuffixOf) +import Data.List (intercalate, isSuffixOf) import qualified Data.Text as T import Text.Pandoc.Logging (LogMessage(..)) import Text.Pandoc.Options @@ -92,13 +91,7 @@ data LinePart = RoffStr String | MacroArg Int deriving Show -newtype LineParts = LineParts { unLineParts :: Seq LinePart } - deriving (Show, Semigroup, Monoid) - -singleLinePart :: LinePart -> LineParts -singleLinePart t = LineParts (Seq.singleton t) - -type Arg = LineParts +type Arg = [LinePart] type TableOption = (String, String) @@ -112,7 +105,7 @@ data CellFormat = type TableRow = ([CellFormat], [RoffTokens]) -data RoffToken = MLine LineParts +data RoffToken = MLine [LinePart] | MEmptyLine | MMacro MacroKind [Arg] SourcePos | MTable [TableOption] [TableRow] SourcePos @@ -134,7 +127,7 @@ instance Default RoffState where def = RoffState { customMacros = M.fromList $ map (\(n, s) -> (n, singleTok - (MLine $ singleLinePart $ RoffStr s))) + (MLine [RoffStr s]))) [ ("Tm", "\x2122") , ("lq", "\x201C") , ("rq", "\x201D") @@ -164,7 +157,7 @@ combiningAccentsMap :: M.Map String Char combiningAccentsMap = M.fromList $ map (\(x,y) -> (y,x)) combiningAccents -escape :: PandocMonad m => RoffLexer m (LineParts) +escape :: PandocMonad m => RoffLexer m (Seq LinePart) escape = do char '\\' c <- anyChar @@ -184,18 +177,18 @@ escape = do ':' -> return mempty '0' -> return mempty 'c' -> return mempty - '-' -> return $ singleLinePart $ RoffStr "-" - '_' -> return $ singleLinePart $ RoffStr "_" - ' ' -> return $ singleLinePart $ RoffStr " " - '\\' -> return $ singleLinePart $ RoffStr "\\" - 't' -> return $ singleLinePart $ RoffStr "\t" - 'e' -> return $ singleLinePart $ RoffStr "\\" - '`' -> return $ singleLinePart $ RoffStr "`" - '^' -> return $ singleLinePart $ RoffStr " " - '|' -> return $ singleLinePart $ RoffStr " " - '\'' -> return $ singleLinePart $ RoffStr "`" - '.' -> return $ singleLinePart $ RoffStr "`" - '~' -> return $ singleLinePart $ RoffStr "\160" -- nonbreaking space + '-' -> return $ Seq.singleton $ RoffStr "-" + '_' -> return $ Seq.singleton $ RoffStr "_" + ' ' -> return $ Seq.singleton $ RoffStr " " + '\\' -> return $ Seq.singleton $ RoffStr "\\" + 't' -> return $ Seq.singleton $ RoffStr "\t" + 'e' -> return $ Seq.singleton $ RoffStr "\\" + '`' -> return $ Seq.singleton $ RoffStr "`" + '^' -> return $ Seq.singleton $ RoffStr " " + '|' -> return $ Seq.singleton $ RoffStr " " + '\'' -> return $ Seq.singleton $ RoffStr "`" + '.' -> return $ Seq.singleton $ RoffStr "`" + '~' -> return $ Seq.singleton $ RoffStr "\160" -- nonbreaking space _ -> escUnknown ['\\',c] where @@ -203,7 +196,7 @@ escape = do twoCharGlyph = do cs <- count 2 anyChar case M.lookup cs characterCodeMap of - Just c -> return $ singleLinePart $ RoffStr [c] + Just c -> return $ Seq.singleton $ RoffStr [c] Nothing -> escUnknown ('\\':'(':cs) bracketedGlyph = unicodeGlyph <|> charGlyph @@ -214,7 +207,7 @@ escape = do [] -> mzero [s] -> case M.lookup s characterCodeMap of Nothing -> mzero - Just c -> return $ singleLinePart $ RoffStr [c] + Just c -> return $ Seq.singleton $ RoffStr [c] (s:ss) -> do basechar <- case M.lookup cs characterCodeMap of Nothing -> @@ -231,12 +224,12 @@ escape = do Just x -> addAccents as (x:xs) Nothing -> mzero addAccents ss [basechar] >>= \xs -> - return (singleLinePart $ RoffStr xs)) + return (Seq.singleton $ RoffStr xs)) <|> escUnknown ("\\[" ++ cs ++ "]") unicodeGlyph = try $ do xs <- ucharCode `sepBy1` (char '_') <* char ']' - return $ singleLinePart $ RoffStr xs + return $ Seq.singleton $ RoffStr xs ucharCode = try $ do char 'u' @@ -247,20 +240,20 @@ escape = do Nothing -> mzero Just c -> return c - escUnknown :: PandocMonad m => String -> RoffLexer m (LineParts) + escUnknown :: PandocMonad m => String -> RoffLexer m (Seq LinePart) escUnknown s = do pos <- getPosition report $ SkippedContent ("Unknown escape sequence " ++ s) pos - return $ singleLinePart $ RoffStr "\xFFFD" + return $ Seq.singleton $ RoffStr "\xFFFD" -- \s-1 \s0 -escFontSize :: PandocMonad m => RoffLexer m (LineParts) +escFontSize :: PandocMonad m => RoffLexer m (Seq LinePart) escFontSize = do let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+') let toFontSize xs = case safeRead xs of Nothing -> mzero - Just n -> return $ singleLinePart $ FontSize n + Just n -> return $ Seq.singleton $ FontSize n choice [ do char '(' s <- sign @@ -276,7 +269,7 @@ escFontSize = do toFontSize (s ++ ds) ] -escFont :: PandocMonad m => RoffLexer m (LineParts) +escFont :: PandocMonad m => RoffLexer m (Seq LinePart) escFont = do font <- choice [ digit >> return defaultFontSpec @@ -286,7 +279,7 @@ escFont = do ] modifyState $ \st -> st{ prevFont = currentFont st , currentFont = font } - return $ singleLinePart $ Font font + return $ Seq.singleton $ Font font lettersFont :: PandocMonad m => RoffLexer m FontSpec lettersFont = try $ do @@ -503,10 +496,10 @@ resolveMacro macroName args pos = do let fillLP (MacroArg i) zs = case drop (i - 1) args of [] -> zs - (LineParts ys:_) -> ys <> zs - fillLP z zs = z Seq.<| zs - let fillMacroArg (MLine (LineParts lineparts)) = - MLine (LineParts (foldr fillLP mempty lineparts)) + (ys:_) -> ys ++ zs + fillLP z zs = z : zs + let fillMacroArg (MLine lineparts) = + MLine (foldr fillLP [] lineparts) fillMacroArg x = x return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts @@ -515,8 +508,7 @@ lexStringDef args = do -- string definition case args of [] -> fail "No argument to .ds" (x:ys) -> do - let ts = singleTok $ MLine $ mconcat - $ intersperse (singleLinePart $ RoffStr " " ) ys + let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys) let stringName = linePartsToString x modifyState $ \st -> st{ customMacros = M.insert stringName ts (customMacros st) } @@ -546,24 +538,24 @@ lexArgs = do args <- many $ try oneArg skipMany spacetab eofline - return args + return $ map Foldable.toList args where - oneArg :: PandocMonad m => RoffLexer m (LineParts) + oneArg :: PandocMonad m => RoffLexer m (Seq LinePart) oneArg = do skipMany $ try $ string "\\\n" -- continuation line try quotedArg <|> plainArg -- try, because there are some erroneous files, e.g. linux/bpf.2 - plainArg :: PandocMonad m => RoffLexer m (LineParts) + plainArg :: PandocMonad m => RoffLexer m (Seq LinePart) plainArg = do skipMany spacetab mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote) where - unescapedQuote = char '"' >> return (singleLinePart $ RoffStr "\"") + unescapedQuote = char '"' >> return (Seq.singleton $ RoffStr "\"") - quotedArg :: PandocMonad m => RoffLexer m (LineParts) + quotedArg :: PandocMonad m => RoffLexer m (Seq LinePart) quotedArg = do skipMany spacetab char '"' @@ -576,9 +568,9 @@ lexArgs = do escapedQuote = try $ do char '"' char '"' - return $ singleLinePart $ RoffStr "\"" + return $ Seq.singleton $ RoffStr "\"" -escStar :: PandocMonad m => RoffLexer m (LineParts) +escStar :: PandocMonad m => RoffLexer m (Seq LinePart) escStar = try $ do pos <- getPosition c <- anyChar @@ -599,46 +591,46 @@ escStar = try $ do resolveString stringname pos = do RoffTokens ts <- resolveMacro stringname [] pos case Foldable.toList ts of - [MLine xs] -> return xs + [MLine xs] -> return $ Seq.fromList xs _ -> do report $ SkippedContent ("unknown string " ++ stringname) pos return mempty lexLine :: PandocMonad m => RoffLexer m RoffTokens lexLine = do - lnparts <- mconcat <$> many1 linePart + lnparts <- Foldable.toList . mconcat <$> many1 linePart eofline - go (unLineParts lnparts) - where -- return mempty if we only have empty strings; + go lnparts + where -- return empty line if we only have empty strings; -- this can happen if the line just contains \f[C], for example. - go Seq.Empty = return mempty - go ((RoffStr "") Seq.:<| xs) = go xs - go xs = return $ singleTok $ MLine (LineParts xs) + go [] = return mempty + go (RoffStr "" : xs) = go xs + go xs = return $ singleTok $ MLine xs -linePart :: PandocMonad m => RoffLexer m (LineParts) +linePart :: PandocMonad m => RoffLexer m (Seq LinePart) linePart = macroArg <|> escape <|> regularText <|> quoteChar <|> spaceTabChar -macroArg :: PandocMonad m => RoffLexer m (LineParts) +macroArg :: PandocMonad m => RoffLexer m (Seq LinePart) macroArg = try $ do string "\\\\$" x <- digit - return $ singleLinePart $ MacroArg $ ord x - ord '0' + return $ Seq.singleton $ MacroArg $ ord x - ord '0' -regularText :: PandocMonad m => RoffLexer m (LineParts) +regularText :: PandocMonad m => RoffLexer m (Seq LinePart) regularText = do s <- many1 $ noneOf "\n\r\t \\\"" - return $ singleLinePart $ RoffStr s + return $ Seq.singleton $ RoffStr s -quoteChar :: PandocMonad m => RoffLexer m (LineParts) +quoteChar :: PandocMonad m => RoffLexer m (Seq LinePart) quoteChar = do char '"' - return $ singleLinePart $ RoffStr "\"" + return $ Seq.singleton $ RoffStr "\"" -spaceTabChar :: PandocMonad m => RoffLexer m (LineParts) +spaceTabChar :: PandocMonad m => RoffLexer m (Seq LinePart) spaceTabChar = do c <- spacetab - return $ singleLinePart $ RoffStr [c] + return $ Seq.singleton $ RoffStr [c] lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens lexEmptyLine = newline >> return (singleTok MEmptyLine) @@ -646,8 +638,8 @@ lexEmptyLine = newline >> return (singleTok MEmptyLine) manToken :: PandocMonad m => RoffLexer m RoffTokens manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine -linePartsToString :: LineParts -> String -linePartsToString = Foldable.foldMap go . unLineParts +linePartsToString :: [LinePart] -> String +linePartsToString = mconcat . map go where go (RoffStr s) = s go _ = mempty -- cgit v1.2.3