From 42ba3c0a0b15fddd51e6a4b79882ddaeccf0eb3d Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Sat, 27 Oct 2018 12:28:15 -0700
Subject: Roff reader: use LineParts abstraction.

This didn't really help performance in the end.
---
 src/Text/Pandoc/Readers/Man.hs  |  16 +++---
 src/Text/Pandoc/Readers/Roff.hs | 122 +++++++++++++++++++++-------------------
 2 files changed, 74 insertions(+), 64 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 3644050c7..2c5d10b93 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -53,6 +53,7 @@ import Text.Pandoc.Readers.Roff  -- TODO explicit imports
 import Text.Parsec hiding (tokenPrim)
 import qualified Text.Parsec as Parsec
 import Text.Parsec.Pos (updatePosString, initialPos)
+import qualified Data.Sequence as Seq
 import qualified Data.Foldable as Foldable
 
 data ManState = ManState { readerOptions :: ReaderOptions
@@ -149,8 +150,9 @@ parseTable = do
   isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
   isHrule (_, [RoffTokens ss]) =
     case Foldable.toList ss of
-      [MLine [RoffStr [c]]] -> c `elem` ['_','-','=']
-      _                     -> False
+      [MLine (LineParts (RoffStr [c] Seq.:<| Seq.Empty))]
+                        -> c `elem` ['_','-','=']
+      _                 -> False
   isHrule _ = False
 
   fallback pos = do
@@ -229,8 +231,8 @@ parseTitle = do
   modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
   return mempty
 
-linePartsToInlines :: [LinePart] -> Inlines
-linePartsToInlines = go False
+linePartsToInlines :: LineParts -> Inlines
+linePartsToInlines = go False . Foldable.toList . unLineParts
 
   where
   go :: Bool -> [LinePart] -> Inlines
@@ -366,10 +368,10 @@ parseCodeBlock = try $ do
   where
 
   extractText :: RoffToken -> Maybe String
-  extractText (MLine ss)
-    | not (null ss)
+  extractText (MLine (LineParts ss))
+    | not (Seq.null ss)
     , all isFontToken ss = Nothing
-    | otherwise          = Just $ linePartsToString ss
+    | otherwise          = Just $ linePartsToString (LineParts ss)
     where isFontToken (FontSize{}) = True
           isFontToken (Font{})     = True
           isFontToken _            = False
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 1f8ec1c33..cfc9ae980 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -36,6 +36,7 @@ module Text.Pandoc.Readers.Roff
   , FontSpec(..)
   , defaultFontSpec
   , LinePart(..)
+  , LineParts(..)
   , Arg
   , TableOption
   , CellFormat(..)
@@ -56,7 +57,7 @@ import Text.Pandoc.Class
 import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
 import Data.Default (Default)
 import qualified Data.Map as M
-import Data.List (intercalate, isSuffixOf)
+import Data.List (intersperse, isSuffixOf)
 import qualified Data.Text as T
 import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
@@ -91,7 +92,13 @@ data LinePart = RoffStr String
               | MacroArg Int
               deriving Show
 
-type Arg = [LinePart]
+newtype LineParts = LineParts { unLineParts :: Seq LinePart }
+        deriving (Show, Semigroup, Monoid)
+
+singleLinePart :: LinePart -> LineParts
+singleLinePart t = LineParts (Seq.singleton t)
+
+type Arg = LineParts
 
 type TableOption = (String, String)
 
@@ -105,7 +112,7 @@ data CellFormat =
 
 type TableRow = ([CellFormat], [RoffTokens])
 
-data RoffToken = MLine [LinePart]
+data RoffToken = MLine LineParts
                | MEmptyLine
                | MMacro MacroKind [Arg] SourcePos
                | MTable [TableOption] [TableRow] SourcePos
@@ -127,7 +134,7 @@ instance Default RoffState where
   def = RoffState { customMacros = M.fromList
                        $ map (\(n, s) ->
                                 (n, singleTok
-                                  (MLine [RoffStr s])))
+                                  (MLine $ singleLinePart $ RoffStr s)))
                        [ ("Tm", "\x2122")
                        , ("lq", "\x201C")
                        , ("rq", "\x201D")
@@ -157,7 +164,7 @@ combiningAccentsMap :: M.Map String Char
 combiningAccentsMap =
   M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
 
-escape :: PandocMonad m => RoffLexer m (Seq LinePart)
+escape :: PandocMonad m => RoffLexer m (LineParts)
 escape = do
   char '\\'
   c <- anyChar
@@ -177,18 +184,18 @@ escape = do
     ':' -> return mempty
     '0' -> return mempty
     'c' -> return mempty
-    '-' -> return $ Seq.singleton $ RoffStr "-"
-    '_' -> return $ Seq.singleton $ RoffStr "_"
-    ' ' -> return $ Seq.singleton $ RoffStr " "
-    '\\' -> return $ Seq.singleton $ RoffStr "\\"
-    't' -> return $ Seq.singleton $ RoffStr "\t"
-    'e' -> return $ Seq.singleton $ RoffStr "\\"
-    '`' -> return $ Seq.singleton $ RoffStr "`"
-    '^' -> return $ Seq.singleton $ RoffStr " "
-    '|' -> return $ Seq.singleton $ RoffStr " "
-    '\'' -> return $ Seq.singleton $ RoffStr "`"
-    '.' -> return $ Seq.singleton $ RoffStr "`"
-    '~' -> return $ Seq.singleton $ RoffStr "\160" -- nonbreaking space
+    '-' -> return $ singleLinePart $ RoffStr "-"
+    '_' -> return $ singleLinePart $ RoffStr "_"
+    ' ' -> return $ singleLinePart $ RoffStr " "
+    '\\' -> return $ singleLinePart $ RoffStr "\\"
+    't' -> return $ singleLinePart $ RoffStr "\t"
+    'e' -> return $ singleLinePart $ RoffStr "\\"
+    '`' -> return $ singleLinePart $ RoffStr "`"
+    '^' -> return $ singleLinePart $ RoffStr " "
+    '|' -> return $ singleLinePart $ RoffStr " "
+    '\'' -> return $ singleLinePart $ RoffStr "`"
+    '.' -> return $ singleLinePart $ RoffStr "`"
+    '~' -> return $ singleLinePart $ RoffStr "\160" -- nonbreaking space
     _   -> escUnknown ['\\',c]
 
   where
@@ -196,7 +203,7 @@ escape = do
   twoCharGlyph = do
     cs <- count 2 anyChar
     case M.lookup cs characterCodeMap of
-      Just c  -> return $ Seq.singleton $ RoffStr [c]
+      Just c  -> return $ singleLinePart $ RoffStr [c]
       Nothing -> escUnknown ('\\':'(':cs)
 
   bracketedGlyph = unicodeGlyph <|> charGlyph
@@ -207,7 +214,7 @@ escape = do
         []  -> mzero
         [s] -> case M.lookup s characterCodeMap of
                  Nothing -> mzero
-                 Just c  -> return $ Seq.singleton $ RoffStr [c]
+                 Just c  -> return $ singleLinePart $ RoffStr [c]
         (s:ss) -> do
           basechar <- case M.lookup cs characterCodeMap of
                         Nothing ->
@@ -224,12 +231,12 @@ escape = do
                   Just x  -> addAccents as (x:xs)
                   Nothing -> mzero
           addAccents ss [basechar] >>= \xs ->
-             return (Seq.singleton $ RoffStr xs))
+             return (singleLinePart $ RoffStr xs))
       <|> escUnknown ("\\[" ++ cs ++ "]")
 
   unicodeGlyph = try $ do
     xs <- ucharCode `sepBy1` (char '_') <* char ']'
-    return $ Seq.singleton $ RoffStr xs
+    return $ singleLinePart $ RoffStr xs
 
   ucharCode = try $ do
     char 'u'
@@ -240,20 +247,20 @@ escape = do
        Nothing  -> mzero
        Just c   -> return c
 
-  escUnknown :: PandocMonad m => String -> RoffLexer m (Seq LinePart)
+  escUnknown :: PandocMonad m => String -> RoffLexer m (LineParts)
   escUnknown s = do
     pos <- getPosition
     report $ SkippedContent ("Unknown escape sequence " ++ s) pos
-    return $ Seq.singleton $ RoffStr "\xFFFD"
+    return $ singleLinePart $ RoffStr "\xFFFD"
 
 -- \s-1 \s0
-escFontSize :: PandocMonad m => RoffLexer m (Seq LinePart)
+escFontSize :: PandocMonad m => RoffLexer m (LineParts)
 escFontSize = do
   let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+')
   let toFontSize xs =
         case safeRead xs of
           Nothing  -> mzero
-          Just n   -> return $ Seq.singleton $ FontSize n
+          Just n   -> return $ singleLinePart $ FontSize n
   choice
     [ do char '('
          s <- sign
@@ -269,7 +276,7 @@ escFontSize = do
          toFontSize (s ++ ds)
     ]
 
-escFont :: PandocMonad m => RoffLexer m (Seq LinePart)
+escFont :: PandocMonad m => RoffLexer m (LineParts)
 escFont = do
   font <- choice
         [ digit >> return defaultFontSpec
@@ -279,7 +286,7 @@ escFont = do
         ]
   modifyState $ \st -> st{ prevFont = currentFont st
                          , currentFont = font }
-  return $ Seq.singleton $ Font font
+  return $ singleLinePart $ Font font
 
 lettersFont :: PandocMonad m => RoffLexer m FontSpec
 lettersFont = try $ do
@@ -496,10 +503,10 @@ resolveMacro macroName args pos = do
       let fillLP (MacroArg i)    zs =
             case drop (i - 1) args of
               []     -> zs
-              (ys:_) -> ys ++ zs
-          fillLP z zs = z : zs
-      let fillMacroArg (MLine lineparts) =
-            MLine (foldr fillLP [] lineparts)
+              (LineParts ys:_) -> ys <> zs
+          fillLP z zs = z Seq.<| zs
+      let fillMacroArg (MLine (LineParts lineparts)) =
+            MLine (LineParts (foldr fillLP mempty lineparts))
           fillMacroArg x = x
       return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts
 
@@ -508,7 +515,8 @@ lexStringDef args = do -- string definition
    case args of
      []     -> fail "No argument to .ds"
      (x:ys) -> do
-       let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys)
+       let ts = singleTok $ MLine $ mconcat
+                 $ intersperse (singleLinePart $ RoffStr " " ) ys
        let stringName = linePartsToString x
        modifyState $ \st ->
          st{ customMacros = M.insert stringName ts (customMacros st) }
@@ -538,24 +546,24 @@ lexArgs = do
   args <- many $ try oneArg
   skipMany spacetab
   eofline
-  return $ map Foldable.toList args
+  return args
 
   where
 
-  oneArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+  oneArg :: PandocMonad m => RoffLexer m (LineParts)
   oneArg = do
     skipMany $ try $ string "\\\n"  -- continuation line
     try quotedArg <|> plainArg
     -- try, because there are some erroneous files, e.g. linux/bpf.2
 
-  plainArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+  plainArg :: PandocMonad m => RoffLexer m (LineParts)
   plainArg = do
     skipMany spacetab
     mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote)
     where
-      unescapedQuote = char '"' >> return (Seq.singleton $ RoffStr "\"")
+      unescapedQuote = char '"' >> return (singleLinePart $ RoffStr "\"")
 
-  quotedArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+  quotedArg :: PandocMonad m => RoffLexer m (LineParts)
   quotedArg = do
     skipMany spacetab
     char '"'
@@ -568,9 +576,9 @@ lexArgs = do
       escapedQuote = try $ do
         char '"'
         char '"'
-        return $ Seq.singleton $ RoffStr "\""
+        return $ singleLinePart $ RoffStr "\""
 
-escStar :: PandocMonad m => RoffLexer m (Seq LinePart)
+escStar :: PandocMonad m => RoffLexer m (LineParts)
 escStar = try $ do
   pos <- getPosition
   c <- anyChar
@@ -591,46 +599,46 @@ escStar = try $ do
   resolveString stringname pos = do
     RoffTokens ts <- resolveMacro stringname [] pos
     case Foldable.toList ts of
-      [MLine xs] -> return $ Seq.fromList xs
+      [MLine xs] -> return xs
       _          -> do
         report $ SkippedContent ("unknown string " ++ stringname) pos
         return mempty
 
 lexLine :: PandocMonad m => RoffLexer m RoffTokens
 lexLine = do
-  lnparts <- Foldable.toList . mconcat <$> many1 linePart
+  lnparts <- mconcat <$> many1 linePart
   eofline
-  go lnparts
-  where  -- return empty line if we only have empty strings;
+  go (unLineParts lnparts)
+  where  -- return mempty if we only have empty strings;
          -- this can happen if the line just contains \f[C], for example.
-    go [] = return mempty
-    go (RoffStr "" : xs) = go xs
-    go xs = return $ singleTok $ MLine xs
+    go Seq.Empty = return mempty
+    go ((RoffStr "") Seq.:<| xs) = go xs
+    go xs = return $ singleTok $ MLine (LineParts xs)
 
-linePart :: PandocMonad m => RoffLexer m (Seq LinePart)
+linePart :: PandocMonad m => RoffLexer m (LineParts)
 linePart = macroArg <|> escape <|>
            regularText <|> quoteChar <|> spaceTabChar
 
-macroArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+macroArg :: PandocMonad m => RoffLexer m (LineParts)
 macroArg = try $ do
   string "\\\\$"
   x <- digit
-  return $ Seq.singleton $ MacroArg $ ord x - ord '0'
+  return $ singleLinePart $ MacroArg $ ord x - ord '0'
 
-regularText :: PandocMonad m => RoffLexer m (Seq LinePart)
+regularText :: PandocMonad m => RoffLexer m (LineParts)
 regularText = do
   s <- many1 $ noneOf "\n\r\t \\\""
-  return $ Seq.singleton $ RoffStr s
+  return $ singleLinePart $ RoffStr s
 
-quoteChar :: PandocMonad m => RoffLexer m (Seq LinePart)
+quoteChar :: PandocMonad m => RoffLexer m (LineParts)
 quoteChar = do
   char '"'
-  return $ Seq.singleton $ RoffStr "\""
+  return $ singleLinePart $ RoffStr "\""
 
-spaceTabChar :: PandocMonad m => RoffLexer m (Seq LinePart)
+spaceTabChar :: PandocMonad m => RoffLexer m (LineParts)
 spaceTabChar = do
   c <- spacetab
-  return $ Seq.singleton $ RoffStr [c]
+  return $ singleLinePart $ RoffStr [c]
 
 lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
 lexEmptyLine = newline >> return (singleTok MEmptyLine)
@@ -638,8 +646,8 @@ lexEmptyLine = newline >> return (singleTok MEmptyLine)
 manToken :: PandocMonad m => RoffLexer m RoffTokens
 manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
 
-linePartsToString :: [LinePart] -> String
-linePartsToString = mconcat . map go
+linePartsToString :: LineParts -> String
+linePartsToString = Foldable.foldMap go . unLineParts
   where
   go (RoffStr s) = s
   go _ = mempty
-- 
cgit v1.2.3