aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-10-27 12:28:15 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-10-27 12:28:15 -0700
commit42ba3c0a0b15fddd51e6a4b79882ddaeccf0eb3d (patch)
tree39ca38f86a77b77604509153eee8d9f3565764e0 /src
parent3a5726b2cf9cdb511635209d412ccb8c50f14d6d (diff)
downloadpandoc-42ba3c0a0b15fddd51e6a4b79882ddaeccf0eb3d.tar.gz
Roff reader: use LineParts abstraction.
This didn't really help performance in the end.
Diffstat (limited to 'src')
-rw-r--r--src/Text/Pandoc/Readers/Man.hs16
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs122
2 files changed, 74 insertions, 64 deletions
diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index 3644050c7..2c5d10b93 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -53,6 +53,7 @@ import Text.Pandoc.Readers.Roff -- TODO explicit imports
import Text.Parsec hiding (tokenPrim)
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosString, initialPos)
+import qualified Data.Sequence as Seq
import qualified Data.Foldable as Foldable
data ManState = ManState { readerOptions :: ReaderOptions
@@ -149,8 +150,9 @@ parseTable = do
isHrule ([cellfmt], _) = columnType cellfmt `elem` ['_','-','=']
isHrule (_, [RoffTokens ss]) =
case Foldable.toList ss of
- [MLine [RoffStr [c]]] -> c `elem` ['_','-','=']
- _ -> False
+ [MLine (LineParts (RoffStr [c] Seq.:<| Seq.Empty))]
+ -> c `elem` ['_','-','=']
+ _ -> False
isHrule _ = False
fallback pos = do
@@ -229,8 +231,8 @@ parseTitle = do
modifyState $ \st -> st{ metadata = adjustMeta $ metadata st }
return mempty
-linePartsToInlines :: [LinePart] -> Inlines
-linePartsToInlines = go False
+linePartsToInlines :: LineParts -> Inlines
+linePartsToInlines = go False . Foldable.toList . unLineParts
where
go :: Bool -> [LinePart] -> Inlines
@@ -366,10 +368,10 @@ parseCodeBlock = try $ do
where
extractText :: RoffToken -> Maybe String
- extractText (MLine ss)
- | not (null ss)
+ extractText (MLine (LineParts ss))
+ | not (Seq.null ss)
, all isFontToken ss = Nothing
- | otherwise = Just $ linePartsToString ss
+ | otherwise = Just $ linePartsToString (LineParts ss)
where isFontToken (FontSize{}) = True
isFontToken (Font{}) = True
isFontToken _ = False
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index 1f8ec1c33..cfc9ae980 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -36,6 +36,7 @@ module Text.Pandoc.Readers.Roff
, FontSpec(..)
, defaultFontSpec
, LinePart(..)
+ , LineParts(..)
, Arg
, TableOption
, CellFormat(..)
@@ -56,7 +57,7 @@ import Text.Pandoc.Class
import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
import Data.Default (Default)
import qualified Data.Map as M
-import Data.List (intercalate, isSuffixOf)
+import Data.List (intersperse, isSuffixOf)
import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
@@ -91,7 +92,13 @@ data LinePart = RoffStr String
| MacroArg Int
deriving Show
-type Arg = [LinePart]
+newtype LineParts = LineParts { unLineParts :: Seq LinePart }
+ deriving (Show, Semigroup, Monoid)
+
+singleLinePart :: LinePart -> LineParts
+singleLinePart t = LineParts (Seq.singleton t)
+
+type Arg = LineParts
type TableOption = (String, String)
@@ -105,7 +112,7 @@ data CellFormat =
type TableRow = ([CellFormat], [RoffTokens])
-data RoffToken = MLine [LinePart]
+data RoffToken = MLine LineParts
| MEmptyLine
| MMacro MacroKind [Arg] SourcePos
| MTable [TableOption] [TableRow] SourcePos
@@ -127,7 +134,7 @@ instance Default RoffState where
def = RoffState { customMacros = M.fromList
$ map (\(n, s) ->
(n, singleTok
- (MLine [RoffStr s])))
+ (MLine $ singleLinePart $ RoffStr s)))
[ ("Tm", "\x2122")
, ("lq", "\x201C")
, ("rq", "\x201D")
@@ -157,7 +164,7 @@ combiningAccentsMap :: M.Map String Char
combiningAccentsMap =
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
-escape :: PandocMonad m => RoffLexer m (Seq LinePart)
+escape :: PandocMonad m => RoffLexer m (LineParts)
escape = do
char '\\'
c <- anyChar
@@ -177,18 +184,18 @@ escape = do
':' -> return mempty
'0' -> return mempty
'c' -> return mempty
- '-' -> return $ Seq.singleton $ RoffStr "-"
- '_' -> return $ Seq.singleton $ RoffStr "_"
- ' ' -> return $ Seq.singleton $ RoffStr " "
- '\\' -> return $ Seq.singleton $ RoffStr "\\"
- 't' -> return $ Seq.singleton $ RoffStr "\t"
- 'e' -> return $ Seq.singleton $ RoffStr "\\"
- '`' -> return $ Seq.singleton $ RoffStr "`"
- '^' -> return $ Seq.singleton $ RoffStr " "
- '|' -> return $ Seq.singleton $ RoffStr " "
- '\'' -> return $ Seq.singleton $ RoffStr "`"
- '.' -> return $ Seq.singleton $ RoffStr "`"
- '~' -> return $ Seq.singleton $ RoffStr "\160" -- nonbreaking space
+ '-' -> return $ singleLinePart $ RoffStr "-"
+ '_' -> return $ singleLinePart $ RoffStr "_"
+ ' ' -> return $ singleLinePart $ RoffStr " "
+ '\\' -> return $ singleLinePart $ RoffStr "\\"
+ 't' -> return $ singleLinePart $ RoffStr "\t"
+ 'e' -> return $ singleLinePart $ RoffStr "\\"
+ '`' -> return $ singleLinePart $ RoffStr "`"
+ '^' -> return $ singleLinePart $ RoffStr " "
+ '|' -> return $ singleLinePart $ RoffStr " "
+ '\'' -> return $ singleLinePart $ RoffStr "`"
+ '.' -> return $ singleLinePart $ RoffStr "`"
+ '~' -> return $ singleLinePart $ RoffStr "\160" -- nonbreaking space
_ -> escUnknown ['\\',c]
where
@@ -196,7 +203,7 @@ escape = do
twoCharGlyph = do
cs <- count 2 anyChar
case M.lookup cs characterCodeMap of
- Just c -> return $ Seq.singleton $ RoffStr [c]
+ Just c -> return $ singleLinePart $ RoffStr [c]
Nothing -> escUnknown ('\\':'(':cs)
bracketedGlyph = unicodeGlyph <|> charGlyph
@@ -207,7 +214,7 @@ escape = do
[] -> mzero
[s] -> case M.lookup s characterCodeMap of
Nothing -> mzero
- Just c -> return $ Seq.singleton $ RoffStr [c]
+ Just c -> return $ singleLinePart $ RoffStr [c]
(s:ss) -> do
basechar <- case M.lookup cs characterCodeMap of
Nothing ->
@@ -224,12 +231,12 @@ escape = do
Just x -> addAccents as (x:xs)
Nothing -> mzero
addAccents ss [basechar] >>= \xs ->
- return (Seq.singleton $ RoffStr xs))
+ return (singleLinePart $ RoffStr xs))
<|> escUnknown ("\\[" ++ cs ++ "]")
unicodeGlyph = try $ do
xs <- ucharCode `sepBy1` (char '_') <* char ']'
- return $ Seq.singleton $ RoffStr xs
+ return $ singleLinePart $ RoffStr xs
ucharCode = try $ do
char 'u'
@@ -240,20 +247,20 @@ escape = do
Nothing -> mzero
Just c -> return c
- escUnknown :: PandocMonad m => String -> RoffLexer m (Seq LinePart)
+ escUnknown :: PandocMonad m => String -> RoffLexer m (LineParts)
escUnknown s = do
pos <- getPosition
report $ SkippedContent ("Unknown escape sequence " ++ s) pos
- return $ Seq.singleton $ RoffStr "\xFFFD"
+ return $ singleLinePart $ RoffStr "\xFFFD"
-- \s-1 \s0
-escFontSize :: PandocMonad m => RoffLexer m (Seq LinePart)
+escFontSize :: PandocMonad m => RoffLexer m (LineParts)
escFontSize = do
let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+')
let toFontSize xs =
case safeRead xs of
Nothing -> mzero
- Just n -> return $ Seq.singleton $ FontSize n
+ Just n -> return $ singleLinePart $ FontSize n
choice
[ do char '('
s <- sign
@@ -269,7 +276,7 @@ escFontSize = do
toFontSize (s ++ ds)
]
-escFont :: PandocMonad m => RoffLexer m (Seq LinePart)
+escFont :: PandocMonad m => RoffLexer m (LineParts)
escFont = do
font <- choice
[ digit >> return defaultFontSpec
@@ -279,7 +286,7 @@ escFont = do
]
modifyState $ \st -> st{ prevFont = currentFont st
, currentFont = font }
- return $ Seq.singleton $ Font font
+ return $ singleLinePart $ Font font
lettersFont :: PandocMonad m => RoffLexer m FontSpec
lettersFont = try $ do
@@ -496,10 +503,10 @@ resolveMacro macroName args pos = do
let fillLP (MacroArg i) zs =
case drop (i - 1) args of
[] -> zs
- (ys:_) -> ys ++ zs
- fillLP z zs = z : zs
- let fillMacroArg (MLine lineparts) =
- MLine (foldr fillLP [] lineparts)
+ (LineParts ys:_) -> ys <> zs
+ fillLP z zs = z Seq.<| zs
+ let fillMacroArg (MLine (LineParts lineparts)) =
+ MLine (LineParts (foldr fillLP mempty lineparts))
fillMacroArg x = x
return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts
@@ -508,7 +515,8 @@ lexStringDef args = do -- string definition
case args of
[] -> fail "No argument to .ds"
(x:ys) -> do
- let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys)
+ let ts = singleTok $ MLine $ mconcat
+ $ intersperse (singleLinePart $ RoffStr " " ) ys
let stringName = linePartsToString x
modifyState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
@@ -538,24 +546,24 @@ lexArgs = do
args <- many $ try oneArg
skipMany spacetab
eofline
- return $ map Foldable.toList args
+ return args
where
- oneArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+ oneArg :: PandocMonad m => RoffLexer m (LineParts)
oneArg = do
skipMany $ try $ string "\\\n" -- continuation line
try quotedArg <|> plainArg
-- try, because there are some erroneous files, e.g. linux/bpf.2
- plainArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+ plainArg :: PandocMonad m => RoffLexer m (LineParts)
plainArg = do
skipMany spacetab
mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote)
where
- unescapedQuote = char '"' >> return (Seq.singleton $ RoffStr "\"")
+ unescapedQuote = char '"' >> return (singleLinePart $ RoffStr "\"")
- quotedArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+ quotedArg :: PandocMonad m => RoffLexer m (LineParts)
quotedArg = do
skipMany spacetab
char '"'
@@ -568,9 +576,9 @@ lexArgs = do
escapedQuote = try $ do
char '"'
char '"'
- return $ Seq.singleton $ RoffStr "\""
+ return $ singleLinePart $ RoffStr "\""
-escStar :: PandocMonad m => RoffLexer m (Seq LinePart)
+escStar :: PandocMonad m => RoffLexer m (LineParts)
escStar = try $ do
pos <- getPosition
c <- anyChar
@@ -591,46 +599,46 @@ escStar = try $ do
resolveString stringname pos = do
RoffTokens ts <- resolveMacro stringname [] pos
case Foldable.toList ts of
- [MLine xs] -> return $ Seq.fromList xs
+ [MLine xs] -> return xs
_ -> do
report $ SkippedContent ("unknown string " ++ stringname) pos
return mempty
lexLine :: PandocMonad m => RoffLexer m RoffTokens
lexLine = do
- lnparts <- Foldable.toList . mconcat <$> many1 linePart
+ lnparts <- mconcat <$> many1 linePart
eofline
- go lnparts
- where -- return empty line if we only have empty strings;
+ go (unLineParts lnparts)
+ where -- return mempty if we only have empty strings;
-- this can happen if the line just contains \f[C], for example.
- go [] = return mempty
- go (RoffStr "" : xs) = go xs
- go xs = return $ singleTok $ MLine xs
+ go Seq.Empty = return mempty
+ go ((RoffStr "") Seq.:<| xs) = go xs
+ go xs = return $ singleTok $ MLine (LineParts xs)
-linePart :: PandocMonad m => RoffLexer m (Seq LinePart)
+linePart :: PandocMonad m => RoffLexer m (LineParts)
linePart = macroArg <|> escape <|>
regularText <|> quoteChar <|> spaceTabChar
-macroArg :: PandocMonad m => RoffLexer m (Seq LinePart)
+macroArg :: PandocMonad m => RoffLexer m (LineParts)
macroArg = try $ do
string "\\\\$"
x <- digit
- return $ Seq.singleton $ MacroArg $ ord x - ord '0'
+ return $ singleLinePart $ MacroArg $ ord x - ord '0'
-regularText :: PandocMonad m => RoffLexer m (Seq LinePart)
+regularText :: PandocMonad m => RoffLexer m (LineParts)
regularText = do
s <- many1 $ noneOf "\n\r\t \\\""
- return $ Seq.singleton $ RoffStr s
+ return $ singleLinePart $ RoffStr s
-quoteChar :: PandocMonad m => RoffLexer m (Seq LinePart)
+quoteChar :: PandocMonad m => RoffLexer m (LineParts)
quoteChar = do
char '"'
- return $ Seq.singleton $ RoffStr "\""
+ return $ singleLinePart $ RoffStr "\""
-spaceTabChar :: PandocMonad m => RoffLexer m (Seq LinePart)
+spaceTabChar :: PandocMonad m => RoffLexer m (LineParts)
spaceTabChar = do
c <- spacetab
- return $ Seq.singleton $ RoffStr [c]
+ return $ singleLinePart $ RoffStr [c]
lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
lexEmptyLine = newline >> return (singleTok MEmptyLine)
@@ -638,8 +646,8 @@ lexEmptyLine = newline >> return (singleTok MEmptyLine)
manToken :: PandocMonad m => RoffLexer m RoffTokens
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
-linePartsToString :: [LinePart] -> String
-linePartsToString = mconcat . map go
+linePartsToString :: LineParts -> String
+linePartsToString = Foldable.foldMap go . unLineParts
where
go (RoffStr s) = s
go _ = mempty