aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Roff.hs
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2018-10-27 12:29:54 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2018-10-27 12:29:54 -0700
commitcd93faddbf7def1df5a61b71481a3f399e8cece9 (patch)
tree73ba2d8e9f0f3dad3966a9ac5ea950a6ddb70c43 /src/Text/Pandoc/Readers/Roff.hs
parent42ba3c0a0b15fddd51e6a4b79882ddaeccf0eb3d (diff)
downloadpandoc-cd93faddbf7def1df5a61b71481a3f399e8cece9.tar.gz
Revert "Roff reader: use LineParts abstraction."
This reverts commit 42ba3c0a0b15fddd51e6a4b79882ddaeccf0eb3d.
Diffstat (limited to 'src/Text/Pandoc/Readers/Roff.hs')
-rw-r--r--src/Text/Pandoc/Readers/Roff.hs122
1 files changed, 57 insertions, 65 deletions
diff --git a/src/Text/Pandoc/Readers/Roff.hs b/src/Text/Pandoc/Readers/Roff.hs
index cfc9ae980..1f8ec1c33 100644
--- a/src/Text/Pandoc/Readers/Roff.hs
+++ b/src/Text/Pandoc/Readers/Roff.hs
@@ -36,7 +36,6 @@ module Text.Pandoc.Readers.Roff
, FontSpec(..)
, defaultFontSpec
, LinePart(..)
- , LineParts(..)
, Arg
, TableOption
, CellFormat(..)
@@ -57,7 +56,7 @@ import Text.Pandoc.Class
import Data.Char (isHexDigit, chr, ord, isAscii, isAlphaNum, isSpace)
import Data.Default (Default)
import qualified Data.Map as M
-import Data.List (intersperse, isSuffixOf)
+import Data.List (intercalate, isSuffixOf)
import qualified Data.Text as T
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
@@ -92,13 +91,7 @@ data LinePart = RoffStr String
| MacroArg Int
deriving Show
-newtype LineParts = LineParts { unLineParts :: Seq LinePart }
- deriving (Show, Semigroup, Monoid)
-
-singleLinePart :: LinePart -> LineParts
-singleLinePart t = LineParts (Seq.singleton t)
-
-type Arg = LineParts
+type Arg = [LinePart]
type TableOption = (String, String)
@@ -112,7 +105,7 @@ data CellFormat =
type TableRow = ([CellFormat], [RoffTokens])
-data RoffToken = MLine LineParts
+data RoffToken = MLine [LinePart]
| MEmptyLine
| MMacro MacroKind [Arg] SourcePos
| MTable [TableOption] [TableRow] SourcePos
@@ -134,7 +127,7 @@ instance Default RoffState where
def = RoffState { customMacros = M.fromList
$ map (\(n, s) ->
(n, singleTok
- (MLine $ singleLinePart $ RoffStr s)))
+ (MLine [RoffStr s])))
[ ("Tm", "\x2122")
, ("lq", "\x201C")
, ("rq", "\x201D")
@@ -164,7 +157,7 @@ combiningAccentsMap :: M.Map String Char
combiningAccentsMap =
M.fromList $ map (\(x,y) -> (y,x)) combiningAccents
-escape :: PandocMonad m => RoffLexer m (LineParts)
+escape :: PandocMonad m => RoffLexer m (Seq LinePart)
escape = do
char '\\'
c <- anyChar
@@ -184,18 +177,18 @@ escape = do
':' -> return mempty
'0' -> return mempty
'c' -> return mempty
- '-' -> return $ singleLinePart $ RoffStr "-"
- '_' -> return $ singleLinePart $ RoffStr "_"
- ' ' -> return $ singleLinePart $ RoffStr " "
- '\\' -> return $ singleLinePart $ RoffStr "\\"
- 't' -> return $ singleLinePart $ RoffStr "\t"
- 'e' -> return $ singleLinePart $ RoffStr "\\"
- '`' -> return $ singleLinePart $ RoffStr "`"
- '^' -> return $ singleLinePart $ RoffStr " "
- '|' -> return $ singleLinePart $ RoffStr " "
- '\'' -> return $ singleLinePart $ RoffStr "`"
- '.' -> return $ singleLinePart $ RoffStr "`"
- '~' -> return $ singleLinePart $ RoffStr "\160" -- nonbreaking space
+ '-' -> return $ Seq.singleton $ RoffStr "-"
+ '_' -> return $ Seq.singleton $ RoffStr "_"
+ ' ' -> return $ Seq.singleton $ RoffStr " "
+ '\\' -> return $ Seq.singleton $ RoffStr "\\"
+ 't' -> return $ Seq.singleton $ RoffStr "\t"
+ 'e' -> return $ Seq.singleton $ RoffStr "\\"
+ '`' -> return $ Seq.singleton $ RoffStr "`"
+ '^' -> return $ Seq.singleton $ RoffStr " "
+ '|' -> return $ Seq.singleton $ RoffStr " "
+ '\'' -> return $ Seq.singleton $ RoffStr "`"
+ '.' -> return $ Seq.singleton $ RoffStr "`"
+ '~' -> return $ Seq.singleton $ RoffStr "\160" -- nonbreaking space
_ -> escUnknown ['\\',c]
where
@@ -203,7 +196,7 @@ escape = do
twoCharGlyph = do
cs <- count 2 anyChar
case M.lookup cs characterCodeMap of
- Just c -> return $ singleLinePart $ RoffStr [c]
+ Just c -> return $ Seq.singleton $ RoffStr [c]
Nothing -> escUnknown ('\\':'(':cs)
bracketedGlyph = unicodeGlyph <|> charGlyph
@@ -214,7 +207,7 @@ escape = do
[] -> mzero
[s] -> case M.lookup s characterCodeMap of
Nothing -> mzero
- Just c -> return $ singleLinePart $ RoffStr [c]
+ Just c -> return $ Seq.singleton $ RoffStr [c]
(s:ss) -> do
basechar <- case M.lookup cs characterCodeMap of
Nothing ->
@@ -231,12 +224,12 @@ escape = do
Just x -> addAccents as (x:xs)
Nothing -> mzero
addAccents ss [basechar] >>= \xs ->
- return (singleLinePart $ RoffStr xs))
+ return (Seq.singleton $ RoffStr xs))
<|> escUnknown ("\\[" ++ cs ++ "]")
unicodeGlyph = try $ do
xs <- ucharCode `sepBy1` (char '_') <* char ']'
- return $ singleLinePart $ RoffStr xs
+ return $ Seq.singleton $ RoffStr xs
ucharCode = try $ do
char 'u'
@@ -247,20 +240,20 @@ escape = do
Nothing -> mzero
Just c -> return c
- escUnknown :: PandocMonad m => String -> RoffLexer m (LineParts)
+ escUnknown :: PandocMonad m => String -> RoffLexer m (Seq LinePart)
escUnknown s = do
pos <- getPosition
report $ SkippedContent ("Unknown escape sequence " ++ s) pos
- return $ singleLinePart $ RoffStr "\xFFFD"
+ return $ Seq.singleton $ RoffStr "\xFFFD"
-- \s-1 \s0
-escFontSize :: PandocMonad m => RoffLexer m (LineParts)
+escFontSize :: PandocMonad m => RoffLexer m (Seq LinePart)
escFontSize = do
let sign = option "" $ ("-" <$ char '-' <|> "" <$ char '+')
let toFontSize xs =
case safeRead xs of
Nothing -> mzero
- Just n -> return $ singleLinePart $ FontSize n
+ Just n -> return $ Seq.singleton $ FontSize n
choice
[ do char '('
s <- sign
@@ -276,7 +269,7 @@ escFontSize = do
toFontSize (s ++ ds)
]
-escFont :: PandocMonad m => RoffLexer m (LineParts)
+escFont :: PandocMonad m => RoffLexer m (Seq LinePart)
escFont = do
font <- choice
[ digit >> return defaultFontSpec
@@ -286,7 +279,7 @@ escFont = do
]
modifyState $ \st -> st{ prevFont = currentFont st
, currentFont = font }
- return $ singleLinePart $ Font font
+ return $ Seq.singleton $ Font font
lettersFont :: PandocMonad m => RoffLexer m FontSpec
lettersFont = try $ do
@@ -503,10 +496,10 @@ resolveMacro macroName args pos = do
let fillLP (MacroArg i) zs =
case drop (i - 1) args of
[] -> zs
- (LineParts ys:_) -> ys <> zs
- fillLP z zs = z Seq.<| zs
- let fillMacroArg (MLine (LineParts lineparts)) =
- MLine (LineParts (foldr fillLP mempty lineparts))
+ (ys:_) -> ys ++ zs
+ fillLP z zs = z : zs
+ let fillMacroArg (MLine lineparts) =
+ MLine (foldr fillLP [] lineparts)
fillMacroArg x = x
return $ RoffTokens . fmap fillMacroArg . unRoffTokens $ ts
@@ -515,8 +508,7 @@ lexStringDef args = do -- string definition
case args of
[] -> fail "No argument to .ds"
(x:ys) -> do
- let ts = singleTok $ MLine $ mconcat
- $ intersperse (singleLinePart $ RoffStr " " ) ys
+ let ts = singleTok $ MLine (intercalate [RoffStr " " ] ys)
let stringName = linePartsToString x
modifyState $ \st ->
st{ customMacros = M.insert stringName ts (customMacros st) }
@@ -546,24 +538,24 @@ lexArgs = do
args <- many $ try oneArg
skipMany spacetab
eofline
- return args
+ return $ map Foldable.toList args
where
- oneArg :: PandocMonad m => RoffLexer m (LineParts)
+ oneArg :: PandocMonad m => RoffLexer m (Seq LinePart)
oneArg = do
skipMany $ try $ string "\\\n" -- continuation line
try quotedArg <|> plainArg
-- try, because there are some erroneous files, e.g. linux/bpf.2
- plainArg :: PandocMonad m => RoffLexer m (LineParts)
+ plainArg :: PandocMonad m => RoffLexer m (Seq LinePart)
plainArg = do
skipMany spacetab
mconcat <$> many1 (macroArg <|> escape <|> regularText <|> unescapedQuote)
where
- unescapedQuote = char '"' >> return (singleLinePart $ RoffStr "\"")
+ unescapedQuote = char '"' >> return (Seq.singleton $ RoffStr "\"")
- quotedArg :: PandocMonad m => RoffLexer m (LineParts)
+ quotedArg :: PandocMonad m => RoffLexer m (Seq LinePart)
quotedArg = do
skipMany spacetab
char '"'
@@ -576,9 +568,9 @@ lexArgs = do
escapedQuote = try $ do
char '"'
char '"'
- return $ singleLinePart $ RoffStr "\""
+ return $ Seq.singleton $ RoffStr "\""
-escStar :: PandocMonad m => RoffLexer m (LineParts)
+escStar :: PandocMonad m => RoffLexer m (Seq LinePart)
escStar = try $ do
pos <- getPosition
c <- anyChar
@@ -599,46 +591,46 @@ escStar = try $ do
resolveString stringname pos = do
RoffTokens ts <- resolveMacro stringname [] pos
case Foldable.toList ts of
- [MLine xs] -> return xs
+ [MLine xs] -> return $ Seq.fromList xs
_ -> do
report $ SkippedContent ("unknown string " ++ stringname) pos
return mempty
lexLine :: PandocMonad m => RoffLexer m RoffTokens
lexLine = do
- lnparts <- mconcat <$> many1 linePart
+ lnparts <- Foldable.toList . mconcat <$> many1 linePart
eofline
- go (unLineParts lnparts)
- where -- return mempty if we only have empty strings;
+ go lnparts
+ where -- return empty line if we only have empty strings;
-- this can happen if the line just contains \f[C], for example.
- go Seq.Empty = return mempty
- go ((RoffStr "") Seq.:<| xs) = go xs
- go xs = return $ singleTok $ MLine (LineParts xs)
+ go [] = return mempty
+ go (RoffStr "" : xs) = go xs
+ go xs = return $ singleTok $ MLine xs
-linePart :: PandocMonad m => RoffLexer m (LineParts)
+linePart :: PandocMonad m => RoffLexer m (Seq LinePart)
linePart = macroArg <|> escape <|>
regularText <|> quoteChar <|> spaceTabChar
-macroArg :: PandocMonad m => RoffLexer m (LineParts)
+macroArg :: PandocMonad m => RoffLexer m (Seq LinePart)
macroArg = try $ do
string "\\\\$"
x <- digit
- return $ singleLinePart $ MacroArg $ ord x - ord '0'
+ return $ Seq.singleton $ MacroArg $ ord x - ord '0'
-regularText :: PandocMonad m => RoffLexer m (LineParts)
+regularText :: PandocMonad m => RoffLexer m (Seq LinePart)
regularText = do
s <- many1 $ noneOf "\n\r\t \\\""
- return $ singleLinePart $ RoffStr s
+ return $ Seq.singleton $ RoffStr s
-quoteChar :: PandocMonad m => RoffLexer m (LineParts)
+quoteChar :: PandocMonad m => RoffLexer m (Seq LinePart)
quoteChar = do
char '"'
- return $ singleLinePart $ RoffStr "\""
+ return $ Seq.singleton $ RoffStr "\""
-spaceTabChar :: PandocMonad m => RoffLexer m (LineParts)
+spaceTabChar :: PandocMonad m => RoffLexer m (Seq LinePart)
spaceTabChar = do
c <- spacetab
- return $ singleLinePart $ RoffStr [c]
+ return $ Seq.singleton $ RoffStr [c]
lexEmptyLine :: PandocMonad m => RoffLexer m RoffTokens
lexEmptyLine = newline >> return (singleTok MEmptyLine)
@@ -646,8 +638,8 @@ lexEmptyLine = newline >> return (singleTok MEmptyLine)
manToken :: PandocMonad m => RoffLexer m RoffTokens
manToken = lexComment <|> lexMacro <|> lexLine <|> lexEmptyLine
-linePartsToString :: LineParts -> String
-linePartsToString = Foldable.foldMap go . unLineParts
+linePartsToString :: [LinePart] -> String
+linePartsToString = mconcat . map go
where
go (RoffStr s) = s
go _ = mempty