aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Parsing.hs
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r--src/Text/Pandoc/Parsing.hs242
1 files changed, 160 insertions, 82 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index a6d3cd46a..cd51bff69 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -7,7 +7,7 @@
, IncoherentInstances #-}
{-
-Copyright (C) 2006-2016 John MacFarlane <jgm@berkeley.edu>
+Copyright (C) 2006-2017 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,7 +26,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
{- |
Module : Text.Pandoc.Parsing
- Copyright : Copyright (C) 2006-2016 John MacFarlane
+ Copyright : Copyright (C) 2006-2017 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
@@ -36,6 +36,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
A utility library with parsers used in pandoc readers.
-}
module Text.Pandoc.Parsing ( anyLine,
+ anyLineNewline,
+ indentWith,
many1Till,
notFollowedBy',
oneOfStrings,
@@ -48,6 +50,7 @@ module Text.Pandoc.Parsing ( anyLine,
enclosed,
stringAnyCase,
parseFromString,
+ parseFromString',
lineClump,
charsInBalanced,
romanNumeral,
@@ -66,6 +69,7 @@ module Text.Pandoc.Parsing ( anyLine,
tableWith,
widthsFromIndices,
gridTableWith,
+ gridTableWith',
readWith,
readWithM,
testStringWith,
@@ -82,6 +86,7 @@ module Text.Pandoc.Parsing ( anyLine,
HasMacros (..),
HasLogMessages (..),
HasLastStrPosition (..),
+ HasIncludeFiles (..),
defaultParserState,
HeaderType (..),
ParserContext (..),
@@ -119,6 +124,7 @@ module Text.Pandoc.Parsing ( anyLine,
(<+?>),
extractIdClass,
insertIncludedFile,
+ insertIncludedFileF,
-- * Re-exports from Text.Pandoc.Parsec
Stream,
runParser,
@@ -252,12 +258,28 @@ anyLine = do
return this
_ -> mzero
+-- | Parse any line, include the final newline in the output
+anyLineNewline :: Stream [Char] m Char => ParserT [Char] st m [Char]
+anyLineNewline = (++ "\n") <$> anyLine
+
+-- | Parse indent by specified number of spaces (or equiv. tabs)
+indentWith :: Stream [Char] m Char
+ => HasReaderOptions st
+ => Int -> ParserT [Char] st m [Char]
+indentWith num = do
+ tabStop <- getOption readerTabStop
+ if (num < tabStop)
+ then count num (char ' ')
+ else choice [ try (count num (char ' '))
+ , try (char '\t' >> indentWith (num - tabStop)) ]
+
-- | Like @manyTill@, but reads at least one item.
-many1Till :: Stream s m t
+many1Till :: (Show end, Stream s m t)
=> ParserT s st m a
-> ParserT s st m end
-> ParserT s st m [a]
many1Till p end = do
+ notFollowedBy' end
first <- p
rest <- manyTill p end
return (first:rest)
@@ -322,7 +344,7 @@ blanklines :: Stream s m Char => ParserT s st m [Char]
blanklines = many1 blankline
-- | Parses material enclosed between start and end parsers.
-enclosed :: Stream s m Char => ParserT s st m t -- ^ start parser
+enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser
-> ParserT s st m end -- ^ end parser
-> ParserT s st m a -- ^ content parser (to be used repeatedly)
-> ParserT s st m [a]
@@ -338,7 +360,10 @@ stringAnyCase (x:xs) = do
return (firstChar:rest)
-- | Parse contents of 'str' using 'parser' and return result.
-parseFromString :: Monad m => ParserT String st m a -> String -> ParserT String st m a
+parseFromString :: Monad m
+ => ParserT String st m a
+ -> String
+ -> ParserT String st m a
parseFromString parser str = do
oldPos <- getPosition
oldInput <- getInput
@@ -350,6 +375,18 @@ parseFromString parser str = do
setPosition oldPos
return result
+-- | Like 'parseFromString' but specialized for 'ParserState'.
+-- This resets 'stateLastStrPos', which is almost always what we want.
+parseFromString' :: Monad m
+ => ParserT String ParserState m a
+ -> String
+ -> ParserT String ParserState m a
+parseFromString' parser str = do
+ oldStrPos <- stateLastStrPos <$> getState
+ res <- parseFromString parser str
+ updateState $ \st -> st{ stateLastStrPos = oldStrPos }
+ return res
+
-- | Parse raw line block up to and including blank lines.
lineClump :: Stream [Char] m Char => ParserT [Char] st m String
lineClump = blanklines
@@ -445,33 +482,8 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain)
sepby1 p sep = (:) <$> p <*> (many (try $ sep >> p))
--- Schemes from http://www.iana.org/assignments/uri-schemes.html plus
--- the unofficial schemes coap, doi, javascript, isbn, pmid
-schemes :: [String]
-schemes = ["coap","doi","javascript","aaa","aaas","about","acap","cap","cid",
- "crid","data","dav","dict","dns","file","ftp","geo","go","gopher",
- "h323","http","https","iax","icap","im","imap","info","ipp","iris",
- "iris.beep","iris.xpc","iris.xpcs","iris.lwz","ldap","mailto","mid",
- "msrp","msrps","mtqp","mupdate","news","nfs","ni","nih","nntp",
- "opaquelocktoken","pop","pres","rtsp","service","session","shttp","sieve",
- "sip","sips","sms","snmp","soap.beep","soap.beeps","tag","tel","telnet",
- "tftp","thismessage","tn3270","tip","tv","urn","vemmi","ws","wss","xcon",
- "xcon-userid","xmlrpc.beep","xmlrpc.beeps","xmpp","z39.50r","z39.50s",
- "adiumxtra","afp","afs","aim","apt","attachment","aw","beshare","bitcoin",
- "bolo","callto","chrome","chrome-extension","com-eventbrite-attendee",
- "content", "cvs","dlna-playsingle","dlna-playcontainer","dtn","dvb",
- "ed2k","facetime","feed","finger","fish","gg","git","gizmoproject",
- "gtalk","hcp","icon","ipn","irc","irc6","ircs","itms","jar","jms",
- "keyparc","lastfm","ldaps","magnet","maps","market","message","mms",
- "ms-help","msnim","mumble","mvn","notes","oid","palm","paparazzi",
- "platform","proxy","psyc","query","res","resource","rmi","rsync",
- "rtmp","secondlife","sftp","sgn","skype","smb","soldat","spotify",
- "ssh","steam","svn","teamspeak","things","udp","unreal","ut2004",
- "ventrilo","view-source","webcal","wtai","wyciwyg","xfire","xri",
- "ymsgr", "isbn", "pmid"]
-
uriScheme :: Stream s m Char => ParserT s st m String
-uriScheme = oneOfStringsCI schemes
+uriScheme = oneOfStringsCI (Set.toList schemes)
-- | Parses a URI. Returns pair of original and URI-escaped version.
uri :: Stream [Char] m Char => ParserT [Char] st m (String, String)
@@ -762,21 +774,36 @@ lineBlockLines = try $ do
-- | Parse a table using 'headerParser', 'rowParser',
-- 'lineParser', and 'footerParser'.
-tableWith :: Stream s m Char
- => ParserT s ParserState m ([Blocks], [Alignment], [Int])
- -> ([Int] -> ParserT s ParserState m [Blocks])
- -> ParserT s ParserState m sep
- -> ParserT s ParserState m end
- -> ParserT s ParserState m Blocks
+tableWith :: (Stream s m Char, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT s st m (mf [Blocks], [Alignment], [Int])
+ -> ([Int] -> ParserT s st m (mf [Blocks]))
+ -> ParserT s st m sep
+ -> ParserT s st m end
+ -> ParserT s st m (mf Blocks)
tableWith headerParser rowParser lineParser footerParser = try $ do
+ (aligns, widths, heads, rows) <- tableWith' headerParser rowParser
+ lineParser footerParser
+ return $ B.table mempty (zip aligns widths) <$> heads <*> rows
+
+type TableComponents mf = ([Alignment], [Double], mf [Blocks], mf [[Blocks]])
+
+tableWith' :: (Stream s m Char, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT s st m (mf [Blocks], [Alignment], [Int])
+ -> ([Int] -> ParserT s st m (mf [Blocks]))
+ -> ParserT s st m sep
+ -> ParserT s st m end
+ -> ParserT s st m (TableComponents mf)
+tableWith' headerParser rowParser lineParser footerParser = try $ do
(heads, aligns, indices) <- headerParser
- lines' <- rowParser indices `sepEndBy1` lineParser
+ lines' <- sequence <$> rowParser indices `sepEndBy1` lineParser
footerParser
numColumns <- getOption readerColumns
let widths = if (indices == [])
then replicate (length aligns) 0.0
else widthsFromIndices numColumns indices
- return $ B.table mempty (zip aligns widths) heads lines'
+ return $ (aligns, widths, heads, lines')
-- Calculate relative widths of table columns, based on indices
widthsFromIndices :: Int -- Number of columns on terminal
@@ -809,25 +836,44 @@ widthsFromIndices numColumns' indices =
-- (which may be grid), then the rows,
-- which may be grid, separated by blank lines, and
-- ending with a footer (dashed line followed by blank line).
-gridTableWith :: Stream [Char] m Char
- => ParserT [Char] ParserState m Blocks -- ^ Block list parser
- -> Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m Blocks
+gridTableWith :: (Stream [Char] m Char, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT [Char] st m (mf Blocks) -- ^ Block list parser
+ -> Bool -- ^ Headerless table
+ -> ParserT [Char] st m (mf Blocks)
gridTableWith blocks headless =
tableWith (gridTableHeader headless blocks) (gridTableRow blocks)
(gridTableSep '-') gridTableFooter
+gridTableWith' :: (Stream [Char] m Char, HasReaderOptions st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT [Char] st m (mf Blocks) -- ^ Block list parser
+ -> Bool -- ^ Headerless table
+ -> ParserT [Char] st m (TableComponents mf)
+gridTableWith' blocks headless =
+ tableWith' (gridTableHeader headless blocks) (gridTableRow blocks)
+ (gridTableSep '-') gridTableFooter
+
gridTableSplitLine :: [Int] -> String -> [String]
gridTableSplitLine indices line = map removeFinalBar $ tail $
splitStringByIndices (init indices) $ trimr line
-gridPart :: Stream s m Char => Char -> ParserT s st m (Int, Int)
+gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment)
gridPart ch = do
+ leftColon <- option False (True <$ char ':')
dashes <- many1 (char ch)
+ rightColon <- option False (True <$ char ':')
char '+'
- return (length dashes, length dashes + 1)
-
-gridDashedLines :: Stream s m Char => Char -> ParserT s st m [(Int,Int)]
+ let lengthDashes = length dashes + (if leftColon then 1 else 0) +
+ (if rightColon then 1 else 0)
+ let alignment = case (leftColon, rightColon) of
+ (True, True) -> AlignCenter
+ (True, False) -> AlignLeft
+ (False, True) -> AlignRight
+ (False, False) -> AlignDefault
+ return ((lengthDashes, lengthDashes + 1), alignment)
+
+gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)]
gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline
removeFinalBar :: String -> String
@@ -835,14 +881,14 @@ removeFinalBar =
reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse
-- | Separator between rows of grid table.
-gridTableSep :: Stream s m Char => Char -> ParserT s ParserState m Char
+gridTableSep :: Stream s m Char => Char -> ParserT s st m Char
gridTableSep ch = try $ gridDashedLines ch >> return '\n'
-- | Parse header for a grid table.
-gridTableHeader :: Stream [Char] m Char
+gridTableHeader :: (Stream [Char] m Char, Functor mf, Applicative mf, Monad mf)
=> Bool -- ^ Headerless table
- -> ParserT [Char] ParserState m Blocks
- -> ParserT [Char] ParserState m ([Blocks], [Alignment], [Int])
+ -> ParserT [Char] st m (mf Blocks)
+ -> ParserT [Char] st m (mf [Blocks], [Alignment], [Int])
gridTableHeader headless blocks = try $ do
optional blanklines
dashes <- gridDashedLines '-'
@@ -851,36 +897,40 @@ gridTableHeader headless blocks = try $ do
else many1
(notFollowedBy (gridTableSep '=') >> char '|' >>
many1Till anyChar newline)
- if headless
- then return ()
- else gridTableSep '=' >> return ()
- let lines' = map snd dashes
+ underDashes <- if headless
+ then return dashes
+ else gridDashedLines '='
+ guard $ length dashes == length underDashes
+ let lines' = map (snd . fst) underDashes
let indices = scanl (+) 0 lines'
- let aligns = replicate (length lines') AlignDefault
- -- RST does not have a notion of alignments
+ let aligns = map snd underDashes
let rawHeads = if headless
- then replicate (length dashes) ""
- else map (intercalate " ") $ transpose
+ then replicate (length underDashes) ""
+ else map (unlines . map trim) $ transpose
$ map (gridTableSplitLine indices) rawContent
- heads <- mapM (parseFromString blocks) $ map trim rawHeads
+ heads <- fmap sequence $ mapM (parseFromString blocks . trim) rawHeads
return (heads, aligns, indices)
-gridTableRawLine :: Stream s m Char => [Int] -> ParserT s ParserState m [String]
+gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [String]
gridTableRawLine indices = do
char '|'
line <- many1Till anyChar newline
return (gridTableSplitLine indices line)
-- | Parse row of grid table.
-gridTableRow :: Stream [Char] m Char
- => ParserT [Char] ParserState m Blocks
+gridTableRow :: (Stream [Char] m Char, Functor mf, Applicative mf, Monad mf)
+ => ParserT [Char] st m (mf Blocks)
-> [Int]
- -> ParserT [Char] ParserState m [Blocks]
+ -> ParserT [Char] st m (mf [Blocks])
gridTableRow blocks indices = do
colLines <- many1 (gridTableRawLine indices)
let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $
transpose colLines
- mapM (liftM compactifyCell . parseFromString blocks) cols
+ compactifyCell bs = case compactify [bs] of
+ [] -> mempty
+ x:_ -> x
+ cells <- sequence <$> mapM (parseFromString blocks) cols
+ return $ fmap (map compactifyCell) cells
removeOneLeadingSpace :: [String] -> [String]
removeOneLeadingSpace xs =
@@ -890,11 +940,8 @@ removeOneLeadingSpace xs =
where startsWithSpace "" = True
startsWithSpace (y:_) = y == ' '
-compactifyCell :: Blocks -> Blocks
-compactifyCell bs = head $ compactify [bs]
-
-- | Parse footer for a grid table.
-gridTableFooter :: Stream s m Char => ParserT s ParserState m [Char]
+gridTableFooter :: Stream s m Char => ParserT s st m [Char]
gridTableFooter = blanklines
---
@@ -937,6 +984,7 @@ data ParserState = ParserState
stateSubstitutions :: SubstTable, -- ^ List of substitution references
stateNotes :: NoteTable, -- ^ List of notes (raw bodies)
stateNotes' :: NoteTable', -- ^ List of notes (parsed bodies)
+ stateNoteRefs :: Set.Set String, -- ^ List of note references used
stateMeta :: Meta, -- ^ Document metadata
stateMeta' :: F Meta, -- ^ Document metadata
stateCitations :: M.Map String String, -- ^ RST-style citations
@@ -972,6 +1020,9 @@ class HasReaderOptions st where
-- default
getOption f = (f . extractReaderOptions) <$> getState
+instance HasReaderOptions ParserState where
+ extractReaderOptions = stateOptions
+
class HasQuoteContext st m where
getQuoteContext :: (Stream s m t) => ParsecT s st m QuoteContext
withQuoteContext :: QuoteContext -> ParsecT s st m a -> ParsecT s st m a
@@ -987,9 +1038,6 @@ instance Monad m => HasQuoteContext ParserState m where
setState newState { stateQuoteContext = oldQuoteContext }
return result
-instance HasReaderOptions ParserState where
- extractReaderOptions = stateOptions
-
class HasHeaderMap st where
extractHeaderMap :: st -> M.Map Inlines String
updateHeaderMap :: (M.Map Inlines String -> M.Map Inlines String) ->
@@ -1031,6 +1079,16 @@ instance HasLogMessages ParserState where
addLogMessage msg st = st{ stateLogMessages = msg : stateLogMessages st }
getLogMessages st = reverse $ stateLogMessages st
+class HasIncludeFiles st where
+ getIncludeFiles :: st -> [String]
+ addIncludeFile :: String -> st -> st
+ dropLatestIncludeFile :: st -> st
+
+instance HasIncludeFiles ParserState where
+ getIncludeFiles = stateContainers
+ addIncludeFile f s = s{ stateContainers = f : stateContainers s }
+ dropLatestIncludeFile s = s { stateContainers = drop 1 $ stateContainers s }
+
defaultParserState :: ParserState
defaultParserState =
ParserState { stateOptions = def,
@@ -1043,7 +1101,8 @@ defaultParserState =
stateHeaderKeys = M.empty,
stateSubstitutions = M.empty,
stateNotes = [],
- stateNotes' = [],
+ stateNotes' = M.empty,
+ stateNoteRefs = Set.empty,
stateMeta = nullMeta,
stateMeta' = return nullMeta,
stateCitations = M.empty,
@@ -1110,7 +1169,8 @@ data QuoteContext
type NoteTable = [(String, String)]
-type NoteTable' = [(String, F Blocks)] -- used in markdown reader
+type NoteTable' = M.Map String (SourcePos, F Blocks)
+-- used in markdown reader
newtype Key = Key String deriving (Show, Read, Eq, Ord)
@@ -1322,17 +1382,18 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs')
Nothing -> cls
kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs
-insertIncludedFile :: PandocMonad m
- => ParserT String ParserState m Blocks
- -> [FilePath] -> FilePath
- -> ParserT String ParserState m Blocks
-insertIncludedFile blocks dirs f = do
+insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st,
+ Functor mf, Applicative mf, Monad mf)
+ => ParserT String st m (mf Blocks)
+ -> [FilePath] -> FilePath
+ -> ParserT String st m (mf Blocks)
+insertIncludedFile' blocks dirs f = do
oldPos <- getPosition
oldInput <- getInput
- containers <- stateContainers <$> getState
+ containers <- getIncludeFiles <$> getState
when (f `elem` containers) $
throwError $ PandocParseError $ "Include file loop at " ++ show oldPos
- updateState $ \s -> s{ stateContainers = f : stateContainers s }
+ updateState $ addIncludeFile f
mbcontents <- readFileFromDirs dirs f
contents <- case mbcontents of
Just s -> return s
@@ -1344,5 +1405,22 @@ insertIncludedFile blocks dirs f = do
bs <- blocks
setInput oldInput
setPosition oldPos
- updateState $ \s -> s{ stateContainers = tail $ stateContainers s }
+ updateState dropLatestIncludeFile
return bs
+
+-- | Parse content of include file as blocks. Circular includes result in an
+-- @PandocParseError@.
+insertIncludedFile :: (PandocMonad m, HasIncludeFiles st)
+ => ParserT String st m Blocks
+ -> [FilePath] -> FilePath
+ -> ParserT String st m Blocks
+insertIncludedFile blocks dirs f =
+ runIdentity <$> insertIncludedFile' (Identity <$> blocks) dirs f
+
+-- | Parse content of include file as future blocks. Circular includes result in
+-- an @PandocParseError@.
+insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st)
+ => ParserT String st m (Future st Blocks)
+ -> [FilePath] -> FilePath
+ -> ParserT String st m (Future st Blocks)
+insertIncludedFileF = insertIncludedFile'