diff options
Diffstat (limited to 'src/Text/Pandoc/Parsing.hs')
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 669 |
1 files changed, 405 insertions, 264 deletions
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 4bae8942b..09445622d 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -5,11 +5,10 @@ {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE IncoherentInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE ViewPatterns #-} {-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Parsing - Copyright : Copyright (C) 2006-2020 John MacFarlane + Copyright : Copyright (C) 2006-2021 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane <jgm@berkeley.edu> @@ -19,8 +18,7 @@ A utility library with parsers used in pandoc readers. -} -module Text.Pandoc.Parsing ( take1WhileP, - takeP, +module Text.Pandoc.Parsing ( module Text.Pandoc.Sources, countChar, textStr, anyLine, @@ -105,14 +103,14 @@ module Text.Pandoc.Parsing ( take1WhileP, singleQuoteEnd, doubleQuoteStart, doubleQuoteEnd, - ellipses, apostrophe, + doubleCloseQuote, + ellipses, dash, nested, citeKey, Parser, ParserT, - F, Future(..), runF, askF, @@ -123,7 +121,6 @@ module Text.Pandoc.Parsing ( take1WhileP, (<+?>), extractIdClass, insertIncludedFile, - insertIncludedFileF, -- * Re-exports from Text.Parsec Stream, runParser, @@ -134,22 +131,10 @@ module Text.Pandoc.Parsing ( take1WhileP, getInput, setInput, unexpected, - char, - letter, - digit, - alphaNum, skipMany, skipMany1, - spaces, - space, - anyChar, - satisfy, - newline, - string, count, eof, - noneOf, - oneOf, lookAhead, notFollowedBy, many, @@ -174,6 +159,8 @@ module Text.Pandoc.Parsing ( take1WhileP, SourcePos, getPosition, setPosition, + sourceName, + setSourceName, sourceColumn, sourceLine, setSourceColumn, @@ -189,48 +176,141 @@ module Text.Pandoc.Parsing ( take1WhileP, where import Control.Monad.Identity + ( guard, + join, + unless, + when, + void, + liftM2, + liftM, + Identity(..), + MonadPlus(mzero) ) import Control.Monad.Reader -import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, - isPunctuation, isSpace, ord, toLower, toUpper) -import Data.Default + ( asks, runReader, MonadReader(ask), Reader, ReaderT(ReaderT) ) +import Data.Char (chr, isAlphaNum, isAscii, isAsciiUpper, isAsciiLower, + isSpace, ord, toLower, toUpper) +import Data.Default ( Default(..) ) import Data.Functor (($>)) import Data.List (intercalate, transpose) import qualified Data.Map as M -import Data.Maybe (mapMaybe, fromMaybe) +import Data.Maybe (fromMaybe) import qualified Data.Set as Set -import Data.String import Data.Text (Text) import qualified Data.Text as T import Text.HTML.TagSoup.Entity (lookupEntity) -import Text.Pandoc.Asciify (toAsciiChar) +import Text.Pandoc.Asciify (toAsciiText) import Text.Pandoc.Builder (Blocks, HasMeta (..), Inlines, trimInlines) import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad, readFileFromDirs, report) import Text.Pandoc.Definition + ( Target, + nullMeta, + nullAttr, + Meta, + ColWidth(ColWidthDefault, ColWidth), + TableFoot(TableFoot), + TableBody(TableBody), + Attr, + TableHead(TableHead), + Row(..), + Alignment(..), + Inline(Str), + ListNumberDelim(..), + ListAttributes, + ListNumberStyle(..) ) import Text.Pandoc.Logging + ( LogMessage(CouldNotLoadIncludeFile, DuplicateIdentifier) ) import Text.Pandoc.Options + ( extensionEnabled, + Extension(Ext_old_dashes, Ext_tex_math_dollars, + Ext_tex_math_single_backslash, Ext_tex_math_double_backslash, + Ext_auto_identifiers, Ext_ascii_identifiers, Ext_smart), + ReaderOptions(readerTabStop, readerColumns, readerExtensions) ) import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Shared + ( uniqueIdent, + tshow, + mapLeft, + compactify, + trim, + trimr, + splitTextByIndices, + safeRead, + trimMath, + schemes, + escapeURI ) +import Text.Pandoc.Sources import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn) import Text.Pandoc.XML (fromEntities) -import Text.Parsec hiding (token) -import Text.Parsec.Pos (initialPos, newPos, updatePosString) - -import Control.Monad.Except +import Text.Parsec + ( between, + setSourceName, + Parsec, + Column, + Line, + incSourceLine, + incSourceColumn, + setSourceLine, + setSourceColumn, + sourceLine, + sourceColumn, + sourceName, + setSourceName, + setPosition, + getPosition, + updateState, + setState, + getState, + optionMaybe, + optional, + option, + endBy1, + endBy, + sepEndBy1, + sepEndBy, + sepBy1, + sepBy, + try, + choice, + (<?>), + (<|>), + manyTill, + many1, + many, + notFollowedBy, + lookAhead, + eof, + count, + skipMany1, + skipMany, + unexpected, + setInput, + getInput, + anyToken, + tokenPrim, + parse, + runParserT, + runParser, + ParseError, + ParsecT, + SourcePos, + Stream(..) ) +import Text.Parsec.Pos (initialPos, newPos) +import Control.Monad.Except ( MonadError(throwError) ) import Text.Pandoc.Error + ( PandocError(PandocParseError, PandocParsecError) ) type Parser t s = Parsec t s type ParserT = ParsecT + -- | Reader monad wrapping the parser state. This is used to possibly delay -- evaluation until all relevant information has been parsed and made available -- in the parser state. newtype Future s a = Future { runDelayed :: Reader s a } deriving (Monad, Applicative, Functor) -type F = Future ParserState - runF :: Future s a -> s -> a runF = runReader . runDelayed @@ -253,70 +333,48 @@ instance (Semigroup a, Monoid a) => Monoid (Future s a) where mappend = (<>) -- | Like @count@, but packs its result -countChar :: (Stream s m Char, Monad m) +countChar :: (Stream s m Char, UpdateSourcePos s Char, Monad m) => Int -> ParsecT s st m Char -> ParsecT s st m Text countChar n = fmap T.pack . count n -- | Like @string@, but uses @Text@. -textStr :: Stream s m Char => Text -> ParsecT s u m Text +textStr :: (Stream s m Char, UpdateSourcePos s Char) + => Text -> ParsecT s u m Text textStr t = string (T.unpack t) $> t --- | Parse characters while a predicate is true. -take1WhileP :: Monad m - => (Char -> Bool) - -> ParserT Text st m Text -take1WhileP f = do - -- needed to persuade parsec that this won't match an empty string: - c <- satisfy f - inp <- getInput - pos <- getPosition - let (t, rest) = T.span f inp - setInput rest - setPosition $ - if f '\t' || f '\n' - then updatePosString pos $ T.unpack t - else incSourceColumn pos (T.length t) - return $ T.singleton c <> t - --- Parse n characters of input (or the rest of the input if --- there aren't n characters). -takeP :: Monad m => Int -> ParserT Text st m Text -takeP n = do - guard (n > 0) - -- faster than 'count n anyChar' - inp <- getInput - pos <- getPosition - let (xs, rest) = T.splitAt n inp - -- needed to persuade parsec that this won't match an empty string: - anyChar - setInput rest - setPosition $ updatePosString pos $ T.unpack xs - return xs - --- | Parse any line of text -anyLine :: Monad m => ParserT Text st m Text + +-- | Parse any line of text, returning the contents without the +-- final newline. +anyLine :: Monad m => ParserT Sources st m Text anyLine = do -- This is much faster than: -- manyTill anyChar newline inp <- getInput - pos <- getPosition - case T.break (=='\n') inp of - (this, T.uncons -> Just ('\n', rest)) -> do - -- needed to persuade parsec that this won't match an empty string: - anyChar - setInput rest - setPosition $ incSourceLine (setSourceColumn pos 1) 1 - return this - _ -> mzero + case inp of + Sources [] -> mzero + Sources ((fp,t):inps) -> + -- we assume that lines don't span different input files + case T.break (=='\n') t of + (this, rest) + | T.null rest + , not (null inps) -> + -- line may span different input files, so do it + -- character by character + T.pack <$> manyTill anyChar newline + | otherwise -> do -- either end of inputs or newline in rest + setInput $ Sources ((fp, rest):inps) + char '\n' -- needed so parsec knows we won't match empty string + -- and so source pos is updated + return this -- | Parse any line, include the final newline in the output -anyLineNewline :: Monad m => ParserT Text st m Text +anyLineNewline :: Monad m => ParserT Sources st m Text anyLineNewline = (<> "\n") <$> anyLine -- | Parse indent by specified number of spaces (or equiv. tabs) -indentWith :: Stream s m Char +indentWith :: (Stream s m Char, UpdateSourcePos s Char) => HasReaderOptions st => Int -> ParserT s st m Text indentWith num = do @@ -401,11 +459,13 @@ notFollowedBy' p = try $ join $ do a <- try p return (return ()) -- (This version due to Andrew Pimlott on the Haskell mailing list.) -oneOfStrings' :: Stream s m Char => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text +oneOfStrings' :: (Stream s m Char, UpdateSourcePos s Char) + => (Char -> Char -> Bool) -> [Text] -> ParserT s st m Text oneOfStrings' f = fmap T.pack . oneOfStrings'' f . fmap T.unpack -- TODO: This should be re-implemented in a Text-aware way -oneOfStrings'' :: Stream s m Char => (Char -> Char -> Bool) -> [String] -> ParserT s st m String +oneOfStrings'' :: (Stream s m Char, UpdateSourcePos s Char) + => (Char -> Char -> Bool) -> [String] -> ParserT s st m String oneOfStrings'' _ [] = Prelude.fail "no strings" oneOfStrings'' matches strs = try $ do c <- anyChar @@ -420,14 +480,16 @@ oneOfStrings'' matches strs = try $ do -- | Parses one of a list of strings. If the list contains -- two strings one of which is a prefix of the other, the longer -- string will be matched if possible. -oneOfStrings :: Stream s m Char => [Text] -> ParserT s st m Text +oneOfStrings :: (Stream s m Char, UpdateSourcePos s Char) + => [Text] -> ParserT s st m Text oneOfStrings = oneOfStrings' (==) -- | Parses one of a list of strings (tried in order), case insensitive. -- TODO: This will not be accurate with general Unicode (neither -- Text.toLower nor Text.toCaseFold can be implemented with a map) -oneOfStringsCI :: Stream s m Char => [Text] -> ParserT s st m Text +oneOfStringsCI :: (Stream s m Char, UpdateSourcePos s Char) + => [Text] -> ParserT s st m Text oneOfStringsCI = oneOfStrings' ciMatch where ciMatch x y = toLower' x == toLower' y -- this optimizes toLower by checking common ASCII case @@ -438,29 +500,41 @@ oneOfStringsCI = oneOfStrings' ciMatch | otherwise = toLower c -- | Parses a space or tab. -spaceChar :: Stream s m Char => ParserT s st m Char +spaceChar :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Char spaceChar = satisfy $ \c -> c == ' ' || c == '\t' -- | Parses a nonspace, nonnewline character. -nonspaceChar :: Stream s m Char => ParserT s st m Char -nonspaceChar = noneOf ['\t', '\n', ' ', '\r'] +nonspaceChar :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Char +nonspaceChar = satisfy (not . isSpaceChar) + +isSpaceChar :: Char -> Bool +isSpaceChar ' ' = True +isSpaceChar '\t' = True +isSpaceChar '\n' = True +isSpaceChar '\r' = True +isSpaceChar _ = False -- | Skips zero or more spaces or tabs. -skipSpaces :: Stream s m Char => ParserT s st m () +skipSpaces :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m () skipSpaces = skipMany spaceChar -- | Skips zero or more spaces or tabs, then reads a newline. -blankline :: Stream s m Char => ParserT s st m Char +blankline :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Char blankline = try $ skipSpaces >> newline -- | Parses one or more blank lines and returns a string of newlines. -blanklines :: Stream s m Char => ParserT s st m Text +blanklines :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Text blanklines = T.pack <$> many1 blankline -- | Gobble n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT Text st m () + => Int -> ParserT Sources st m () gobbleSpaces 0 = return () gobbleSpaces n | n < 0 = error "gobbleSpaces called with negative number" @@ -468,18 +542,26 @@ gobbleSpaces n char ' ' <|> eatOneSpaceOfTab gobbleSpaces (n - 1) -eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Text st m Char +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT Sources st m Char eatOneSpaceOfTab = do - char '\t' + lookAhead (char '\t') + pos <- getPosition tabstop <- getOption readerTabStop + -- replace the tab on the input stream with spaces + let numSpaces = tabstop - ((sourceColumn pos - 1) `mod` tabstop) inp <- getInput - setInput $ T.replicate (tabstop - 1) " " <> inp - return ' ' + setInput $ + case inp of + Sources [] -> error "eatOneSpaceOfTab - empty Sources list" + Sources ((fp,t):rest) -> + -- drop the tab and add spaces + Sources ((fp, T.replicate numSpaces " " <> T.drop 1 t):rest) + char ' ' -- | Gobble up to n spaces; if tabs are encountered, expand them -- and gobble some or all of their spaces, leaving the rest. gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) - => Int -> ParserT Text st m Int + => Int -> ParserT Sources st m Int gobbleAtMostSpaces 0 = return 0 gobbleAtMostSpaces n | n < 0 = error "gobbleAtMostSpaces called with negative number" @@ -488,7 +570,8 @@ gobbleAtMostSpaces n (+ 1) <$> gobbleAtMostSpaces (n - 1) -- | Parses material enclosed between start and end parsers. -enclosed :: (Show end, Stream s m Char) => ParserT s st m t -- ^ start parser +enclosed :: (Show end, Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m t -- ^ start parser -> ParserT s st m end -- ^ end parser -> ParserT s st m a -- ^ content parser (to be used repeatedly) -> ParserT s st m [a] @@ -496,39 +579,41 @@ enclosed start end parser = try $ start >> notFollowedBy space >> many1Till parser end -- | Parse string, case insensitive. -stringAnyCase :: Stream s m Char => Text -> ParserT s st m Text +stringAnyCase :: (Stream s m Char, UpdateSourcePos s Char) + => Text -> ParserT s st m Text stringAnyCase = fmap T.pack . stringAnyCase' . T.unpack -stringAnyCase' :: Stream s m Char => String -> ParserT s st m String +stringAnyCase' :: (Stream s m Char, UpdateSourcePos s Char) + => String -> ParserT s st m String stringAnyCase' [] = string "" stringAnyCase' (x:xs) = do firstChar <- char (toUpper x) <|> char (toLower x) rest <- stringAnyCase' xs return (firstChar:rest) +-- TODO rewrite by just adding to Sources stream? -- | Parse contents of 'str' using 'parser' and return result. -parseFromString :: (Stream s m Char, IsString s) - => ParserT s st m r +parseFromString :: Monad m + => ParserT Sources st m r -> Text - -> ParserT s st m r + -> ParserT Sources st m r parseFromString parser str = do oldPos <- getPosition setPosition $ initialPos "chunk" oldInput <- getInput - setInput $ fromString $ T.unpack str + setInput $ toSources str result <- parser spaces - eof setInput oldInput setPosition oldPos return result -- | Like 'parseFromString' but specialized for 'ParserState'. -- This resets 'stateLastStrPos', which is almost always what we want. -parseFromString' :: (Stream s m Char, IsString s, HasLastStrPosition u) - => ParserT s u m a +parseFromString' :: (Monad m, HasLastStrPosition u) + => ParserT Sources u m a -> Text - -> ParserT s u m a + -> ParserT Sources u m a parseFromString' parser str = do oldLastStrPos <- getLastStrPos <$> getState updateState $ setLastStrPos Nothing @@ -537,7 +622,7 @@ parseFromString' parser str = do return res -- | Parse raw line block up to and including blank lines. -lineClump :: Monad m => ParserT Text st m Text +lineClump :: Monad m => ParserT Sources st m Text lineClump = blanklines <|> (T.unlines <$> many1 (notFollowedBy blankline >> anyLine)) @@ -546,7 +631,7 @@ lineClump = blanklines -- pairs of open and close, which must be different. For example, -- @charsInBalanced '(' ')' anyChar@ will parse "(hello (there))" -- and return "hello (there)". -charsInBalanced :: Stream s m Char => Char -> Char -> ParserT s st m Char +charsInBalanced :: (Stream s m Char, UpdateSourcePos s Char) => Char -> Char -> ParserT s st m Char -> ParserT s st m Text charsInBalanced open close parser = try $ do char open @@ -565,7 +650,7 @@ charsInBalanced open close parser = try $ do -- Auxiliary functions for romanNumeral: -- | Parses a roman numeral (uppercase or lowercase), returns number. -romanNumeral :: Stream s m Char => Bool -- ^ Uppercase if true +romanNumeral :: (Stream s m Char, UpdateSourcePos s Char) => Bool -- ^ Uppercase if true -> ParserT s st m Int romanNumeral upperCase = do let rchar uc = char $ if upperCase then uc else toLower uc @@ -601,20 +686,19 @@ romanNumeral upperCase = do -- | Parses an email address; returns original and corresponding -- escaped mailto: URI. -emailAddress :: Stream s m Char => ParserT s st m (Text, Text) +emailAddress :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text) emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) where toResult mbox dom = let full = fromEntities $ T.pack $ mbox ++ '@':dom in (full, escapeURI $ "mailto:" <> full) mailbox = intercalate "." <$> (emailWord `sepBy1'` dot) domain = intercalate "." <$> (subdomain `sepBy1'` dot) dot = char '.' - subdomain = many1 $ alphaNum <|> innerPunct + subdomain = many1 $ alphaNum <|> innerPunct (=='-') -- this excludes some valid email addresses, since an -- email could contain e.g. '__', but gives better results -- for our purposes, when combined with markdown parsing: - innerPunct = try (satisfy (\c -> isEmailPunct c || c == '@') - <* notFollowedBy space - <* notFollowedBy (satisfy isPunctuation)) + innerPunct f = try (satisfy f + <* notFollowedBy (satisfy (not . isAlphaNum))) -- technically an email address could begin with a symbol, -- but allowing this creates too many problems. -- See e.g. https://github.com/jgm/pandoc/issues/2940 @@ -625,16 +709,16 @@ emailAddress = try $ toResult <$> mailbox <*> (char '@' *> domain) isEmailPunct c = T.any (== c) "!\"#$%&'*+-/=?^_{|}~;" -uriScheme :: Stream s m Char => ParserT s st m Text +uriScheme :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text uriScheme = oneOfStringsCI (Set.toList schemes) -- | Parses a URI. Returns pair of original and URI-escaped version. -uri :: Stream s m Char => ParserT s st m (Text, Text) +uri :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (Text, Text) uri = try $ do scheme <- uriScheme char ':' -- Avoid parsing e.g. "**Notes:**" as a raw URI: - notFollowedBy (oneOf "*_]") + notFollowedBy $ satisfy (\c -> c == '*' || c == '_' || c == ']') -- We allow sentence punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) @@ -648,7 +732,20 @@ uri = try $ do let uri' = scheme <> ":" <> fromEntities str' return (uri', escapeURI uri') where - wordChar = alphaNum <|> oneOf "#$%+/@\\_-&=" + isWordChar '#' = True + isWordChar '$' = True + isWordChar '%' = True + isWordChar '+' = True + isWordChar '/' = True + isWordChar '@' = True + isWordChar '\\' = True + isWordChar '_' = True + isWordChar '-' = True + isWordChar '&' = True + isWordChar '=' = True + isWordChar c = isAlphaNum c + + wordChar = satisfy isWordChar percentEscaped = try $ (:) <$> char '%' <*> many1 hexDigit entity = try $ pure <$> characterReference punct = try $ many1 (char ',') <|> fmap pure (satisfy (\c -> not (isSpace c) && c /= '<' && c /= '>')) @@ -659,11 +756,13 @@ uri = try $ do uriChunkBetween l r = try $ do chunk <- between (char l) (char r) uriChunk return (T.pack $ [l] ++ chunk ++ [r]) -mathInlineWith :: Stream s m Char => Text -> Text -> ParserT s st m Text +mathInlineWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text mathInlineWith op cl = try $ do textStr op when (op == "$") $ notFollowedBy space - words' <- many1Till (countChar 1 (noneOf " \t\n\\") + words' <- many1Till ( + (T.singleton <$> + satisfy (\c -> not (isSpaceChar c || c == '\\'))) <|> (char '\\' >> -- This next clause is needed because \text{..} can -- contain $, \(\), etc. @@ -671,17 +770,17 @@ mathInlineWith op cl = try $ do (("\\text" <>) <$> inBalancedBraces 0 "")) <|> (\c -> T.pack ['\\',c]) <$> anyChar)) <|> do (blankline <* notFollowedBy' blankline) <|> - (oneOf " \t" <* skipMany (oneOf " \t")) + (spaceChar <* skipMany spaceChar) notFollowedBy (char '$') return " " ) (try $ textStr cl) notFollowedBy digit -- to prevent capture of $5 return $ trimMath $ T.concat words' where - inBalancedBraces :: Stream s m Char => Int -> Text -> ParserT s st m Text + inBalancedBraces :: (Stream s m Char, UpdateSourcePos s Char) => Int -> Text -> ParserT s st m Text inBalancedBraces n = fmap T.pack . inBalancedBraces' n . T.unpack - inBalancedBraces' :: Stream s m Char => Int -> String -> ParserT s st m String + inBalancedBraces' :: (Stream s m Char, UpdateSourcePos s Char) => Int -> String -> ParserT s st m String inBalancedBraces' 0 "" = do c <- anyChar if c == '{' @@ -698,12 +797,13 @@ mathInlineWith op cl = try $ do '{' -> inBalancedBraces' (numOpen + 1) (c:xs) _ -> inBalancedBraces' numOpen (c:xs) -mathDisplayWith :: Stream s m Char => Text -> Text -> ParserT s st m Text +mathDisplayWith :: (Stream s m Char, UpdateSourcePos s Char) => Text -> Text -> ParserT s st m Text mathDisplayWith op cl = try $ fmap T.pack $ do textStr op - many1Till (noneOf "\n" <|> (newline <* notFollowedBy' blankline)) (try $ textStr cl) + many1Till (satisfy (/= '\n') <|> (newline <* notFollowedBy' blankline)) + (try $ textStr cl) -mathDisplay :: (HasReaderOptions st, Stream s m Char) +mathDisplay :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") @@ -712,7 +812,7 @@ mathDisplay = <|> (guardEnabled Ext_tex_math_double_backslash >> mathDisplayWith "\\\\[" "\\\\]") -mathInline :: (HasReaderOptions st , Stream s m Char) +mathInline :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Text mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") @@ -725,7 +825,7 @@ mathInline = -- displacement (the difference between the source column at the end -- and the source column at the beginning). Vertical displacement -- (source row) is ignored. -withHorizDisplacement :: Stream s m Char +withHorizDisplacement :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m a -- ^ Parser to apply -> ParserT s st m (a, Int) -- ^ (result, displacement) withHorizDisplacement parser = do @@ -737,30 +837,37 @@ withHorizDisplacement parser = do -- | Applies a parser and returns the raw string that was parsed, -- along with the value produced by the parser. withRaw :: Monad m - => ParsecT Text st m a - -> ParsecT Text st m (a, Text) + => ParsecT Sources st m a + -> ParsecT Sources st m (a, Text) withRaw parser = do - pos1 <- getPosition - inp <- getInput + inps1 <- getInput result <- parser - pos2 <- getPosition - let (l1,c1) = (sourceLine pos1, sourceColumn pos1) - let (l2,c2) = (sourceLine pos2, sourceColumn pos2) - let inplines = take ((l2 - l1) + 1) $ T.lines inp - let raw = case inplines of - [] -> "" - [l] -> T.take (c2 - c1) l - ls -> T.unlines (init ls) <> T.take (c2 - 1) (last ls) - return (result, raw) + inps2 <- getInput + -- 'raw' is the difference between inps1 and inps2 + return (result, sourcesDifference inps1 inps2) + +sourcesDifference :: Sources -> Sources -> Text +sourcesDifference (Sources is1) (Sources is2) = go is1 is2 + where + go inps1 inps2 = + case (inps1, inps2) of + ([], _) -> mempty + (_, []) -> mconcat $ map snd inps1 + ((p1,t1):rest1, (p2, t2):rest2) + | p1 == p2 + , t1 == t2 -> go rest1 rest2 + | p1 == p2 + , t1 /= t2 -> fromMaybe mempty $ T.stripSuffix t2 t1 + | otherwise -> t1 <> go rest1 inps2 -- | Parses backslash, then applies character parser. -escaped :: Stream s m Char +escaped :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char -- ^ Parser for character to escape -> ParserT s st m Char escaped parser = try $ char '\\' >> parser -- | Parse character entity. -characterReference :: Stream s m Char => ParserT s st m Char +characterReference :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char characterReference = try $ do char '&' ent <- many1Till nonspaceChar (char ';') @@ -773,19 +880,19 @@ characterReference = try $ do _ -> Prelude.fail "entity not found" -- | Parses an uppercase roman numeral and returns (UpperRoman, number). -upperRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +upperRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) upperRoman = do num <- romanNumeral True return (UpperRoman, num) -- | Parses a lowercase roman numeral and returns (LowerRoman, number). -lowerRoman :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +lowerRoman :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) lowerRoman = do num <- romanNumeral False return (LowerRoman, num) -- | Parses a decimal numeral and returns (Decimal, number). -decimal :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +decimal :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) decimal = do num <- many1 digit return (Decimal, fromMaybe 1 $ safeRead $ T.pack num) @@ -794,7 +901,7 @@ decimal = do -- returns (DefaultStyle, [next example number]). The next -- example number is incremented in parser state, and the label -- (if present) is added to the label table. -exampleNum :: Stream s m Char +exampleNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m (ListNumberStyle, Int) exampleNum = do char '@' @@ -813,37 +920,37 @@ exampleNum = do return (Example, num) -- | Parses a '#' returns (DefaultStyle, 1). -defaultNum :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +defaultNum :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) defaultNum = do char '#' return (DefaultStyle, 1) -- | Parses a lowercase letter and returns (LowerAlpha, number). -lowerAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +lowerAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) lowerAlpha = do - ch <- oneOf ['a'..'z'] + ch <- satisfy isAsciiLower return (LowerAlpha, ord ch - ord 'a' + 1) -- | Parses an uppercase letter and returns (UpperAlpha, number). -upperAlpha :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +upperAlpha :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) upperAlpha = do - ch <- oneOf ['A'..'Z'] + ch <- satisfy isAsciiUpper return (UpperAlpha, ord ch - ord 'A' + 1) -- | Parses a roman numeral i or I -romanOne :: Stream s m Char => ParserT s st m (ListNumberStyle, Int) +romanOne :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) romanOne = (char 'i' >> return (LowerRoman, 1)) <|> (char 'I' >> return (UpperRoman, 1)) -- | Parses an ordered list marker and returns list attributes. -anyOrderedListMarker :: Stream s m Char => ParserT s ParserState m ListAttributes +anyOrderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s ParserState m ListAttributes anyOrderedListMarker = choice [delimParser numParser | delimParser <- [inPeriod, inOneParen, inTwoParens], numParser <- [decimal, exampleNum, defaultNum, romanOne, lowerAlpha, lowerRoman, upperAlpha, upperRoman]] -- | Parses a list number (num) followed by a period, returns list attributes. -inPeriod :: Stream s m Char +inPeriod :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) -> ParserT s st m ListAttributes inPeriod num = try $ do @@ -855,7 +962,7 @@ inPeriod num = try $ do return (start, style, delim) -- | Parses a list number (num) followed by a paren, returns list attributes. -inOneParen :: Stream s m Char +inOneParen :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) -> ParserT s st m ListAttributes inOneParen num = try $ do @@ -864,7 +971,7 @@ inOneParen num = try $ do return (start, style, OneParen) -- | Parses a list number (num) enclosed in parens, returns list attributes. -inTwoParens :: Stream s m Char +inTwoParens :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m (ListNumberStyle, Int) -> ParserT s st m ListAttributes inTwoParens num = try $ do @@ -875,7 +982,7 @@ inTwoParens num = try $ do -- | Parses an ordered list marker with a given style and delimiter, -- returns number. -orderedListMarker :: Stream s m Char +orderedListMarker :: (Stream s m Char, UpdateSourcePos s Char) => ListNumberStyle -> ListNumberDelim -> ParserT s ParserState m Int @@ -898,10 +1005,10 @@ orderedListMarker style delim = do return start -- | Parses a character reference and returns a Str element. -charRef :: Stream s m Char => ParserT s st m Inline +charRef :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inline charRef = Str . T.singleton <$> characterReference -lineBlockLine :: Monad m => ParserT Text st m Text +lineBlockLine :: Monad m => ParserT Sources st m Text lineBlockLine = try $ do char '|' char ' ' @@ -911,11 +1018,11 @@ lineBlockLine = try $ do continuations <- many (try $ char ' ' >> anyLine) return $ white <> T.unwords (line : continuations) -blankLineBlockLine :: Stream s m Char => ParserT s st m Char +blankLineBlockLine :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Char blankLineBlockLine = try (char '|' >> blankline) -- | Parses an RST-style line block and returns a list of strings. -lineBlockLines :: Monad m => ParserT Text st m [Text] +lineBlockLines :: Monad m => ParserT Sources st m [Text] lineBlockLines = try $ do lines' <- many1 (lineBlockLine <|> (T.singleton <$> blankLineBlockLine)) skipMany blankline @@ -923,7 +1030,8 @@ lineBlockLines = try $ do -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. -tableWith :: (Stream s m Char, HasReaderOptions st, Monad mf) +tableWith :: (Stream s m Char, UpdateSourcePos s Char, + HasReaderOptions st, Monad mf) => ParserT s st m (mf [Blocks], [Alignment], [Int]) -> ([Int] -> ParserT s st m (mf [Blocks])) -> ParserT s st m sep @@ -943,7 +1051,8 @@ tableWith headerParser rowParser lineParser footerParser = try $ do type TableComponents mf = ([Alignment], [Double], mf [Row], mf [Row]) -tableWith' :: (Stream s m Char, HasReaderOptions st, Monad mf) +tableWith' :: (Stream s m Char, UpdateSourcePos s Char, + HasReaderOptions st, Monad mf) => ParserT s st m (mf [Blocks], [Alignment], [Int]) -> ([Int] -> ParserT s st m (mf [Blocks])) -> ParserT s st m sep @@ -992,20 +1101,19 @@ widthsFromIndices numColumns' indices = -- (which may be grid), then the rows, -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). -gridTableWith :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st, - Monad mf, IsString s) - => ParserT s st m (mf Blocks) -- ^ Block list parser +gridTableWith :: (Monad m, HasReaderOptions st, HasLastStrPosition st, Monad mf) + => ParserT Sources st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table - -> ParserT s st m (mf Blocks) + -> ParserT Sources st m (mf Blocks) gridTableWith blocks headless = tableWith (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter -gridTableWith' :: (Stream s m Char, HasReaderOptions st, HasLastStrPosition st, - Monad mf, IsString s) - => ParserT s st m (mf Blocks) -- ^ Block list parser +gridTableWith' :: (Monad m, HasReaderOptions st, HasLastStrPosition st, + Monad mf) + => ParserT Sources st m (mf Blocks) -- ^ Block list parser -> Bool -- ^ Headerless table - -> ParserT s st m (TableComponents mf) + -> ParserT Sources st m (TableComponents mf) gridTableWith' blocks headless = tableWith' (gridTableHeader headless blocks) (gridTableRow blocks) (gridTableSep '-') gridTableFooter @@ -1014,7 +1122,7 @@ gridTableSplitLine :: [Int] -> Text -> [Text] gridTableSplitLine indices line = map removeFinalBar $ tail $ splitTextByIndices (init indices) $ trimr line -gridPart :: Stream s m Char => Char -> ParserT s st m ((Int, Int), Alignment) +gridPart :: Monad m => Char -> ParserT Sources st m ((Int, Int), Alignment) gridPart ch = do leftColon <- option False (True <$ char ':') dashes <- many1 (char ch) @@ -1029,7 +1137,7 @@ gridPart ch = do (False, False) -> AlignDefault return ((lengthDashes, lengthDashes + 1), alignment) -gridDashedLines :: Stream s m Char => Char -> ParserT s st m [((Int, Int), Alignment)] +gridDashedLines :: Monad m => Char -> ParserT Sources st m [((Int, Int), Alignment)] gridDashedLines ch = try $ char '+' >> many1 (gridPart ch) <* blankline removeFinalBar :: Text -> Text @@ -1038,47 +1146,47 @@ removeFinalBar = T.dropWhileEnd go . T.dropWhileEnd (=='|') go c = T.any (== c) " \t" -- | Separator between rows of grid table. -gridTableSep :: Stream s m Char => Char -> ParserT s st m Char +gridTableSep :: Monad m => Char -> ParserT Sources st m Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. -gridTableHeader :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) +gridTableHeader :: (Monad m, Monad mf, HasLastStrPosition st) => Bool -- ^ Headerless table - -> ParserT s st m (mf Blocks) - -> ParserT s st m (mf [Blocks], [Alignment], [Int]) -gridTableHeader headless blocks = try $ do + -> ParserT Sources st m (mf Blocks) + -> ParserT Sources st m (mf [Blocks], [Alignment], [Int]) +gridTableHeader True _ = do + optional blanklines + dashes <- gridDashedLines '-' + let aligns = map snd dashes + let lines' = map (snd . fst) dashes + let indices = scanl (+) 0 lines' + return (return [], aligns, indices) +gridTableHeader False blocks = try $ do optional blanklines dashes <- gridDashedLines '-' - rawContent <- if headless - then return $ repeat "" - else many1 - (notFollowedBy (gridTableSep '=') >> char '|' >> + rawContent <- many1 (notFollowedBy (gridTableSep '=') >> char '|' >> T.pack <$> many1Till anyChar newline) - underDashes <- if headless - then return dashes - else gridDashedLines '=' + underDashes <- gridDashedLines '=' guard $ length dashes == length underDashes let lines' = map (snd . fst) underDashes let indices = scanl (+) 0 lines' let aligns = map snd underDashes - let rawHeads = if headless - then replicate (length underDashes) "" - else map (T.unlines . map trim) $ transpose + let rawHeads = map (T.unlines . map trim) $ transpose $ map (gridTableSplitLine indices) rawContent heads <- sequence <$> mapM (parseFromString' blocks . trim) rawHeads return (heads, aligns, indices) -gridTableRawLine :: Stream s m Char => [Int] -> ParserT s st m [Text] +gridTableRawLine :: (Stream s m Char, UpdateSourcePos s Char) => [Int] -> ParserT s st m [Text] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline return (gridTableSplitLine indices $ T.pack line) -- | Parse row of grid table. -gridTableRow :: (Stream s m Char, Monad mf, IsString s, HasLastStrPosition st) - => ParserT s st m (mf Blocks) +gridTableRow :: (Monad m, Monad mf, HasLastStrPosition st) + => ParserT Sources st m (mf Blocks) -> [Int] - -> ParserT s st m (mf [Blocks]) + -> ParserT Sources st m (mf [Blocks]) gridTableRow blocks indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((<> "\n") . T.unlines . removeOneLeadingSpace) $ @@ -1099,34 +1207,38 @@ removeOneLeadingSpace xs = Just (c, _) -> c == ' ' -- | Parse footer for a grid table. -gridTableFooter :: Stream s m Char => ParserT s st m () +gridTableFooter :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () gridTableFooter = optional blanklines --- -- | Removes the ParsecT layer from the monad transformer stack -readWithM :: (Stream s m Char, ToText s) - => ParserT s st m a -- ^ parser - -> st -- ^ initial state - -> s -- ^ input +readWithM :: (Monad m, ToSources t) + => ParserT Sources st m a -- ^ parser + -> st -- ^ initial state + -> t -- ^ input -> m (Either PandocError a) readWithM parser state input = - mapLeft (PandocParsecError $ toText input) `liftM` runParserT parser state "source" input + mapLeft (PandocParsecError sources) + <$> runParserT parser state (initialSourceName sources) sources + where + sources = toSources input -- | Parse a string with a given parser and state -readWith :: Parser Text st a +readWith :: ToSources t + => Parser Sources st a -> st - -> Text + -> t -> Either PandocError a readWith p t inp = runIdentity $ readWithM p t inp -- | Parse a string with @parser@ (for testing). testStringWith :: Show a - => ParserT Text ParserState Identity a + => ParserT Sources ParserState Identity a -> Text -> IO () -testStringWith parser str = UTF8.putStrLn $ show $ - readWith parser defaultParserState str +testStringWith parser str = UTF8.putStrLn $ tshow $ + readWith parser defaultParserState (toSources str) -- | Parsing options. data ParserState = ParserState @@ -1146,7 +1258,7 @@ data ParserState = ParserState stateInNote :: Bool, -- ^ True if parsing note contents stateNoteNumber :: Int, -- ^ Last note number for citations stateMeta :: Meta, -- ^ Document metadata - stateMeta' :: F Meta, -- ^ Document metadata + stateMeta' :: Future ParserState Meta, -- ^ Document metadata stateCitations :: M.Map Text Text, -- ^ RST-style citations stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used stateIdentifiers :: Set.Set Text, -- ^ Header identifiers used @@ -1325,7 +1437,7 @@ data QuoteContext type NoteTable = [(Text, Text)] -type NoteTable' = M.Map Text (SourcePos, F Blocks) +type NoteTable' = M.Map Text (SourcePos, Future ParserState Blocks) -- used in markdown reader newtype Key = Key Text deriving (Show, Read, Eq, Ord) @@ -1360,7 +1472,7 @@ registerHeader (ident,classes,kvs) header' = do then do let id' = uniqueIdent exts (B.toList header') ids let id'' = if Ext_ascii_identifiers `extensionEnabled` exts - then T.pack $ mapMaybe toAsciiChar $ T.unpack id' + then toAsciiText id' else id' updateState $ updateIdentifierList $ Set.insert id' updateState $ updateIdentifierList $ Set.insert id'' @@ -1373,34 +1485,42 @@ registerHeader (ident,classes,kvs) header' = do updateState $ updateIdentifierList $ Set.insert ident return (ident,classes,kvs) -smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st, + HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines smartPunctuation inlineParser = do guardEnabled Ext_smart - choice [ quoted inlineParser, apostrophe, dash, ellipses ] - -apostrophe :: Stream s m Char => ParserT s st m Inlines -apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\x2019") + choice [ quoted inlineParser, apostrophe, doubleCloseQuote, dash, ellipses ] -quoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +quoted :: (HasLastStrPosition st, HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines quoted inlineParser = doubleQuoted inlineParser <|> singleQuoted inlineParser -singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines -singleQuoted inlineParser = try $ B.singleQuoted . mconcat - <$ singleQuoteStart - <*> withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd) - -doubleQuoted :: (HasQuoteContext st m, Stream s m Char) +singleQuoted inlineParser = do + singleQuoteStart + (B.singleQuoted . mconcat <$> + try + (withQuoteContext InSingleQuote (many1Till inlineParser singleQuoteEnd))) + <|> pure "\8217" + +doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines -> ParserT s st m Inlines -doubleQuoted inlineParser = try $ B.doubleQuoted . mconcat - <$ doubleQuoteStart - <*> withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd) +doubleQuoted inlineParser = do + doubleQuoteStart + (B.doubleQuoted . mconcat <$> + try + (withQuoteContext InDoubleQuote (manyTill inlineParser doubleQuoteEnd))) + <|> pure (B.str "\8220") failIfInQuoteContext :: (HasQuoteContext st m, Stream s m t) => QuoteContext @@ -1409,13 +1529,14 @@ failIfInQuoteContext context = do context' <- getQuoteContext when (context' == context) $ Prelude.fail "already inside quotes" -charOrRef :: Stream s m Char => [Char] -> ParserT s st m Char +charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParserT s st m Char charOrRef cs = oneOf cs <|> try (do c <- characterReference guard (c `elem` cs) return c) -singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, Stream s m Char) +singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () singleQuoteStart = do failIfInQuoteContext InSingleQuote @@ -1423,30 +1544,39 @@ singleQuoteStart = do guard =<< notAfterString try $ do charOrRef "'\8216\145" - notFollowedBy (oneOf [' ', '\t', '\n']) + void $ lookAhead (satisfy (not . isSpaceChar)) -singleQuoteEnd :: Stream s m Char +singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () singleQuoteEnd = try $ do charOrRef "'\8217\146" notFollowedBy alphaNum -doubleQuoteStart :: (HasQuoteContext st m, Stream s m Char) +doubleQuoteStart :: (HasLastStrPosition st, + HasQuoteContext st m, + Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () doubleQuoteStart = do failIfInQuoteContext InDoubleQuote + guard =<< notAfterString try $ do charOrRef "\"\8220\147" - notFollowedBy (oneOf [' ', '\t', '\n']) + void $ lookAhead (satisfy (not . isSpaceChar)) -doubleQuoteEnd :: Stream s m Char +doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m () doubleQuoteEnd = void (charOrRef "\"\8221\148") -ellipses :: Stream s m Char +apostrophe :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines +apostrophe = (char '\'' <|> char '\8217') >> return (B.str "\8217") + +doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines +doubleCloseQuote = B.str "\8221" <$ char '"' + +ellipses :: (Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines ellipses = try (string "..." >> return (B.str "\8230")) -dash :: (HasReaderOptions st, Stream s m Char) +dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) => ParserT s st m Inlines dash = try $ do oldDashes <- extensionEnabled Ext_old_dashes <$> getOption readerExtensions @@ -1473,20 +1603,28 @@ nested p = do updateState $ \st -> st{ stateMaxNestingLevel = nestlevel } return res -citeKey :: (Stream s m Char, HasLastStrPosition st) - => ParserT s st m (Bool, Text) -citeKey = try $ do +citeKey :: (Stream s m Char, UpdateSourcePos s Char, HasLastStrPosition st) + => Bool -- ^ If True, allow expanded @{..} syntax. + -> ParserT s st m (Bool, Text) +citeKey allowBraced = try $ do guard =<< notAfterString suppress_author <- option False (True <$ char '-') char '@' + key <- simpleCiteIdentifier + <|> if allowBraced + then charsInBalanced '{' '}' (satisfy (not . isSpace)) + else mzero + return (suppress_author, key) + +simpleCiteIdentifier :: (Stream s m Char, UpdateSourcePos s Char) + => ParserT s st m Text +simpleCiteIdentifier = do firstChar <- alphaNum <|> char '_' <|> char '*' -- @* for wildcard in nocite let regchar = satisfy (\c -> isAlphaNum c || c == '_') let internal p = try $ p <* lookAhead regchar rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/") <|> try (oneOf ":/" <* lookAhead (char '/')) - let key = firstChar:rest - return (suppress_author, T.pack key) - + return $ T.pack $ firstChar:rest token :: (Stream s m t) => (t -> Text) @@ -1506,12 +1644,15 @@ extractIdClass (ident, cls, kvs) = (ident', cls', kvs') cls' = maybe cls T.words $ lookup "class" kvs kvs' = filter (\(k,_) -> k /= "id" || k /= "class") kvs -insertIncludedFile' :: (PandocMonad m, HasIncludeFiles st) - => ParserT a st m (mf Blocks) - -> (Text -> a) - -> [FilePath] -> FilePath - -> ParserT a st m (mf Blocks) -insertIncludedFile' blocks totoks dirs f = do +insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) + => ParserT a st m b -- ^ parser to apply + -> (Text -> a) -- ^ convert Text to stream type + -> [FilePath] -- ^ search path (directories) + -> FilePath -- ^ path of file to include + -> Maybe Int -- ^ start line (negative counts from end) + -> Maybe Int -- ^ end line (negative counts from end) + -> ParserT a st m b +insertIncludedFile parser toStream dirs f mbstartline mbendline = do oldPos <- getPosition oldInput <- getInput containers <- getIncludeFiles <$> getState @@ -1520,32 +1661,32 @@ insertIncludedFile' blocks totoks dirs f = do updateState $ addIncludeFile $ T.pack f mbcontents <- readFileFromDirs dirs f contents <- case mbcontents of - Just s -> return s + Just s -> return $ exciseLines mbstartline mbendline s Nothing -> do report $ CouldNotLoadIncludeFile (T.pack f) oldPos return "" - setPosition $ newPos f 1 1 - setInput $ totoks contents - bs <- blocks + setInput $ toStream contents + setPosition $ newPos f (fromMaybe 1 mbstartline) 1 + result <- parser setInput oldInput setPosition oldPos updateState dropLatestIncludeFile - return bs + return result + +exciseLines :: Maybe Int -> Maybe Int -> Text -> Text +exciseLines Nothing Nothing t = t +exciseLines mbstartline mbendline t = + T.unlines $ take (endline' - (startline' - 1)) + $ drop (startline' - 1) contentLines + where + contentLines = T.lines t + numLines = length contentLines + startline' = case mbstartline of + Nothing -> 1 + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end + endline' = case mbendline of + Nothing -> numLines + Just x | x >= 0 -> x + | otherwise -> numLines + x -- negative from end --- | Parse content of include file as blocks. Circular includes result in an --- @PandocParseError@. -insertIncludedFile :: (PandocMonad m, HasIncludeFiles st) - => ParserT [a] st m Blocks - -> (Text -> [a]) - -> [FilePath] -> FilePath - -> ParserT [a] st m Blocks -insertIncludedFile blocks totoks dirs f = - runIdentity <$> insertIncludedFile' (Identity <$> blocks) totoks dirs f - --- | Parse content of include file as future blocks. Circular includes result in --- an @PandocParseError@. -insertIncludedFileF :: (PandocMonad m, HasIncludeFiles st) - => ParserT Text st m (Future st Blocks) - -> [FilePath] -> FilePath - -> ParserT Text st m (Future st Blocks) -insertIncludedFileF p = insertIncludedFile' p id |