diff options
Diffstat (limited to 'src/Text')
| -rw-r--r-- | src/Text/Pandoc/Extensions.hs | 1 | ||||
| -rw-r--r-- | src/Text/Pandoc/Parsing.hs | 36 | ||||
| -rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 129 | 
3 files changed, 90 insertions, 76 deletions
| diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index e6a3ca044..95e59063b 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -111,6 +111,7 @@ data Extension =      | Ext_autolink_bare_uris  -- ^ Make all absolute URIs into links      | Ext_fancy_lists         -- ^ Enable fancy list numbers and delimiters      | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank +    | Ext_four_space_rule     -- ^ Require 4-space indent for list contents      | Ext_startnum            -- ^ Make start number of ordered list significant      | Ext_definition_lists    -- ^ Definition lists as in pandoc, mmd, php      | Ext_compact_definition_lists  -- ^ Definition lists without diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 37a0b53b4..9ed18d4e0 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -50,6 +50,7 @@ module Text.Pandoc.Parsing ( takeWhileP,                               blankline,                               blanklines,                               gobbleSpaces, +                             gobbleAtMostSpaces,                               enclosed,                               stringAnyCase,                               parseFromString, @@ -380,14 +381,33 @@ blanklines = many1 blankline  -- | Gobble n spaces; if tabs are encountered, expand them  -- and gobble some or all of their spaces, leaving the rest. -gobbleSpaces :: Monad m => ReaderOptions -> Int -> ParserT [Char] st m () -gobbleSpaces _    0 = return () -gobbleSpaces opts n = try $ do -  char ' ' <|> do char '\t' -                  inp <- getInput -                  setInput $ replicate (readerTabStop opts - 1) ' ' ++ inp -                  return ' ' -  gobbleSpaces opts (n - 1) +gobbleSpaces :: (HasReaderOptions st, Monad m) +             => Int -> ParserT [Char] st m () +gobbleSpaces 0 = return () +gobbleSpaces n +  | n < 0     = error "gobbleSpaces called with negative number" +  | otherwise = try $ do +      char ' ' <|> eatOneSpaceOfTab +      gobbleSpaces (n - 1) + +eatOneSpaceOfTab :: (HasReaderOptions st, Monad m) => ParserT [Char] st m Char +eatOneSpaceOfTab = do +  char '\t' +  tabstop <- getOption readerTabStop +  inp <- getInput +  setInput $ replicate (tabstop - 1) ' ' ++ inp +  return ' ' + +-- | Gobble up to n spaces; if tabs are encountered, expand them +-- and gobble some or all of their spaces, leaving the rest. +gobbleAtMostSpaces :: (HasReaderOptions st, Monad m) +                   => Int -> ParserT [Char] st m Int +gobbleAtMostSpaces 0 = return 0 +gobbleAtMostSpaces n +  | n < 0     = error "gobbleAtMostSpaces called with negative number" +  | otherwise = option 0 $ do +      char ' ' <|> eatOneSpaceOfTab +      (+ 1) <$> gobbleAtMostSpaces (n - 1)  -- | Parses material enclosed between start and end parsers.  enclosed :: (Show end, Stream s  m Char) => ParserT s st m t   -- ^ start parser diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 26263d674..664691c8c 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -138,12 +138,7 @@ nonindentSpaces = do  skipNonindentSpaces :: PandocMonad m => MarkdownParser m Int  skipNonindentSpaces = do    tabStop <- getOption readerTabStop -  atMostSpaces (tabStop - 1) <* notFollowedBy spaceChar - -atMostSpaces :: PandocMonad m => Int -> MarkdownParser m Int -atMostSpaces n -  | n > 0     = (char ' ' >> (+1) <$> atMostSpaces (n-1)) <|> return 0 -  | otherwise = return 0 +  gobbleAtMostSpaces (tabStop - 1) <* notFollowedBy spaceChar  litChar :: PandocMonad m => MarkdownParser m Char  litChar = escapedChar' @@ -809,49 +804,51 @@ blockQuote = do  bulletListStart :: PandocMonad m => MarkdownParser m ()  bulletListStart = try $ do    optional newline -- if preceded by a Plain block in a list context -  startpos <- sourceColumn <$> getPosition    skipNonindentSpaces    notFollowedBy' (() <$ hrule)     -- because hrules start out just like lists    satisfy isBulletListMarker -  endpos <- sourceColumn <$> getPosition -  tabStop <- getOption readerTabStop -  lookAhead (newline <|> spaceChar) -  () <$ atMostSpaces (tabStop - (endpos - startpos)) +  gobbleSpaces 1 <|> () <$ lookAhead newline +  try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) <|> return () -anyOrderedListStart :: PandocMonad m => MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) -anyOrderedListStart = try $ do +orderedListStart :: PandocMonad m +                 => Maybe (ListNumberStyle, ListNumberDelim) +                 -> MarkdownParser m (Int, ListNumberStyle, ListNumberDelim) +orderedListStart mbstydelim = try $ do    optional newline -- if preceded by a Plain block in a list context -  startpos <- sourceColumn <$> getPosition    skipNonindentSpaces    notFollowedBy $ string "p." >> spaceChar >> digit  -- page number -  res <- do guardDisabled Ext_fancy_lists -            start <- many1 digit >>= safeRead -            char '.' -            return (start, DefaultStyle, DefaultDelim) -     <|> do (num, style, delim) <- anyOrderedListMarker -            -- if it could be an abbreviated first name, -            -- insist on more than one space -            when (delim == Period && (style == UpperAlpha || -                 (style == UpperRoman && -                  num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ -               () <$ spaceChar -            return (num, style, delim) -  endpos <- sourceColumn <$> getPosition -  tabStop <- getOption readerTabStop -  lookAhead (newline <|> spaceChar) -  atMostSpaces (tabStop - (endpos - startpos)) -  return res +  (do guardDisabled Ext_fancy_lists +      start <- many1 digit >>= safeRead +      char '.' +      gobbleSpaces 1 <|> () <$ lookAhead newline +      optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) +      return (start, DefaultStyle, DefaultDelim)) +   <|> +   (do (num, style, delim) <- maybe +          anyOrderedListMarker +          (\(sty,delim) -> (\start -> (start,sty,delim)) <$> +               orderedListMarker sty delim) +          mbstydelim +       gobbleSpaces 1 <|> () <$ lookAhead newline +       -- if it could be an abbreviated first name, +       -- insist on more than one space +       when (delim == Period && (style == UpperAlpha || +            (style == UpperRoman && +             num `elem` [1, 5, 10, 50, 100, 500, 1000]))) $ +              () <$ lookAhead (newline <|> spaceChar) +       optional $ try (gobbleAtMostSpaces 3 >> notFollowedBy spaceChar) +       return (num, style, delim))  listStart :: PandocMonad m => MarkdownParser m () -listStart = bulletListStart <|> (anyOrderedListStart >> return ()) +listStart = bulletListStart <|> (orderedListStart Nothing >> return ()) -listLine :: PandocMonad m => MarkdownParser m String -listLine = try $ do -  notFollowedBy' (do indentSpaces -                     many spaceChar +listLine :: PandocMonad m => Int -> MarkdownParser m String +listLine continuationIndent = try $ do +  notFollowedBy' (do gobbleSpaces continuationIndent +                     skipMany spaceChar                       listStart)    notFollowedByHtmlCloser -  optional (() <$ indentSpaces) +  optional (() <$ gobbleSpaces continuationIndent)    listLineCommon  listLineCommon :: PandocMonad m => MarkdownParser m String @@ -864,26 +861,39 @@ listLineCommon = concat <$> manyTill  -- parse raw text for one list item, excluding start marker and continuations  rawListItem :: PandocMonad m              => MarkdownParser m a -            -> MarkdownParser m String +            -> MarkdownParser m (String, Int)  rawListItem start = try $ do +  pos1 <- getPosition    start +  pos2 <- getPosition +  continuationIndent <- (4 <$ guardEnabled Ext_four_space_rule) +                    <|> return (sourceColumn pos2 - sourceColumn pos1)    first <- listLineCommon    rest <- many (do notFollowedBy listStart                     notFollowedBy (() <$ codeBlockFenced)                     notFollowedBy blankline -                   listLine) +                   listLine continuationIndent)    blanks <- many blankline -  return $ unlines (first:rest) ++ blanks +  let result = unlines (first:rest) ++ blanks +  return (result, continuationIndent)  -- continuation of a list item - indented and separated by blankline  -- or (in compact lists) endline.  -- note: nested lists are parsed as continuations -listContinuation :: PandocMonad m => MarkdownParser m String -listContinuation = try $ do -  lookAhead indentSpaces -  result <- many1 listContinuationLine +listContinuation :: PandocMonad m => Int -> MarkdownParser m String +listContinuation continuationIndent = try $ do +  x <- try $ do +         notFollowedBy blankline +         notFollowedByHtmlCloser +         gobbleSpaces continuationIndent +         anyLineNewline +  xs <- many $ try $ do +         notFollowedBy blankline +         notFollowedByHtmlCloser +         gobbleSpaces continuationIndent <|> notFollowedBy' listStart +         anyLineNewline    blanks <- many blankline -  return $ concat result ++ blanks +  return $ concat (x:xs) ++ blanks  notFollowedByHtmlCloser :: PandocMonad m => MarkdownParser m ()  notFollowedByHtmlCloser = do @@ -892,20 +902,12 @@ notFollowedByHtmlCloser = do          Just t  -> notFollowedBy' $ htmlTag (~== TagClose t)          Nothing -> return () -listContinuationLine :: PandocMonad m => MarkdownParser m String -listContinuationLine = try $ do -  notFollowedBy blankline -  notFollowedBy' listStart -  notFollowedByHtmlCloser -  optional indentSpaces -  anyLineNewline -  listItem :: PandocMonad m           => MarkdownParser m a           -> MarkdownParser m (F Blocks)  listItem start = try $ do -  first <- rawListItem start -  continuations <- many listContinuation +  (first, continuationIndent) <- rawListItem start +  continuations <- many (listContinuation continuationIndent)    -- parsing with ListItemState forces markers at beginning of lines to    -- count as list item markers, even if not separated by blank space.    -- see definition of "endline" @@ -920,23 +922,14 @@ listItem start = try $ do  orderedList :: PandocMonad m => MarkdownParser m (F Blocks)  orderedList = try $ do -  (start, style, delim) <- lookAhead anyOrderedListStart +  (start, style, delim) <- lookAhead (orderedListStart Nothing)    unless (style `elem` [DefaultStyle, Decimal, Example] &&            delim `elem` [DefaultDelim, Period]) $      guardEnabled Ext_fancy_lists    when (style == Example) $ guardEnabled Ext_example_lists    items <- fmap sequence $ many1 $ listItem -                 ( try $ do -                     optional newline -- if preceded by Plain block in a list -                     startpos <- sourceColumn <$> getPosition -                     skipNonindentSpaces -                     res <- orderedListMarker style delim -                     endpos <- sourceColumn <$> getPosition -                     tabStop <- getOption readerTabStop -                     lookAhead (newline <|> spaceChar) -                     atMostSpaces (tabStop - (endpos - startpos)) -                     return res ) -  start' <- option 1 $ guardEnabled Ext_startnum >> return start +                 (orderedListStart (Just (style, delim))) +  start' <- (start <$ guardEnabled Ext_startnum) <|> return 1    return $ B.orderedListWith (start', style, delim) <$> fmap compactify items  bulletList :: PandocMonad m => MarkdownParser m (F Blocks) @@ -1122,7 +1115,7 @@ rawHtmlBlocks = do    updateState $ \st -> st{ stateInHtmlBlock = Just tagtype }    let closer = htmlTag (\x -> x ~== TagClose tagtype)    let block' = do notFollowedBy' closer -                  atMostSpaces indentlevel +                  gobbleAtMostSpaces indentlevel                    block    contents <- mconcat <$> many block'    result <- | 
