aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs4
-rw-r--r--src/Text/Pandoc/Readers/Org.hs70
2 files changed, 59 insertions, 15 deletions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 02a787670..7a3be8291 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -735,9 +735,9 @@ anyOrderedListStart = try $ do
skipNonindentSpaces
notFollowedBy $ string "p." >> spaceChar >> digit -- page number
res <- do guardDisabled Ext_fancy_lists
- many1 digit
+ start <- many1 digit >>= safeRead
char '.'
- return (1, DefaultStyle, DefaultDelim)
+ return (start, DefaultStyle, DefaultDelim)
<|> do (num, style, delim) <- anyOrderedListMarker
-- if it could be an abbreviated first name,
-- insist on more than one space
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5c00a1b27..a6ebf65dc 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -69,7 +69,32 @@ parseOrg = do
blocks' <- parseBlocks
st <- getState
let meta = runF (orgStateMeta' st) st
- return $ Pandoc meta $ filter (/= Null) (B.toList $ runF blocks' st)
+ let removeUnwantedBlocks = dropCommentTrees . filter (/= Null)
+ return $ Pandoc meta $ removeUnwantedBlocks (B.toList $ runF blocks' st)
+
+-- | Drop COMMENT headers and the document tree below those headers.
+dropCommentTrees :: [Block] -> [Block]
+dropCommentTrees [] = []
+dropCommentTrees blks@(b:bs) =
+ maybe blks (flip dropUntilHeaderAboveLevel bs) $ commentHeaderLevel b
+
+-- | Return the level of a header starting a comment tree and Nothing
+-- otherwise.
+commentHeaderLevel :: Block -> Maybe Int
+commentHeaderLevel blk =
+ case blk of
+ (Header level _ ((Str "COMMENT"):_)) -> Just level
+ _ -> Nothing
+
+-- | Drop blocks until a header on or above the given level is seen
+dropUntilHeaderAboveLevel :: Int -> [Block] -> [Block]
+dropUntilHeaderAboveLevel n = dropWhile (not . isHeaderLevelLowerEq n)
+
+isHeaderLevelLowerEq :: Int -> Block -> Bool
+isHeaderLevelLowerEq n blk =
+ case blk of
+ (Header level _ _) -> n >= level
+ _ -> False
--
-- Parser State for Org
@@ -828,12 +853,14 @@ list :: OrgParser (F Blocks)
list = choice [ definitionList, bulletList, orderedList ] <?> "list"
definitionList :: OrgParser (F Blocks)
-definitionList = fmap B.definitionList . fmap compactify'DL . sequence
- <$> many1 (definitionListItem bulletListStart)
+definitionList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.definitionList . fmap compactify'DL . sequence
+ <$> many1 (definitionListItem $ bulletListStart' (Just n))
bulletList :: OrgParser (F Blocks)
-bulletList = fmap B.bulletList . fmap compactify' . sequence
- <$> many1 (listItem bulletListStart)
+bulletList = try $ do n <- lookAhead (bulletListStart' Nothing)
+ fmap B.bulletList . fmap compactify' . sequence
+ <$> many1 (listItem (bulletListStart' $ Just n))
orderedList :: OrgParser (F Blocks)
orderedList = fmap B.orderedList . fmap compactify' . sequence
@@ -845,10 +872,27 @@ genericListStart listMarker = try $
(+) <$> (length <$> many spaceChar)
<*> (length <$> listMarker <* many1 spaceChar)
--- parses bullet list start and returns its length (excl. following whitespace)
+-- parses bullet list marker. maybe we know the indent level
bulletListStart :: OrgParser Int
-bulletListStart = genericListStart bulletListMarker
- where bulletListMarker = pure <$> oneOf "*-+"
+bulletListStart = bulletListStart' Nothing
+
+bulletListStart' :: Maybe Int -> OrgParser Int
+-- returns length of bulletList prefix, inclusive of marker
+bulletListStart' Nothing = do ind <- many spaceChar
+ oneOf bullets
+ many1 spaceChar
+ return $ length ind + 1
+ -- Unindented lists are legal, but they can't use '*' bullets
+ -- We return n to maintain compatibility with the generic listItem
+bulletListStart' (Just n) = do count (n-1) spaceChar
+ oneOf validBullets
+ many1 spaceChar
+ return n
+ where validBullets = if n == 1 then noAsterisks else bullets
+ noAsterisks = filter (/= '*') bullets
+
+bullets :: String
+bullets = "*+-"
orderedListStart :: OrgParser Int
orderedListStart = genericListStart orderedListMarker
@@ -927,7 +971,7 @@ parseInlines = trimInlinesF . mconcat <$> many1 inline
-- treat these as potentially non-text when parsing inline:
specialChars :: [Char]
-specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
+specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~"
whitespace :: OrgParser (F Inlines)
@@ -1205,10 +1249,10 @@ displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
]
symbol :: OrgParser (F Inlines)
symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
- where updatePositions c
- | c `elem` emphasisPreChars = c <$ updateLastPreCharPos
- | c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
- | otherwise = return c
+ where updatePositions c = do
+ when (c `elem` emphasisPreChars) updateLastPreCharPos
+ when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
+ return c
emphasisBetween :: Char
-> OrgParser (F Inlines)