aboutsummaryrefslogtreecommitdiff
path: root/src/Text
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text')
-rw-r--r--src/Text/Pandoc/Options.hs40
-rw-r--r--src/Text/Pandoc/Readers/HTML.hs45
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs156
3 files changed, 118 insertions, 123 deletions
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs
index 6784219c9..cf1143067 100644
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@@ -37,19 +37,33 @@ import qualified Data.Set as Set
import Data.Default
-- | Individually selectable syntax extensions.
-data Extension = Footnotes
- | Tex_math
- | Delimited_code_blocks
- | Markdown_in_html_blocks
- | Fancy_lists
- | Startnum
- | Definition_lists
- | Header_identifiers
- | All_symbols_escapable
- | Intraword_underscores
- | Blank_before_blockquote
- | Blank_before_header
- | Significant_bullets
+data Extension = Ext_footnotes
+ | Ext_inline_notes
+ | Ext_pandoc_title_blocks
+ | Ext_table_captions
+ | Ext_simple_tables
+ | Ext_multiline_tables
+ | Ext_grid_tables
+ | Ext_pipe_tables
+ | Ext_citations
+ | Ext_raw_tex
+ | Ext_tex_math
+ | Ext_latex_macros
+ | Ext_delimited_code_blocks
+ | Ext_markdown_in_html_blocks
+ | Ext_autolink_code_spans
+ | Ext_fancy_lists
+ | Ext_startnum
+ | Ext_definition_lists
+ | Ext_header_identifiers
+ | Ext_all_symbols_escapable
+ | Ext_intraword_underscores
+ | Ext_blank_before_blockquote
+ | Ext_blank_before_header
+ | Ext_significant_bullets
+ | Ext_strikeout
+ | Ext_superscript
+ | Ext_subscript
deriving (Show, Read, Enum, Eq, Ord, Bounded)
data ReaderOptions = ReaderOptions{
diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs
index 96ad9ce20..515d8b008 100644
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@@ -125,25 +125,22 @@ pBulletList = try $ do
pOrderedList :: TagParser [Block]
pOrderedList = try $ do
TagOpen _ attribs <- pSatisfy (~== TagOpen "ol" [])
- st <- getState
- let (start, style) = if readerStrict (stateOptions st)
- then (1, DefaultStyle)
- else (sta', sty')
- where sta = fromMaybe "1" $
- lookup "start" attribs
- sta' = if all isDigit sta
- then read sta
- else 1
- sty = fromMaybe (fromMaybe "" $
- lookup "style" attribs) $
- lookup "class" attribs
- sty' = case sty of
- "lower-roman" -> LowerRoman
- "upper-roman" -> UpperRoman
- "lower-alpha" -> LowerAlpha
- "upper-alpha" -> UpperAlpha
- "decimal" -> Decimal
- _ -> DefaultStyle
+ let (start, style) = (sta', sty')
+ where sta = fromMaybe "1" $
+ lookup "start" attribs
+ sta' = if all isDigit sta
+ then read sta
+ else 1
+ sty = fromMaybe (fromMaybe "" $
+ lookup "style" attribs) $
+ lookup "class" attribs
+ sty' = case sty of
+ "lower-roman" -> LowerRoman
+ "upper-roman" -> UpperRoman
+ "lower-alpha" -> LowerAlpha
+ "upper-alpha" -> UpperAlpha
+ "decimal" -> Decimal
+ _ -> DefaultStyle
let nonItem = pSatisfy (\t ->
not (tagOpen (`elem` ["li","ol","ul","dl"]) (const True) t) &&
not (t ~== TagClose "ol"))
@@ -280,10 +277,7 @@ pCodeBlock = try $ do
let attribsId = fromMaybe "" $ lookup "id" attr
let attribsClasses = words $ fromMaybe "" $ lookup "class" attr
let attribsKV = filter (\(k,_) -> k /= "class" && k /= "id") attr
- st <- getState
- let attribs = if readerStrict (stateOptions st)
- then ("",[],[])
- else (attribsId, attribsClasses, attribsKV)
+ let attribs = (attribsId, attribsClasses, attribsKV)
return [CodeBlock attribs result]
inline :: TagParser [Inline]
@@ -331,14 +325,13 @@ pStrong :: TagParser [Inline]
pStrong = pInlinesInTags "strong" Strong <|> pInlinesInTags "b" Strong
pSuperscript :: TagParser [Inline]
-pSuperscript = failIfStrict >> pInlinesInTags "sup" Superscript
+pSuperscript = pInlinesInTags "sup" Superscript
pSubscript :: TagParser [Inline]
-pSubscript = failIfStrict >> pInlinesInTags "sub" Subscript
+pSubscript = pInlinesInTags "sub" Subscript
pStrikeout :: TagParser [Inline]
pStrikeout = do
- failIfStrict
pInlinesInTags "s" Strikeout <|>
pInlinesInTags "strike" Strikeout <|>
pInlinesInTags "del" Strikeout <|>
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index b51cee1a6..48807cbec 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -112,12 +112,6 @@ litChar = escapedChar'
<|> noneOf "\n"
<|> (newline >> notFollowedBy blankline >> return ' ')
--- | Fail unless we're at beginning of a line.
-failUnlessBeginningOfLine :: Parser [tok] st ()
-failUnlessBeginningOfLine = do
- pos <- getPosition
- if sourceColumn pos == 1 then return () else fail "not beginning of line"
-
-- | Parse a sequence of inline elements between square brackets,
-- including inlines between balanced pairs of square brackets.
inlinesInBalancedBrackets :: Parser [Char] ParserState Inline
@@ -165,7 +159,7 @@ dateLine = try $ do
titleBlock :: Parser [Char] ParserState ([Inline], [[Inline]], [Inline])
titleBlock = try $ do
- failIfStrict
+ guardEnabled Ext_pandoc_title_blocks
title <- option [] titleLine
author <- option [] authorsLine
date <- option [] dateLine
@@ -181,10 +175,10 @@ parseMarkdown = do
startPos <- getPosition
-- go through once just to get list of reference keys and notes
-- docMinusKeys is the raw document with blanks where the keys/notes were...
- st <- getState
let firstPassParser = referenceKey
- <|> (if readerStrict (stateOptions st) then mzero else noteBlock)
- <|> liftM snd (withRaw codeBlockDelimited)
+ <|> (guardEnabled Ext_footnotes >> noteBlock)
+ <|> (guardEnabled Ext_delimited_code_blocks >>
+ liftM snd (withRaw codeBlockDelimited))
<|> lineClump
docMinusKeys <- liftM concat $ manyTill firstPassParser eof
setInput docMinusKeys
@@ -292,35 +286,22 @@ parseBlocks :: Parser [Char] ParserState [Block]
parseBlocks = manyTill block eof
block :: Parser [Char] ParserState Block
-block = do
- st <- getState
- choice (if readerStrict (stateOptions st)
- then [ header
- , codeBlockIndented
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , htmlBlock
- , para
- , plain
- , nullBlock ]
- else [ codeBlockDelimited
- , macro
- , header
- , table
- , codeBlockIndented
- , lhsCodeBlock
- , blockQuote
- , hrule
- , bulletList
- , orderedList
- , definitionList
- , rawTeXBlock
- , para
- , rawHtmlBlocks
- , plain
- , nullBlock ]) <?> "block"
+block = choice [ codeBlockDelimited
+ , guardEnabled Ext_latex_macros >> macro
+ , header
+ , table
+ , codeBlockIndented
+ , lhsCodeBlock
+ , blockQuote
+ , hrule
+ , bulletList
+ , orderedList
+ , definitionList
+ , rawTeXBlock
+ , para
+ , htmlBlock
+ , plain
+ , nullBlock ] <?> "block"
--
-- header blocks
@@ -431,8 +412,9 @@ keyValAttr = try $ do
<|> many nonspaceChar
return ("",[],[(key,val)])
-codeBlockDelimited :: Parser [Char] st Block
+codeBlockDelimited :: Parser [Char] ParserState Block
codeBlockDelimited = try $ do
+ guardEnabled Ext_delimited_code_blocks
(size, attr, c) <- blockDelimiter (\c -> c == '~' || c == '`') Nothing
contents <- manyTill anyLine (blockDelimiter (== c) (Just size))
blanklines
@@ -635,6 +617,7 @@ defListMarker = do
definitionListItem :: Parser [Char] ParserState ([Inline], [[Block]])
definitionListItem = try $ do
+ guardEnabled Ext_definition_lists
-- first, see if this has any chance of being a definition list:
lookAhead (anyLine >> optional blankline >> defListMarker)
term <- manyTill inline newline
@@ -694,9 +677,9 @@ para = try $ do
guard $ not . all isHtmlOrBlank $ result
option (Plain result) $ try $ do
newline
- blanklines <|>
- (getState >>= guard . readerStrict . stateOptions >>
- lookAhead (blockQuote <|> header) >> return "")
+ (blanklines >> return Null)
+ <|> (guardDisabled Ext_blank_before_blockquote >> lookAhead blockQuote)
+ <|> (guardDisabled Ext_blank_before_header >> lookAhead header)
return $ Para result
plain :: Parser [Char] ParserState Block
@@ -710,12 +693,16 @@ htmlElement :: Parser [Char] ParserState [Char]
htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag)
htmlBlock :: Parser [Char] ParserState Block
-htmlBlock = try $ do
- failUnlessBeginningOfLine
+htmlBlock = RawBlock "html" `fmap`
+ ((guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks)
+ <|> htmlBlock')
+
+htmlBlock' :: Parser [Char] ParserState String
+htmlBlock' = try $ do
first <- htmlElement
finalSpace <- many spaceChar
finalNewlines <- many newline
- return $ RawBlock "html" $ first ++ finalSpace ++ finalNewlines
+ return $ first ++ finalSpace ++ finalNewlines
strictHtmlBlock :: Parser [Char] ParserState [Char]
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
@@ -730,13 +717,13 @@ rawVerbatimBlock = try $ do
rawTeXBlock :: Parser [Char] ParserState Block
rawTeXBlock = do
- failIfStrict
+ guardEnabled Ext_raw_tex
result <- liftM (RawBlock "latex") rawLaTeXBlock
<|> liftM (RawBlock "context") rawConTeXtEnvironment
spaces
return result
-rawHtmlBlocks :: Parser [Char] ParserState Block
+rawHtmlBlocks :: Parser [Char] ParserState String
rawHtmlBlocks = do
htmlBlocks <- many1 $ do blk <- rawVerbatimBlock <|>
liftM snd (htmlTag isBlockTag)
@@ -750,8 +737,7 @@ rawHtmlBlocks = do
-- by a blank line
return $ blk ++ sps
let combined = concat htmlBlocks
- let combined' = if last combined == '\n' then init combined else combined
- return $ RawBlock "html" combined'
+ return $ if last combined == '\n' then init combined else combined
--
-- Tables
@@ -826,6 +812,7 @@ multilineRow indices = do
-- and followed by blank lines.
tableCaption :: Parser [Char] ParserState [Inline]
tableCaption = try $ do
+ guardEnabled Ext_table_captions
skipNonindentSpaces
string ":" <|> string "Table:"
result <- many1 inline
@@ -961,10 +948,14 @@ table :: Parser [Char] ParserState Block
table = try $ do
frontCaption <- option [] tableCaption
Table _ aligns widths heads lines' <-
- try (scanForPipe >> (pipeTable True <|> pipeTable False)) <|>
- multilineTable False <|> simpleTable True <|>
- simpleTable False <|> multilineTable True <|>
- gridTable False <|> gridTable True <?> "table"
+ try (guardEnabled Ext_pipe_tables >> scanForPipe >>
+ (pipeTable True <|> pipeTable False)) <|>
+ try (guardEnabled Ext_multiline_tables >>
+ (multilineTable False <|> simpleTable True)) <|>
+ try (guardEnabled Ext_simple_tables >>
+ (simpleTable False <|> multilineTable True)) <|>
+ try (guardEnabled Ext_grid_tables >>
+ (gridTable False <|> gridTable True)) <?> "table"
optional blanklines
caption <- if null frontCaption
then option [] tableCaption
@@ -1008,10 +999,8 @@ inlineParsers = [ whitespace
escapedChar' :: Parser [Char] ParserState Char
escapedChar' = try $ do
char '\\'
- state <- getState
- if readerStrict (stateOptions state)
- then oneOf "\\`*_{}[]()>#+-.!~"
- else satisfy (not . isAlphaNum)
+ (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum))
+ <|> oneOf "\\`*_{}[]()>#+-.!~"
escapedChar :: Parser [Char] ParserState Inline
escapedChar = do
@@ -1023,10 +1012,9 @@ escapedChar = do
ltSign :: Parser [Char] ParserState Inline
ltSign = do
- st <- getState
- if readerStrict (stateOptions st)
- then char '<'
- else notFollowedBy' rawHtmlBlocks >> char '<' -- unless it starts html
+ guardDisabled Ext_markdown_in_html_blocks
+ <|> (notFollowedBy' rawHtmlBlocks >> return ())
+ char '<'
return $ Str ['<']
exampleRef :: Parser [Char] ParserState Inline
@@ -1072,13 +1060,13 @@ math = (mathDisplay >>= applyMacros' >>= return . Math DisplayMath)
mathDisplay :: Parser [Char] ParserState String
mathDisplay = try $ do
- failIfStrict
+ guardEnabled Ext_tex_math
string "$$"
many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string "$$")
mathInline :: Parser [Char] ParserState String
mathInline = try $ do
- failIfStrict
+ guardEnabled Ext_tex_math
char '$'
notFollowedBy space
words' <- sepBy1 mathWord (many1 (spaceChar <|> (newline >>~ notFollowedBy' blankline)))
@@ -1135,18 +1123,18 @@ strong = Strong `liftM` nested
strikeout :: Parser [Char] ParserState Inline
strikeout = Strikeout `liftM`
- (failIfStrict >> inlinesBetween strikeStart strikeEnd)
+ (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd)
where strikeStart = string "~~" >> lookAhead nonspaceChar
>> notFollowedBy (char '~')
strikeEnd = try $ string "~~"
superscript :: Parser [Char] ParserState Inline
-superscript = failIfStrict >> enclosed (char '^') (char '^')
+superscript = guardEnabled Ext_superscript >> enclosed (char '^') (char '^')
(notFollowedBy spaceChar >> inline) >>= -- may not contain Space
return . Superscript
subscript :: Parser [Char] ParserState Inline
-subscript = failIfStrict >> enclosed (char '~') (char '~')
+subscript = guardEnabled Ext_subscript >> enclosed (char '~') (char '~')
(notFollowedBy spaceChar >> inline) >>= -- may not contain Space
return . Subscript
@@ -1163,7 +1151,8 @@ str = do
smart <- (readerSmart . stateOptions) `fmap` getState
a <- alphaNum
as <- many $ alphaNum
- <|> (try $ char '_' >>~ lookAhead alphaNum)
+ <|> (guardEnabled Ext_intraword_underscores >>
+ try (char '_' >>~ lookAhead alphaNum))
<|> if smart
then (try $ satisfy (\c -> c == '\'' || c == '\x2019') >>
lookAhead alphaNum >> return '\x2019')
@@ -1200,11 +1189,10 @@ endline :: Parser [Char] ParserState Inline
endline = try $ do
newline
notFollowedBy blankline
- st <- getState
- when (readerStrict (stateOptions st)) $ do
- notFollowedBy emailBlockQuoteStart
- notFollowedBy (char '#') -- atx header
+ guardEnabled Ext_blank_before_blockquote <|> notFollowedBy emailBlockQuoteStart
+ guardEnabled Ext_blank_before_header <|> notFollowedBy (char '#') -- atx header
-- parse potential list-starts differently if in a list:
+ st <- getState
when (stateParserContext st == ListItemState) $ do
notFollowedBy' bulletListStart
notFollowedBy' anyOrderedListStart
@@ -1282,10 +1270,9 @@ autoLink = try $ do
char '<'
(orig, src) <- uri <|> emailAddress
char '>'
- st <- getState
- return $ if readerStrict (stateOptions st)
- then Link [Str orig] (src, "")
- else Link [Code ("",["url"],[]) orig] (src, "")
+ (guardEnabled Ext_autolink_code_spans >>
+ return (Link [Code ("",["url"],[]) orig] (src, "")))
+ <|> return (Link [Str orig] (src, ""))
image :: Parser [Char] ParserState Inline
image = try $ do
@@ -1296,7 +1283,7 @@ image = try $ do
note :: Parser [Char] ParserState Inline
note = try $ do
- failIfStrict
+ guardEnabled Ext_footnotes
ref <- noteMarker
state <- getState
let notes = stateNotes state
@@ -1313,14 +1300,14 @@ note = try $ do
inlineNote :: Parser [Char] ParserState Inline
inlineNote = try $ do
- failIfStrict
+ guardEnabled Ext_inline_notes
char '^'
contents <- inlinesInBalancedBrackets inline
return $ Note [Para contents]
rawLaTeXInline' :: Parser [Char] ParserState Inline
rawLaTeXInline' = try $ do
- failIfStrict
+ guardEnabled Ext_raw_tex
lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env
RawInline _ s <- rawLaTeXInline
return $ RawInline "tex" s -- "tex" because it might be context or latex
@@ -1343,17 +1330,18 @@ inBrackets parser = do
rawHtmlInline :: Parser [Char] ParserState Inline
rawHtmlInline = do
- st <- getState
- (_,result) <- if readerStrict (stateOptions st)
- then htmlTag (not . isTextTag)
- else htmlTag isInlineTag
+ mdInHtml <- option False $
+ guardEnabled Ext_markdown_in_html_blocks >> return True
+ (_,result) <- if mdInHtml
+ then htmlTag isInlineTag
+ else htmlTag (not . isTextTag)
return $ RawInline "html" result
-- Citations
cite :: Parser [Char] ParserState Inline
cite = do
- failIfStrict
+ guardEnabled Ext_citations
citations <- textualCite <|> normalCite
return $ Cite citations []