diff options
author | John MacFarlane <fiddlosopher@gmail.com> | 2013-01-03 20:32:15 -0800 |
---|---|---|
committer | John MacFarlane <fiddlosopher@gmail.com> | 2013-01-03 20:35:01 -0800 |
commit | c435e9cda7eb73cf4d8c446bb829079dddfaedbc (patch) | |
tree | cc52c835729c925e6b2e20cb345f05dac7f4fb43 /src/Text | |
parent | 56ff5e1845363dea91423b785f1c28cabed213bb (diff) | |
download | pandoc-c435e9cda7eb73cf4d8c446bb829079dddfaedbc.tar.gz |
Implemented `Ext_header_identifiers`, `Ext_implicit_header_references`.
Now by default pandoc will act as if link references have been defined
for all headers. So, you can do this:
# My header
Link to [My header].
Another link to [it][My header].
Closes #691.
Diffstat (limited to 'src/Text')
-rw-r--r-- | src/Text/Pandoc/Options.hs | 4 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 306 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 4 |
4 files changed, 172 insertions, 144 deletions
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index ffaefedc2..86b1f5b99 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -89,6 +89,8 @@ data Extension = | Ext_hard_line_breaks -- ^ All newlines become hard line breaks | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions + | Ext_header_identifiers -- ^ Automatic identifiers for headers + | Ext_implicit_header_references -- ^ Implicit reference links for headers deriving (Show, Read, Enum, Eq, Ord, Bounded) pandocExtensions :: Set Extension @@ -125,6 +127,8 @@ pandocExtensions = Set.fromList , Ext_strikeout , Ext_superscript , Ext_subscript + , Ext_header_identifiers + , Ext_implicit_header_references ] strictExtensions :: Set Extension diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 777c07b4f..9a6ac1671 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -721,6 +721,7 @@ data ParserState = ParserState stateAuthors :: [[Inline]], -- ^ Authors of document stateDate :: [Inline], -- ^ Date of document stateHeaderTable :: [HeaderType], -- ^ Ordered list of header types used + stateHeaders :: [[Inline]], -- ^ List of headers (used for implicit ref links) stateNextExample :: Int, -- ^ Number of next example stateExamples :: M.Map String Int, -- ^ Map from example labels to numbers stateHasChapters :: Bool, -- ^ True if \chapter encountered @@ -747,6 +748,7 @@ defaultParserState = stateAuthors = [], stateDate = [], stateHeaderTable = [], + stateHeaders = [], stateNextExample = 1, stateExamples = M.empty, stateHasChapters = False, diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index ab8069a75..98ee40827 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -54,6 +54,8 @@ import Text.HTML.TagSoup import Text.HTML.TagSoup.Match (tagOpen) import qualified Data.Set as Set +type MarkdownParser = Parser [Char] ParserState + -- | Read markdown from an input string and return a Pandoc document. readMarkdown :: ReaderOptions -- ^ Reader options -> String -- ^ String to parse (assuming @'\n'@ line endings) @@ -103,13 +105,13 @@ spnl = try $ do skipSpaces notFollowedBy (char '\n') -indentSpaces :: Parser [Char] ParserState String +indentSpaces :: MarkdownParser String indentSpaces = try $ do tabStop <- getOption readerTabStop count tabStop (char ' ') <|> string "\t" <?> "indentation" -nonindentSpaces :: Parser [Char] ParserState String +nonindentSpaces :: MarkdownParser String nonindentSpaces = do tabStop <- getOption readerTabStop sps <- many (char ' ') @@ -117,27 +119,27 @@ nonindentSpaces = do then return sps else unexpected "indented line" -skipNonindentSpaces :: Parser [Char] ParserState () +skipNonindentSpaces :: MarkdownParser () skipNonindentSpaces = do tabStop <- getOption readerTabStop atMostSpaces (tabStop - 1) -atMostSpaces :: Int -> Parser [Char] ParserState () +atMostSpaces :: Int -> MarkdownParser () atMostSpaces 0 = notFollowedBy (char ' ') atMostSpaces n = (char ' ' >> atMostSpaces (n-1)) <|> return () -litChar :: Parser [Char] ParserState Char +litChar :: MarkdownParser Char litChar = escapedChar' <|> noneOf "\n" <|> (newline >> notFollowedBy blankline >> return ' ') -- | Parse a sequence of inline elements between square brackets, -- including inlines between balanced pairs of square brackets. -inlinesInBalancedBrackets :: Parser [Char] ParserState (F Inlines) +inlinesInBalancedBrackets :: MarkdownParser (F Inlines) inlinesInBalancedBrackets = charsInBalancedBrackets >>= parseFromString (trimInlinesF . mconcat <$> many inline) -charsInBalancedBrackets :: Parser [Char] ParserState [Char] +charsInBalancedBrackets :: MarkdownParser [Char] charsInBalancedBrackets = do char '[' result <- manyTill ( many1 (noneOf "`[]\n") @@ -152,7 +154,7 @@ charsInBalancedBrackets = do -- document structure -- -titleLine :: Parser [Char] ParserState (F Inlines) +titleLine :: MarkdownParser (F Inlines) titleLine = try $ do char '%' skipSpaces @@ -161,7 +163,7 @@ titleLine = try $ do newline return $ trimInlinesF $ mconcat res -authorsLine :: Parser [Char] ParserState (F [Inlines]) +authorsLine :: MarkdownParser (F [Inlines]) authorsLine = try $ do char '%' skipSpaces @@ -172,16 +174,16 @@ authorsLine = try $ do newline return $ sequence $ filter (not . isNull) $ map (trimInlinesF . mconcat) authors -dateLine :: Parser [Char] ParserState (F Inlines) +dateLine :: MarkdownParser (F Inlines) dateLine = try $ do char '%' skipSpaces trimInlinesF . mconcat <$> manyTill inline newline -titleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +titleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) titleBlock = pandocTitleBlock <|> mmdTitleBlock -pandocTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +pandocTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) pandocTitleBlock = try $ do guardEnabled Ext_pandoc_title_block title <- option mempty titleLine @@ -190,7 +192,7 @@ pandocTitleBlock = try $ do optional blanklines return (title, author, date) -mmdTitleBlock :: Parser [Char] ParserState (F Inlines, F [Inlines], F Inlines) +mmdTitleBlock :: MarkdownParser (F Inlines, F [Inlines], F Inlines) mmdTitleBlock = try $ do guardEnabled Ext_mmd_title_block kvPairs <- many1 kvPair @@ -200,7 +202,7 @@ mmdTitleBlock = try $ do let date = maybe mempty return $ lookup "date" kvPairs return (title, author, date) -kvPair :: Parser [Char] ParserState (String, Inlines) +kvPair :: MarkdownParser (String, Inlines) kvPair = try $ do key <- many1Till (alphaNum <|> oneOf "_- ") (char ':') val <- manyTill anyChar @@ -209,7 +211,7 @@ kvPair = try $ do let val' = trimInlines $ B.text val return (key',val') -parseMarkdown :: Parser [Char] ParserState Pandoc +parseMarkdown :: MarkdownParser Pandoc parseMarkdown = do -- markdown allows raw HTML updateState $ \state -> state { stateOptions = @@ -226,7 +228,7 @@ parseMarkdown = do $ B.setDate (runF date st) $ B.doc $ runF blocks st -referenceKey :: Parser [Char] ParserState (F Blocks) +referenceKey :: MarkdownParser (F Blocks) referenceKey = try $ do skipNonindentSpaces (_,raw) <- reference @@ -250,7 +252,7 @@ referenceKey = try $ do updateState $ \s -> s { stateKeys = M.insert (toKey raw) target oldkeys } return $ return mempty -referenceTitle :: Parser [Char] ParserState String +referenceTitle :: MarkdownParser String referenceTitle = try $ do skipSpaces >> optional newline >> skipSpaces tit <- (charsInBalanced '(' ')' litChar >>= return . unwords . words) @@ -262,7 +264,7 @@ referenceTitle = try $ do -- | PHP Markdown Extra style abbreviation key. Currently -- we just skip them, since Pandoc doesn't have an element for -- an abbreviation. -abbrevKey :: Parser [Char] ParserState (F Blocks) +abbrevKey :: MarkdownParser (F Blocks) abbrevKey = do guardEnabled Ext_abbreviations try $ do @@ -273,23 +275,23 @@ abbrevKey = do blanklines return $ return mempty -noteMarker :: Parser [Char] ParserState String +noteMarker :: MarkdownParser String noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']') -rawLine :: Parser [Char] ParserState String +rawLine :: MarkdownParser String rawLine = try $ do notFollowedBy blankline notFollowedBy' $ try $ skipNonindentSpaces >> noteMarker optional indentSpaces anyLine -rawLines :: Parser [Char] ParserState String +rawLines :: MarkdownParser String rawLines = do first <- anyLine rest <- many rawLine return $ unlines (first:rest) -noteBlock :: Parser [Char] ParserState (F Blocks) +noteBlock :: MarkdownParser (F Blocks) noteBlock = try $ do skipNonindentSpaces ref <- noteMarker @@ -309,10 +311,10 @@ noteBlock = try $ do -- parsing blocks -- -parseBlocks :: Parser [Char] ParserState (F Blocks) +parseBlocks :: MarkdownParser (F Blocks) parseBlocks = mconcat <$> manyTill block eof -block :: Parser [Char] ParserState (F Blocks) +block :: MarkdownParser (F Blocks) block = choice [ codeBlockFenced , codeBlockBackticks , guardEnabled Ext_latex_macros *> (mempty <$ macro) @@ -338,21 +340,27 @@ block = choice [ codeBlockFenced -- header blocks -- -header :: Parser [Char] ParserState (F Blocks) +header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader <?> "header" -atxHeader :: Parser [Char] ParserState (F Blocks) +addToHeaderList :: F Inlines -> MarkdownParser () +addToHeaderList text = + updateState $ \st -> st{ stateHeaders = B.toList (runF text defaultParserState) + : stateHeaders st } + +atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces text <- trimInlinesF . mconcat <$> manyTill inline atxClosing + addToHeaderList text return $ B.header level <$> text atxClosing :: Parser [Char] st String atxClosing = try $ skipMany (char '#') >> blanklines -setextHeader :: Parser [Char] ParserState (F Blocks) +setextHeader :: MarkdownParser (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. @@ -362,6 +370,7 @@ setextHeader = try $ do many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 + addToHeaderList text return $ B.header level <$> text -- @@ -382,7 +391,7 @@ hrule = try $ do -- code blocks -- -indentedLine :: Parser [Char] ParserState String +indentedLine :: MarkdownParser String indentedLine = indentSpaces >> manyTill anyChar newline >>= return . (++ "\n") blockDelimiter :: (Char -> Bool) @@ -437,7 +446,7 @@ keyValAttr = try $ do <|> many nonspaceChar return ("",[],[(key,val)]) -codeBlockFenced :: Parser [Char] ParserState (F Blocks) +codeBlockFenced :: MarkdownParser (F Blocks) codeBlockFenced = try $ do guardEnabled Ext_fenced_code_blocks size <- blockDelimiter (=='~') Nothing @@ -449,7 +458,7 @@ codeBlockFenced = try $ do blanklines return $ return $ B.codeBlockWith attr $ intercalate "\n" contents -codeBlockBackticks :: Parser [Char] ParserState (F Blocks) +codeBlockBackticks :: MarkdownParser (F Blocks) codeBlockBackticks = try $ do guardEnabled Ext_backtick_code_blocks blockDelimiter (=='`') (Just 3) @@ -460,7 +469,7 @@ codeBlockBackticks = try $ do blanklines return $ return $ B.codeBlockWith ("",[cls],[]) $ intercalate "\n" contents -codeBlockIndented :: Parser [Char] ParserState (F Blocks) +codeBlockIndented :: MarkdownParser (F Blocks) codeBlockIndented = do contents <- many1 (indentedLine <|> try (do b <- blanklines @@ -471,7 +480,7 @@ codeBlockIndented = do return $ return $ B.codeBlockWith ("", classes, []) $ stripTrailingNewlines $ concat contents -lhsCodeBlock :: Parser [Char] ParserState (F Blocks) +lhsCodeBlock :: MarkdownParser (F Blocks) lhsCodeBlock = do guardEnabled Ext_literate_haskell (return . B.codeBlockWith ("",["sourceCode","literate","haskell"],[]) <$> @@ -479,7 +488,7 @@ lhsCodeBlock = do <|> (return . B.codeBlockWith ("",["sourceCode","haskell"],[]) <$> lhsCodeBlockInverseBird) -lhsCodeBlockLaTeX :: Parser [Char] ParserState String +lhsCodeBlockLaTeX :: MarkdownParser String lhsCodeBlockLaTeX = try $ do string "\\begin{code}" manyTill spaceChar newline @@ -487,13 +496,13 @@ lhsCodeBlockLaTeX = try $ do blanklines return $ stripTrailingNewlines contents -lhsCodeBlockBird :: Parser [Char] ParserState String +lhsCodeBlockBird :: MarkdownParser String lhsCodeBlockBird = lhsCodeBlockBirdWith '>' -lhsCodeBlockInverseBird :: Parser [Char] ParserState String +lhsCodeBlockInverseBird :: MarkdownParser String lhsCodeBlockInverseBird = lhsCodeBlockBirdWith '<' -lhsCodeBlockBirdWith :: Char -> Parser [Char] ParserState String +lhsCodeBlockBirdWith :: Char -> MarkdownParser String lhsCodeBlockBirdWith c = try $ do pos <- getPosition when (sourceColumn pos /= 1) $ fail "Not in first column" @@ -516,10 +525,10 @@ birdTrackLine c = try $ do -- block quotes -- -emailBlockQuoteStart :: Parser [Char] ParserState Char +emailBlockQuoteStart :: MarkdownParser Char emailBlockQuoteStart = try $ skipNonindentSpaces >> char '>' >>~ optional (char ' ') -emailBlockQuote :: Parser [Char] ParserState [String] +emailBlockQuote :: MarkdownParser [String] emailBlockQuote = try $ do emailBlockQuoteStart let emailLine = many $ nonEndline <|> try @@ -533,7 +542,7 @@ emailBlockQuote = try $ do optional blanklines return raw -blockQuote :: Parser [Char] ParserState (F Blocks) +blockQuote :: MarkdownParser (F Blocks) blockQuote = do raw <- emailBlockQuote -- parse the extracted block, which may contain various block elements: @@ -544,7 +553,7 @@ blockQuote = do -- list blocks -- -bulletListStart :: Parser [Char] ParserState () +bulletListStart :: MarkdownParser () bulletListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces @@ -553,7 +562,7 @@ bulletListStart = try $ do spaceChar skipSpaces -anyOrderedListStart :: Parser [Char] ParserState (Int, ListNumberStyle, ListNumberDelim) +anyOrderedListStart :: MarkdownParser (Int, ListNumberStyle, ListNumberDelim) anyOrderedListStart = try $ do optional newline -- if preceded by a Plain block in a list context skipNonindentSpaces @@ -572,11 +581,11 @@ anyOrderedListStart = try $ do skipSpaces return (num, style, delim) -listStart :: Parser [Char] ParserState () +listStart :: MarkdownParser () listStart = bulletListStart <|> (anyOrderedListStart >> return ()) -- parse a line of a list item (start = parser for beginning of list item) -listLine :: Parser [Char] ParserState String +listLine :: MarkdownParser String listLine = try $ do notFollowedBy blankline notFollowedBy' (do indentSpaces @@ -586,8 +595,8 @@ listLine = try $ do return $ concat chunks ++ "\n" -- parse raw text for one list item, excluding start marker and continuations -rawListItem :: Parser [Char] ParserState a - -> Parser [Char] ParserState String +rawListItem :: MarkdownParser a + -> MarkdownParser String rawListItem start = try $ do start first <- listLine @@ -598,14 +607,14 @@ rawListItem start = try $ do -- continuation of a list item - indented and separated by blankline -- or (in compact lists) endline. -- note: nested lists are parsed as continuations -listContinuation :: Parser [Char] ParserState String +listContinuation :: MarkdownParser String listContinuation = try $ do lookAhead indentSpaces result <- many1 listContinuationLine blanks <- many blankline return $ concat result ++ blanks -listContinuationLine :: Parser [Char] ParserState String +listContinuationLine :: MarkdownParser String listContinuationLine = try $ do notFollowedBy blankline notFollowedBy' listStart @@ -613,8 +622,8 @@ listContinuationLine = try $ do result <- manyTill anyChar newline return $ result ++ "\n" -listItem :: Parser [Char] ParserState a - -> Parser [Char] ParserState (F Blocks) +listItem :: MarkdownParser a + -> MarkdownParser (F Blocks) listItem start = try $ do first <- rawListItem start continuations <- many listContinuation @@ -630,7 +639,7 @@ listItem start = try $ do updateState (\st -> st {stateParserContext = oldContext}) return contents -orderedList :: Parser [Char] ParserState (F Blocks) +orderedList :: MarkdownParser (F Blocks) orderedList = try $ do (start, style, delim) <- lookAhead anyOrderedListStart unless ((style == DefaultStyle || style == Decimal || style == Example) && @@ -645,14 +654,14 @@ orderedList = try $ do start' <- option 1 $ guardEnabled Ext_startnum >> return start return $ B.orderedListWith (start', style, delim) <$> fmap compactify' items -bulletList :: Parser [Char] ParserState (F Blocks) +bulletList :: MarkdownParser (F Blocks) bulletList = do items <- fmap sequence $ many1 $ listItem bulletListStart return $ B.bulletList <$> fmap compactify' items -- definition lists -defListMarker :: Parser [Char] ParserState () +defListMarker :: MarkdownParser () defListMarker = do sps <- nonindentSpaces char ':' <|> char '~' @@ -663,7 +672,7 @@ defListMarker = do else mzero return () -definitionListItem :: Parser [Char] ParserState (F (Inlines, [Blocks])) +definitionListItem :: MarkdownParser (F (Inlines, [Blocks])) definitionListItem = try $ do guardEnabled Ext_definition_lists -- first, see if this has any chance of being a definition list: @@ -678,7 +687,7 @@ definitionListItem = try $ do updateState (\st -> st {stateParserContext = oldContext}) return $ liftM2 (,) term (sequence contents) -defRawBlock :: Parser [Char] ParserState String +defRawBlock :: MarkdownParser String defRawBlock = try $ do defListMarker firstline <- anyLine @@ -690,7 +699,7 @@ defRawBlock = try $ do return $ unlines lns ++ trl return $ firstline ++ "\n" ++ unlines rawlines ++ trailing ++ cont -definitionList :: Parser [Char] ParserState (F Blocks) +definitionList :: MarkdownParser (F Blocks) definitionList = do items <- fmap sequence $ many1 definitionListItem return $ B.definitionList <$> fmap compactify'DL items @@ -723,7 +732,7 @@ isHtmlOrBlank (LineBreak) = True isHtmlOrBlank _ = False -} -para :: Parser [Char] ParserState (F Blocks) +para :: MarkdownParser (F Blocks) para = try $ do result <- trimInlinesF . mconcat <$> many1 inline -- TODO remove this if not really needed? and remove isHtmlOrBlank @@ -735,34 +744,34 @@ para = try $ do <|> (guardDisabled Ext_blank_before_header >> lookAhead header) return $ B.para <$> result -plain :: Parser [Char] ParserState (F Blocks) +plain :: MarkdownParser (F Blocks) plain = fmap B.plain . trimInlinesF . mconcat <$> many1 inline <* spaces -- -- raw html -- -htmlElement :: Parser [Char] ParserState String +htmlElement :: MarkdownParser String htmlElement = strictHtmlBlock <|> liftM snd (htmlTag isBlockTag) -htmlBlock :: Parser [Char] ParserState (F Blocks) +htmlBlock :: MarkdownParser (F Blocks) htmlBlock = do guardEnabled Ext_raw_html res <- (guardEnabled Ext_markdown_in_html_blocks >> rawHtmlBlocks) <|> htmlBlock' return $ return $ B.rawBlock "html" res -htmlBlock' :: Parser [Char] ParserState String +htmlBlock' :: MarkdownParser String htmlBlock' = try $ do first <- htmlElement finalSpace <- many spaceChar finalNewlines <- many newline return $ first ++ finalSpace ++ finalNewlines -strictHtmlBlock :: Parser [Char] ParserState String +strictHtmlBlock :: MarkdownParser String strictHtmlBlock = htmlInBalanced (not . isInlineTag) -rawVerbatimBlock :: Parser [Char] ParserState String +rawVerbatimBlock :: MarkdownParser String rawVerbatimBlock = try $ do (TagOpen tag _, open) <- htmlTag (tagOpen (\t -> t == "pre" || t == "style" || t == "script") @@ -770,7 +779,7 @@ rawVerbatimBlock = try $ do contents <- manyTill anyChar (htmlTag (~== TagClose tag)) return $ open ++ contents ++ renderTags [TagClose tag] -rawTeXBlock :: Parser [Char] ParserState (F Blocks) +rawTeXBlock :: MarkdownParser (F Blocks) rawTeXBlock = do guardEnabled Ext_raw_tex result <- (B.rawBlock "latex" <$> rawLaTeXBlock) @@ -778,7 +787,7 @@ rawTeXBlock = do spaces return $ return result -rawHtmlBlocks :: Parser [Char] ParserState String +rawHtmlBlocks :: MarkdownParser String rawHtmlBlocks = do htmlBlocks <- many1 $ try $ do s <- rawVerbatimBlock <|> try ( @@ -832,7 +841,7 @@ dashedLine ch = do -- Parse a table header with dashed lines of '-' preceded by -- one (or zero) line of text. simpleTableHeader :: Bool -- ^ Headerless table - -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) + -> MarkdownParser (F [Blocks], [Alignment], [Int]) simpleTableHeader headless = try $ do rawContent <- if headless then return "" @@ -876,16 +885,16 @@ alignType strLst len = (False, False) -> AlignDefault -- Parse a table footer - dashed lines followed by blank line. -tableFooter :: Parser [Char] ParserState String +tableFooter :: MarkdownParser String tableFooter = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> blanklines -- Parse a table separator - dashed line. -tableSep :: Parser [Char] ParserState Char +tableSep :: MarkdownParser Char tableSep = try $ skipNonindentSpaces >> many1 (dashedLine '-') >> char '\n' -- Parse a raw line and split it into chunks by indices. rawTableLine :: [Int] - -> Parser [Char] ParserState [String] + -> MarkdownParser [String] rawTableLine indices = do notFollowedBy' (blanklines <|> tableFooter) line <- many1Till anyChar newline @@ -894,13 +903,13 @@ rawTableLine indices = do -- Parse a table line and return a list of lists of blocks (columns). tableLine :: [Int] - -> Parser [Char] ParserState (F [Blocks]) + -> MarkdownParser (F [Blocks]) tableLine indices = rawTableLine indices >>= fmap sequence . mapM (parseFromString (mconcat <$> many plain)) -- Parse a multiline table row and return a list of blocks (columns). multilineRow :: [Int] - -> Parser [Char] ParserState (F [Blocks]) + -> MarkdownParser (F [Blocks]) multilineRow indices = do colLines <- many1 (rawTableLine indices) let cols = map unlines $ transpose colLines @@ -908,7 +917,7 @@ multilineRow indices = do -- Parses a table caption: inlines beginning with 'Table:' -- and followed by blank lines. -tableCaption :: Parser [Char] ParserState (F Inlines) +tableCaption :: MarkdownParser (F Inlines) tableCaption = try $ do guardEnabled Ext_table_captions skipNonindentSpaces @@ -917,7 +926,7 @@ tableCaption = try $ do -- Parse a simple table with '---' header and one line per row. simpleTable :: Bool -- ^ Headerless table - -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) simpleTable headless = do (aligns, _widths, heads', lines') <- tableWith (simpleTableHeader headless) tableLine @@ -931,12 +940,12 @@ simpleTable headless = do -- which may be multiline, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). multilineTable :: Bool -- ^ Headerless table - -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) multilineTable headless = tableWith (multilineTableHeader headless) multilineRow blanklines tableFooter multilineTableHeader :: Bool -- ^ Headerless table - -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) + -> MarkdownParser (F [Blocks], [Alignment], [Int]) multilineTableHeader headless = try $ do if headless then return '\n' @@ -970,7 +979,7 @@ multilineTableHeader headless = try $ do -- which may be grid, separated by blank lines, and -- ending with a footer (dashed line followed by blank line). gridTable :: Bool -- ^ Headerless table - -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) gridTable headless = tableWith (gridTableHeader headless) gridTableRow (gridTableSep '-') gridTableFooter @@ -993,12 +1002,12 @@ removeFinalBar = reverse . dropWhile (`elem` " \t") . dropWhile (=='|') . reverse -- | Separator between rows of grid table. -gridTableSep :: Char -> Parser [Char] ParserState Char +gridTableSep :: Char -> MarkdownParser Char gridTableSep ch = try $ gridDashedLines ch >> return '\n' -- | Parse header for a grid table. gridTableHeader :: Bool -- ^ Headerless table - -> Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) + -> MarkdownParser (F [Blocks], [Alignment], [Int]) gridTableHeader headless = try $ do optional blanklines dashes <- gridDashedLines '-' @@ -1022,7 +1031,7 @@ gridTableHeader headless = try $ do map trim rawHeads return (heads, aligns, indices) -gridTableRawLine :: [Int] -> Parser [Char] ParserState [String] +gridTableRawLine :: [Int] -> MarkdownParser [String] gridTableRawLine indices = do char '|' line <- many1Till anyChar newline @@ -1030,7 +1039,7 @@ gridTableRawLine indices = do -- | Parse row of grid table. gridTableRow :: [Int] - -> Parser [Char] ParserState (F [Blocks]) + -> MarkdownParser (F [Blocks]) gridTableRow indices = do colLines <- many1 (gridTableRawLine indices) let cols = map ((++ "\n") . unlines . removeOneLeadingSpace) $ @@ -1046,10 +1055,10 @@ removeOneLeadingSpace xs = startsWithSpace (y:_) = y == ' ' -- | Parse footer for a grid table. -gridTableFooter :: Parser [Char] ParserState [Char] +gridTableFooter :: MarkdownParser [Char] gridTableFooter = blanklines -pipeTable :: Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +pipeTable :: MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) pipeTable = try $ do let pipeBreak = nonindentSpaces *> optional (char '|') *> pipeTableHeaderPart `sepBy1` sepPipe <* @@ -1064,13 +1073,13 @@ pipeTable = try $ do let widths = replicate (length aligns) 0.0 return $ (aligns, widths, heads, lines') -sepPipe :: Parser [Char] ParserState () +sepPipe :: MarkdownParser () sepPipe = try $ do char '|' <|> char '+' notFollowedBy blankline -- parse a row, also returning probable alignments for org-table cells -pipeTableRow :: Parser [Char] ParserState (F [Blocks]) +pipeTableRow :: MarkdownParser (F [Blocks]) pipeTableRow = do nonindentSpaces optional (char '|') @@ -1109,11 +1118,11 @@ scanForPipe = lookAhead (manyTill (satisfy (/='\n')) (char '|')) >> return () -- | Parse a table using 'headerParser', 'rowParser', -- 'lineParser', and 'footerParser'. Variant of the version in -- Text.Pandoc.Parsing. -tableWith :: Parser [Char] ParserState (F [Blocks], [Alignment], [Int]) - -> ([Int] -> Parser [Char] ParserState (F [Blocks])) - -> Parser [Char] ParserState sep - -> Parser [Char] ParserState end - -> Parser [Char] ParserState ([Alignment], [Double], F [Blocks], F [[Blocks]]) +tableWith :: MarkdownParser (F [Blocks], [Alignment], [Int]) + -> ([Int] -> MarkdownParser (F [Blocks])) + -> MarkdownParser sep + -> MarkdownParser end + -> MarkdownParser ([Alignment], [Double], F [Blocks], F [[Blocks]]) tableWith headerParser rowParser lineParser footerParser = try $ do (heads, aligns, indices) <- headerParser lines' <- fmap sequence $ rowParser indices `sepEndBy1` lineParser @@ -1124,7 +1133,7 @@ tableWith headerParser rowParser lineParser footerParser = try $ do else widthsFromIndices numColumns indices return $ (aligns, widths, heads, lines') -table :: Parser [Char] ParserState (F Blocks) +table :: MarkdownParser (F Blocks) table = try $ do frontCaption <- option Nothing (Just <$> tableCaption) (aligns, widths, heads, lns) <- @@ -1151,7 +1160,7 @@ table = try $ do -- inline -- -inline :: Parser [Char] ParserState (F Inlines) +inline :: MarkdownParser (F Inlines) inline = choice [ whitespace , bareURL , str @@ -1180,13 +1189,13 @@ inline = choice [ whitespace , ltSign ] <?> "inline" -escapedChar' :: Parser [Char] ParserState Char +escapedChar' :: MarkdownParser Char escapedChar' = try $ do char '\\' (guardEnabled Ext_all_symbols_escapable >> satisfy (not . isAlphaNum)) <|> oneOf "\\`*_{}[]()>#+-.!~" -escapedChar :: Parser [Char] ParserState (F Inlines) +escapedChar :: MarkdownParser (F Inlines) escapedChar = do result <- escapedChar' case result of @@ -1195,7 +1204,7 @@ escapedChar = do return (return B.linebreak) -- "\[newline]" is a linebreak _ -> return $ return $ B.str [result] -ltSign :: Parser [Char] ParserState (F Inlines) +ltSign :: MarkdownParser (F Inlines) ltSign = do guardDisabled Ext_raw_html <|> guardDisabled Ext_markdown_in_html_blocks @@ -1203,7 +1212,7 @@ ltSign = do char '<' return $ return $ B.str "<" -exampleRef :: Parser [Char] ParserState (F Inlines) +exampleRef :: MarkdownParser (F Inlines) exampleRef = try $ do guardEnabled Ext_example_lists char '@' @@ -1214,7 +1223,7 @@ exampleRef = try $ do Just n -> B.str (show n) Nothing -> B.str ('@':lab) -symbol :: Parser [Char] ParserState (F Inlines) +symbol :: MarkdownParser (F Inlines) symbol = do result <- noneOf "<\\\n\t " <|> try (do lookAhead $ char '\\' @@ -1223,7 +1232,7 @@ symbol = do return $ return $ B.str [result] -- parses inline code, between n `s and n `s -code :: Parser [Char] ParserState (F Inlines) +code :: MarkdownParser (F Inlines) code = try $ do starts <- many1 (char '`') skipSpaces @@ -1235,11 +1244,11 @@ code = try $ do optional whitespace >> attributes) return $ return $ B.codeWith attr $ trim $ concat result -math :: Parser [Char] ParserState (F Inlines) +math :: MarkdownParser (F Inlines) math = (return . B.displayMath <$> (mathDisplay >>= applyMacros')) <|> (return . B.math <$> (mathInline >>= applyMacros')) -mathDisplay :: Parser [Char] ParserState String +mathDisplay :: MarkdownParser String mathDisplay = (guardEnabled Ext_tex_math_dollars >> mathDisplayWith "$$" "$$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -1247,12 +1256,12 @@ mathDisplay = <|> (guardEnabled Ext_tex_math_double_backslash >> mathDisplayWith "\\\\[" "\\\\]") -mathDisplayWith :: String -> String -> Parser [Char] ParserState String +mathDisplayWith :: String -> String -> MarkdownParser String mathDisplayWith op cl = try $ do string op many1Till (noneOf "\n" <|> (newline >>~ notFollowedBy' blankline)) (try $ string cl) -mathInline :: Parser [Char] ParserState String +mathInline :: MarkdownParser String mathInline = (guardEnabled Ext_tex_math_dollars >> mathInlineWith "$" "$") <|> (guardEnabled Ext_tex_math_single_backslash >> @@ -1260,7 +1269,7 @@ mathInline = <|> (guardEnabled Ext_tex_math_double_backslash >> mathInlineWith "\\\\(" "\\\\)") -mathInlineWith :: String -> String -> Parser [Char] ParserState String +mathInlineWith :: String -> String -> MarkdownParser String mathInlineWith op cl = try $ do string op notFollowedBy space @@ -1283,15 +1292,15 @@ fours = try $ do -- | Parses a list of inlines between start and end delimiters. inlinesBetween :: (Show b) - => Parser [Char] ParserState a - -> Parser [Char] ParserState b - -> Parser [Char] ParserState (F Inlines) + => MarkdownParser a + -> MarkdownParser b + -> MarkdownParser (F Inlines) inlinesBetween start end = (trimInlinesF . mconcat) <$> try (start >> many1Till inner end) where inner = innerSpace <|> (notFollowedBy' (() <$ whitespace) >> inline) innerSpace = try $ whitespace >>~ notFollowedBy' end -emph :: Parser [Char] ParserState (F Inlines) +emph :: MarkdownParser (F Inlines) emph = fmap B.emph <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = char '*' >> lookAhead nonspaceChar @@ -1299,7 +1308,7 @@ emph = fmap B.emph <$> nested ulStart = char '_' >> lookAhead nonspaceChar ulEnd = notFollowedBy' (() <$ strong) >> char '_' -strong :: Parser [Char] ParserState (F Inlines) +strong :: MarkdownParser (F Inlines) strong = fmap B.strong <$> nested (inlinesBetween starStart starEnd <|> inlinesBetween ulStart ulEnd) where starStart = string "**" >> lookAhead nonspaceChar @@ -1307,26 +1316,26 @@ strong = fmap B.strong <$> nested ulStart = string "__" >> lookAhead nonspaceChar ulEnd = try $ string "__" -strikeout :: Parser [Char] ParserState (F Inlines) +strikeout :: MarkdownParser (F Inlines) strikeout = fmap B.strikeout <$> (guardEnabled Ext_strikeout >> inlinesBetween strikeStart strikeEnd) where strikeStart = string "~~" >> lookAhead nonspaceChar >> notFollowedBy (char '~') strikeEnd = try $ string "~~" -superscript :: Parser [Char] ParserState (F Inlines) +superscript :: MarkdownParser (F Inlines) superscript = fmap B.superscript <$> try (do guardEnabled Ext_superscript char '^' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '^')) -subscript :: Parser [Char] ParserState (F Inlines) +subscript :: MarkdownParser (F Inlines) subscript = fmap B.subscript <$> try (do guardEnabled Ext_subscript char '~' mconcat <$> many1Till (notFollowedBy spaceChar >> inline) (char '~')) -whitespace :: Parser [Char] ParserState (F Inlines) +whitespace :: MarkdownParser (F Inlines) whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" where lb = spaceChar >> skipMany spaceChar >> option B.space (endline >> return B.linebreak) regsp = skipMany spaceChar >> return B.space @@ -1334,7 +1343,7 @@ whitespace = spaceChar >> return <$> (lb <|> regsp) <?> "whitespace" nonEndline :: Parser [Char] st Char nonEndline = satisfy (/='\n') -str :: Parser [Char] ParserState (F Inlines) +str :: MarkdownParser (F Inlines) str = do isSmart <- readerSmart . stateOptions <$> getState a <- alphaNum @@ -1374,7 +1383,7 @@ likelyAbbrev x = in map snd $ filter (\(y,_) -> y == x) abbrPairs -- an endline character that can be treated as a space, not a structural break -endline :: Parser [Char] ParserState (F Inlines) +endline :: MarkdownParser (F Inlines) endline = try $ do newline notFollowedBy blankline @@ -1393,19 +1402,19 @@ endline = try $ do -- -- a reference label for a link -reference :: Parser [Char] ParserState (F Inlines, String) +reference :: MarkdownParser (F Inlines, String) reference = do notFollowedBy' (string "[^") -- footnote reference withRaw $ trimInlinesF <$> inlinesInBalancedBrackets -- source for a link, with optional title -source :: Parser [Char] ParserState (String, String) +source :: MarkdownParser (String, String) source = (try $ charsInBalanced '(' ')' litChar >>= parseFromString source') <|> -- the following is needed for cases like: [ref](/url(a). (enclosed (char '(') (char ')') litChar >>= parseFromString source') -- auxiliary function for source -source' :: Parser [Char] ParserState (String, String) +source' :: MarkdownParser (String, String) source' = do skipSpaces let nl = char '\n' >>~ notFollowedBy blankline @@ -1423,7 +1432,7 @@ source' = do eof return (escapeURI $ trimr src, tit) -linkTitle :: Parser [Char] ParserState String +linkTitle :: MarkdownParser String linkTitle = try $ do (many1 spaceChar >> option '\n' newline) <|> newline skipSpaces @@ -1431,7 +1440,7 @@ linkTitle = try $ do tit <- manyTill litChar (try (char delim >> skipSpaces >> eof)) return $ fromEntities tit -link :: Parser [Char] ParserState (F Inlines) +link :: MarkdownParser (F Inlines) link = try $ do st <- getState guard $ stateAllowLinks st @@ -1441,51 +1450,62 @@ link = try $ do regLink B.link lab <|> referenceLink B.link (lab,raw) regLink :: (String -> String -> Inlines -> Inlines) - -> F Inlines -> Parser [Char] ParserState (F Inlines) + -> F Inlines -> MarkdownParser (F Inlines) regLink constructor lab = try $ do (src, tit) <- source return $ constructor src tit <$> lab -- a link like [this][ref] or [this][] or [this] referenceLink :: (String -> String -> Inlines -> Inlines) - -> (F Inlines, String) -> Parser [Char] ParserState (F Inlines) + -> (F Inlines, String) -> MarkdownParser (F Inlines) referenceLink constructor (lab, raw) = do - raw' <- try (optional (char ' ') >> - optional (newline >> skipSpaces) >> - (snd <$> reference)) <|> return "" - let key = toKey $ if raw' == "[]" || raw' == "" then raw else raw' + (ref,raw') <- try (optional (char ' ') >> + optional (newline >> skipSpaces) >> + reference) <|> return (mempty, "") + let labIsRef = raw' == "" || raw' == "[]" + let key = toKey $ if labIsRef then raw else raw' let dropRB (']':xs) = xs dropRB xs = xs let dropLB ('[':xs) = xs dropLB xs = xs let dropBrackets = reverse . dropRB . reverse . dropLB fallback <- parseFromString (mconcat <$> many inline) $ dropBrackets raw + implicitHeaderRefs <- option False $ + True <$ guardEnabled Ext_implicit_header_references return $ do keys <- asksF stateKeys case M.lookup key keys of - Nothing -> (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback + Nothing -> do + headers <- asksF stateHeaders + let ref' = B.toList $ runF (if labIsRef then lab else ref) + defaultParserState + if implicitHeaderRefs && ref' `elem` headers + then do + let src = '#' : uniqueIdent ref' [] + constructor src "" <$> lab + else (\x -> B.str "[" <> x <> B.str "]" <> B.str raw') <$> fallback Just (src,tit) -> constructor src tit <$> lab -bareURL :: Parser [Char] ParserState (F Inlines) +bareURL :: MarkdownParser (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_urls (orig, src) <- uri <|> emailAddress return $ return $ B.link src "" (B.codeWith ("",["url"],[]) orig) -autoLink :: Parser [Char] ParserState (F Inlines) +autoLink :: MarkdownParser (F Inlines) autoLink = try $ do char '<' (orig, src) <- uri <|> emailAddress char '>' return $ return $ B.link src "" (B.codeWith ("",["url"],[]) orig) -image :: Parser [Char] ParserState (F Inlines) +image :: MarkdownParser (F Inlines) image = try $ do char '!' (lab,raw) <- reference regLink B.image lab <|> referenceLink B.image (lab,raw) -note :: Parser [Char] ParserState (F Inlines) +note :: MarkdownParser (F Inlines) note = try $ do guardEnabled Ext_footnotes ref <- noteMarker @@ -1501,14 +1521,14 @@ note = try $ do let contents' = runF contents st{ stateNotes' = [] } return $ B.note contents' -inlineNote :: Parser [Char] ParserState (F Inlines) +inlineNote :: MarkdownParser (F Inlines) inlineNote = try $ do guardEnabled Ext_inline_notes char '^' contents <- inlinesInBalancedBrackets return $ B.note . B.para <$> contents -rawLaTeXInline' :: Parser [Char] ParserState (F Inlines) +rawLaTeXInline' :: MarkdownParser (F Inlines) rawLaTeXInline' = try $ do guardEnabled Ext_raw_tex lookAhead $ char '\\' >> notFollowedBy' (string "start") -- context env @@ -1532,7 +1552,7 @@ inBrackets parser = do char ']' return $ "[" ++ contents ++ "]" -rawHtmlInline :: Parser [Char] ParserState (F Inlines) +rawHtmlInline :: MarkdownParser (F Inlines) rawHtmlInline = do guardEnabled Ext_raw_html mdInHtml <- option False $ @@ -1544,14 +1564,14 @@ rawHtmlInline = do -- Citations -cite :: Parser [Char] ParserState (F Inlines) +cite :: MarkdownParser (F Inlines) cite = do guardEnabled Ext_citations getOption readerReferences >>= guard . not . null citations <- textualCite <|> normalCite return $ flip B.cite mempty <$> citations -textualCite :: Parser [Char] ParserState (F [Citation]) +textualCite :: MarkdownParser (F [Citation]) textualCite = try $ do (_, key) <- citeKey let first = Citation{ citationId = key @@ -1566,7 +1586,7 @@ textualCite = try $ do Just rest -> return $ (first:) <$> rest Nothing -> option (return [first]) $ bareloc first -bareloc :: Citation -> Parser [Char] ParserState (F [Citation]) +bareloc :: Citation -> MarkdownParser (F [Citation]) bareloc c = try $ do spnl char '[' @@ -1579,7 +1599,7 @@ bareloc c = try $ do rest' <- rest return $ c{ citationSuffix = B.toList suff' } : rest' -normalCite :: Parser [Char] ParserState (F [Citation]) +normalCite :: MarkdownParser (F [Citation]) normalCite = try $ do char '[' spnl @@ -1588,7 +1608,7 @@ normalCite = try $ do char ']' return citations -citeKey :: Parser [Char] ParserState (Bool, String) +citeKey :: MarkdownParser (Bool, String) citeKey = try $ do suppress_author <- option False (char '-' >> return True) char '@' @@ -1600,7 +1620,7 @@ citeKey = try $ do guard $ key `elem` citations' return (suppress_author, key) -suffix :: Parser [Char] ParserState (F Inlines) +suffix :: MarkdownParser (F Inlines) suffix = try $ do hasSpace <- option False (notFollowedBy nonspaceChar >> return True) spnl @@ -1609,14 +1629,14 @@ suffix = try $ do then (B.space <>) <$> rest else rest -prefix :: Parser [Char] ParserState (F Inlines) +prefix :: MarkdownParser (F Inlines) prefix = trimInlinesF . mconcat <$> manyTill inline (char ']' <|> liftM (const ']') (lookAhead citeKey)) -citeList :: Parser [Char] ParserState (F [Citation]) +citeList :: MarkdownParser (F [Citation]) citeList = fmap sequence $ sepBy1 citation (try $ char ';' >> spnl) -citation :: Parser [Char] ParserState (F Citation) +citation :: MarkdownParser (F Citation) citation = try $ do pref <- prefix (suppress_author, key) <- citeKey @@ -1634,20 +1654,20 @@ citation = try $ do , citationHash = 0 } -smart :: Parser [Char] ParserState (F Inlines) +smart :: MarkdownParser (F Inlines) smart = do getOption readerSmart >>= guard doubleQuoted <|> singleQuoted <|> choice (map (return . B.singleton <$>) [apostrophe, dash, ellipses]) -singleQuoted :: Parser [Char] ParserState (F Inlines) +singleQuoted :: MarkdownParser (F Inlines) singleQuoted = try $ do singleQuoteStart withQuoteContext InSingleQuote $ fmap B.singleQuoted . trimInlinesF . mconcat <$> many1Till inline singleQuoteEnd -doubleQuoted :: Parser [Char] ParserState (F Inlines) +doubleQuoted :: MarkdownParser (F Inlines) doubleQuoted = try $ do doubleQuoteStart withQuoteContext InDoubleQuote $ diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index d8a9c6b81..6222bd810 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -281,7 +281,9 @@ elementToHtml slideLevel opts (Sec level num id' title' elements) = do then filter isSec elements else elements let header'' = if (writerSectionDivs opts || - writerSlideVariant opts == S5Slides || slide) + writerSlideVariant opts == S5Slides || + slide || + not (isEnabled Ext_header_identifiers opts)) then header' else header' ! prefixedId opts id' let inNl x = mconcat $ nl opts : intersperse (nl opts) x ++ [nl opts] |