diff options
Diffstat (limited to 'src/Text/Pandoc')
-rw-r--r-- | src/Text/Pandoc/PDF.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Parsing.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Markdown.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/RST.hs | 26 | ||||
-rw-r--r-- | src/Text/Pandoc/SelfContained.hs | 3 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 5 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/CommonMark.hs | 14 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/DokuWiki.hs | 2 | ||||
-rw-r--r-- | src/Text/Pandoc/Writers/HTML.hs | 5 |
10 files changed, 42 insertions, 31 deletions
diff --git a/src/Text/Pandoc/PDF.hs b/src/Text/Pandoc/PDF.hs index 1711c0f36..8f92a3321 100644 --- a/src/Text/Pandoc/PDF.hs +++ b/src/Text/Pandoc/PDF.hs @@ -190,7 +190,7 @@ runTeXProgram verbose program args runNumber numRuns tmpDir source = do let file' = file #endif let programArgs = ["-halt-on-error", "-interaction", "nonstopmode", - "-output-directory", tmpDir', file'] ++ args + "-output-directory", tmpDir'] ++ args ++ [file'] env' <- getEnvironment let sep = searchPathSeparator:[] let texinputs = maybe (tmpDir' ++ sep) ((tmpDir' ++ sep) ++) diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 82e7e2c33..5dc991be2 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -448,13 +448,12 @@ uri :: Stream [Char] m Char => ParserT [Char] st m (String, String) uri = try $ do scheme <- uriScheme char ':' - -- We allow punctuation except at the end, since + -- We allow sentence punctuation except at the end, since -- we don't want the trailing '.' in 'http://google.com.' We want to allow -- http://en.wikipedia.org/wiki/State_of_emergency_(disambiguation) -- as a URL, while NOT picking up the closing paren in -- (http://wikipedia.org). So we include balanced parens in the URL. - let isWordChar c = isAlphaNum c || c == '_' || c == '/' || c == '+' || - not (isAscii c) + let isWordChar c = isAlphaNum c || c `elem` "#$%*+/@\\_-" let wordChar = satisfy isWordChar let percentEscaped = try $ char '%' >> skipMany1 (satisfy isHexDigit) let entity = () <$ characterReference diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index f2f97dbc4..361d64361 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -911,8 +911,15 @@ htmlTag :: Monad m htmlTag f = try $ do lookAhead (char '<') inp <- getInput - let (next : _) = canonicalizeTags $ parseTags inp + let hasTagWarning (TagWarning _:_) = True + hasTagWarning _ = False + let (next : rest) = canonicalizeTags $ parseTagsOptions + parseOptions{ optTagWarning = True } inp guard $ f next + -- we get a TagWarning on things like + -- <www.boe.es/buscar/act.php?id=BOE-A-1996-8930#a66> + -- which should NOT be parsed as an HTML tag, see #2277 + guard $ not $ hasTagWarning rest case next of TagComment s | "<!--" `isPrefixOf` inp -> do diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 3b5ae0978..b8f5dab60 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -1752,12 +1752,14 @@ dropBrackets = reverse . dropRB . reverse . dropLB bareURL :: MarkdownParser (F Inlines) bareURL = try $ do guardEnabled Ext_autolink_bare_uris + getState >>= guard . stateAllowLinks (orig, src) <- uri <|> emailAddress notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") return $ return $ B.link src "" (B.str orig) autoLink :: MarkdownParser (F Inlines) autoLink = try $ do + getState >>= guard . stateAllowLinks char '<' (orig, src) <- uri <|> emailAddress -- in rare cases, something may remain after the uri parser diff --git a/src/Text/Pandoc/Readers/RST.hs b/src/Text/Pandoc/Readers/RST.hs index 38de77f9f..678eecc52 100644 --- a/src/Text/Pandoc/Readers/RST.hs +++ b/src/Text/Pandoc/Readers/RST.hs @@ -209,7 +209,7 @@ rawFieldListItem minIndent = try $ do fieldListItem :: Int -> RSTParser (Inlines, [Blocks]) fieldListItem minIndent = try $ do (name, raw) <- rawFieldListItem minIndent - let term = B.str name + term <- parseInlineFromString name contents <- parseFromString parseBlocks raw optional blanklines return (term, [contents]) @@ -229,8 +229,7 @@ fieldList = try $ do lineBlock :: RSTParser Blocks lineBlock = try $ do lines' <- lineBlockLines - lines'' <- mapM (parseFromString - (trimInlines . mconcat <$> many inline)) lines' + lines'' <- mapM parseInlineFromString lines' return $ B.para (mconcat $ intersperse B.linebreak lines'') -- @@ -549,39 +548,33 @@ directive' = do "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields "container" -> parseFromString parseBlocks body' "replace" -> B.para <$> -- consumed by substKey - parseFromString (trimInlines . mconcat <$> many inline) - (trim top) + parseInlineFromString (trim top) "unicode" -> B.para <$> -- consumed by substKey - parseFromString (trimInlines . mconcat <$> many inline) - (trim $ unicodeTransform top) + parseInlineFromString (trim $ unicodeTransform top) "compound" -> parseFromString parseBlocks body' "pull-quote" -> B.blockQuote <$> parseFromString parseBlocks body' "epigraph" -> B.blockQuote <$> parseFromString parseBlocks body' "highlights" -> B.blockQuote <$> parseFromString parseBlocks body' - "rubric" -> B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + "rubric" -> B.para . B.strong <$> parseInlineFromString top _ | label `elem` ["attention","caution","danger","error","hint", "important","note","tip","warning"] -> do let tit = B.para $ B.strong $ B.str label bod <- parseFromString parseBlocks $ top ++ "\n\n" ++ body' return $ B.blockQuote $ tit <> bod "admonition" -> - do tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + do tit <- B.para . B.strong <$> parseInlineFromString top bod <- parseFromString parseBlocks body' return $ B.blockQuote $ tit <> bod "sidebar" -> do let subtit = maybe "" trim $ lookup "subtitle" fields - tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) + tit <- B.para . B.strong <$> parseInlineFromString (trim top ++ if null subtit then "" else (": " ++ subtit)) bod <- parseFromString parseBlocks body' return $ B.blockQuote $ tit <> bod "topic" -> - do tit <- B.para . B.strong <$> parseFromString - (trimInlines . mconcat <$> many inline) top + do tit <- B.para . B.strong <$> parseInlineFromString top bod <- parseFromString parseBlocks body' return $ tit <> bod "default-role" -> mempty <$ updateState (\s -> @@ -962,6 +955,9 @@ inline = choice [ whitespace , escapedChar , symbol ] <?> "inline" +parseInlineFromString :: String -> RSTParser Inlines +parseInlineFromString = parseFromString (trimInlines . mconcat <$> many inline) + hyphens :: RSTParser Inlines hyphens = do result <- many1 (char '-') diff --git a/src/Text/Pandoc/SelfContained.hs b/src/Text/Pandoc/SelfContained.hs index 896e4327a..a77127286 100644 --- a/src/Text/Pandoc/SelfContained.hs +++ b/src/Text/Pandoc/SelfContained.hs @@ -103,8 +103,9 @@ parseCSSUrls :: MediaBag -> Maybe String -> FilePath parseCSSUrls media sourceURL d = B.concat <$> P.many (pCSSWhite <|> pCSSComment <|> pCSSUrl media sourceURL d <|> pCSSOther) +-- Note: some whitespace in CSS is significant, so we can't collapse it! pCSSWhite :: ParsecT ByteString () IO ByteString -pCSSWhite = P.space >> P.spaces >> return B.empty +pCSSWhite = B.singleton <$> P.space <* P.spaces pCSSComment :: ParsecT ByteString () IO ByteString pCSSComment = P.try $ do diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 52e53e394..ef9f66aa7 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -662,12 +662,17 @@ hierarchicalizeWithIds ((Header level attr@(_,classes,_) title'):xs) = do sectionContents' <- hierarchicalizeWithIds sectionContents rest' <- hierarchicalizeWithIds rest return $ Sec level newnum attr title' sectionContents' : rest' +hierarchicalizeWithIds ((Div ("",["references"],[]) + (Header level (ident,classes,kvs) title' : xs)):ys) = + hierarchicalizeWithIds ((Header level (ident,("references":classes),kvs) + title') : (xs ++ ys)) hierarchicalizeWithIds (x:rest) = do rest' <- hierarchicalizeWithIds rest return $ (Blk x) : rest' headerLtEq :: Int -> Block -> Bool headerLtEq level (Header l _ _) = l <= level +headerLtEq level (Div ("",["references"],[]) (Header l _ _ : _)) = l <= level headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. diff --git a/src/Text/Pandoc/Writers/CommonMark.hs b/src/Text/Pandoc/Writers/CommonMark.hs index 706b27175..fee36d454 100644 --- a/src/Text/Pandoc/Writers/CommonMark.hs +++ b/src/Text/Pandoc/Writers/CommonMark.hs @@ -76,8 +76,8 @@ blocksToCommonMark opts bs = return $ $ node DOCUMENT (blocksToNodes bs) where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] colwidth = if writerWrapText opts - then writerColumns opts - else 0 + then Just $ writerColumns opts + else Nothing inlinesToCommonMark :: WriterOptions -> [Inline] -> Identity String inlinesToCommonMark opts ils = return $ @@ -85,8 +85,8 @@ inlinesToCommonMark opts ils = return $ $ node PARAGRAPH (inlinesToNodes ils) where cmarkOpts = [optHardBreaks | isEnabled Ext_hard_line_breaks opts] colwidth = if writerWrapText opts - then writerColumns opts - else 0 + then Just $ writerColumns opts + else Nothing blocksToNodes :: [Block] -> [Node] blocksToNodes = foldr blockToNodes [] @@ -144,11 +144,11 @@ inlineToNodes (Strikeout xs) = ((node (INLINE_HTML (T.pack "<s>")) [] : inlinesToNodes xs ++ [node (INLINE_HTML (T.pack "</s>")) []]) ++ ) inlineToNodes (Superscript xs) = - ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ - [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) -inlineToNodes (Subscript xs) = ((node (INLINE_HTML (T.pack "<sup>")) [] : inlinesToNodes xs ++ [node (INLINE_HTML (T.pack "</sup>")) []]) ++ ) +inlineToNodes (Subscript xs) = + ((node (INLINE_HTML (T.pack "<sub>")) [] : inlinesToNodes xs ++ + [node (INLINE_HTML (T.pack "</sub>")) []]) ++ ) inlineToNodes (SmallCaps xs) = ((node (INLINE_HTML (T.pack "<span style=\"font-variant:small-caps;\">")) [] : inlinesToNodes xs ++ diff --git a/src/Text/Pandoc/Writers/DokuWiki.hs b/src/Text/Pandoc/Writers/DokuWiki.hs index 7164c6e86..7ebe09db7 100644 --- a/src/Text/Pandoc/Writers/DokuWiki.hs +++ b/src/Text/Pandoc/Writers/DokuWiki.hs @@ -451,7 +451,7 @@ inlineToDokuWiki _ (Code _ str) = inlineToDokuWiki _ (Str str) = return $ escapeString str -inlineToDokuWiki _ (Math _ str) = return $ "<math>" ++ str ++ "</math>" +inlineToDokuWiki _ (Math _ str) = return $ "$" ++ str ++ "$" -- note: str should NOT be escaped inlineToDokuWiki _ (RawInline f str) diff --git a/src/Text/Pandoc/Writers/HTML.hs b/src/Text/Pandoc/Writers/HTML.hs index 8de34ace8..915b193f7 100644 --- a/src/Text/Pandoc/Writers/HTML.hs +++ b/src/Text/Pandoc/Writers/HTML.hs @@ -375,8 +375,8 @@ obfuscateLink opts (renderHtml -> txt) s = (linkText, altText) = if txt == drop 7 s' -- autolink then ("e", name' ++ " at " ++ domain') - else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ - domain' ++ ")") + else ("'" ++ obfuscateString txt ++ "'", + txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") in case meth of ReferenceObfuscation -> -- need to use preEscapedString or &'s are escaped to & in URL @@ -458,6 +458,7 @@ blockToHtml opts (Div attr@(_,classes,_) bs) = do if speakerNotes then case writerSlideVariant opts of RevealJsSlides -> addAttrs opts' attr $ H5.aside $ contents' + DZSlides -> addAttrs opts' attr $ H5.div $ contents' NoSlides -> addAttrs opts' attr $ H.div $ contents' _ -> mempty else addAttrs opts attr $ H.div $ contents' |