diff options
-rw-r--r-- | README | 3 | ||||
-rw-r--r-- | man/man1/html2markdown.1 | 2 | ||||
-rw-r--r-- | man/man1/pandoc.1 | 3 | ||||
-rw-r--r-- | src/Main.hs | 21 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/HTML.hs | 9 | ||||
-rw-r--r-- | src/Text/Pandoc/Shared.hs | 2 |
6 files changed, 31 insertions, 9 deletions
@@ -241,6 +241,9 @@ preserved, rather than converted to spaces (the default). `--strict` specifies that strict markdown syntax is to be used, without pandoc's usual extensions and variants (described below). +`--inline-links` causes links in HTML to be parsed as inline links, rather +than reference links. + `-R` or `--parse-raw` causes the HTML and LaTeX readers to parse HTML codes and LaTeX environments that it can't translate as raw HTML or LaTeX. Raw HTML can be printed in markdown, reStructuredText, HTML, diff --git a/man/man1/html2markdown.1 b/man/man1/html2markdown.1 index 78c27808e..3ffda7a97 100644 --- a/man/man1/html2markdown.1 +++ b/man/man1/html2markdown.1 @@ -32,6 +32,8 @@ Write output to \fIFILE\fR instead of STDOUT. .B \-\-strict Use strict markdown syntax, with no extensions or variants. .TP +.B \-\-inline\-links +Parse links in HTML as inline links, rather than reference links. .TP .B \-R, \-\-parse-raw Parse untranslatable HTML codes as raw HTML. diff --git a/man/man1/pandoc.1 b/man/man1/pandoc.1 index 8c3c62e15..8a0012a3d 100644 --- a/man/man1/pandoc.1 +++ b/man/man1/pandoc.1 @@ -105,6 +105,9 @@ Specify tab stop (default is 4). .B \-\-strict Use strict markdown syntax, with no extensions or variants. .TP +.B \-\-inline\-links +Parse links in HTML as inline links, rather than reference links. +.TP .B \-R, \-\-parse-raw Parse untranslatable HTML codes and LaTeX environments as raw HTML or LaTeX, instead of ignoring them. diff --git a/src/Main.hs b/src/Main.hs index 275a89f0d..0bb246fa5 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -118,6 +118,7 @@ data Opt = Opt , optDumpArgs :: Bool -- ^ Output command-line arguments , optIgnoreArgs :: Bool -- ^ Ignore command-line arguments , optStrict :: Bool -- ^ Use strict markdown syntax + , optInlineLinks :: Bool -- ^ Use inline links in parsing HTML } -- | Defaults for command-line options. @@ -143,6 +144,7 @@ defaultOpts = Opt , optDumpArgs = False , optIgnoreArgs = False , optStrict = False + , optInlineLinks = False } -- | A list of functions, each transforming the options data structure @@ -188,6 +190,11 @@ options = (\opt -> return opt { optStrict = True } )) "" -- "Use strict markdown syntax with no extensions" + , Option "" ["inline-links"] + (NoArg + (\opt -> return opt { optInlineLinks = True } )) + "" -- "Use inline links in parsing HTML" + , Option "R" ["parse-raw"] (NoArg (\opt -> return opt { optParseRaw = True })) @@ -398,6 +405,7 @@ main = do , optDumpArgs = dumpArgs , optIgnoreArgs = ignoreArgs , optStrict = strict + , optInlineLinks = inlineLinks } = opts if dumpArgs @@ -440,12 +448,13 @@ main = do let removeCRs str = filter (/= '\r') str -- remove DOS-style line endings let filter = tabFilter . addBlank . removeCRs let startParserState = - defaultParserState { stateParseRaw = parseRaw, - stateTabStop = tabStop, - stateStandalone = standalone && (not strict), - stateSmart = smart || writerName' == "latex", - stateColumns = columns, - stateStrict = strict } + defaultParserState { stateParseRaw = parseRaw, + stateTabStop = tabStop, + stateStandalone = standalone && (not strict), + stateSmart = smart || writerName' == "latex", + stateColumns = columns, + stateStrict = strict, + stateInlineLinks = inlineLinks } let csslink = if (css == "") then "" else "<link rel=\"stylesheet\" href=\"" ++ css ++ diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 3fcb33698..8ebf02c33 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -449,15 +449,18 @@ extractAttribute name ((attrName, contents):rest) = then Just (decodeEntities contents) else extractAttribute name rest -link = try (do +link = try $ do (tag, attributes) <- htmlTag "a" url <- case (extractAttribute "href" attributes) of Just url -> do {return url} Nothing -> fail "no href" let title = fromMaybe "" (extractAttribute "title" attributes) label <- inlinesTilEnd "a" - ref <- generateReference url title - return (Link (normalizeSpaces label) ref)) + state <- getState + ref <- if stateInlineLinks state + then return (Src url title) + else generateReference url title + return $ Link (normalizeSpaces label) ref image = try (do (tag, attributes) <- htmlTag "img" diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 87718030f..55e093ee7 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -133,6 +133,7 @@ data ParserState = ParserState stateSmart :: Bool, -- ^ Use smart typography stateColumns :: Int, -- ^ Number of columns in -- terminal (used for tables) + stateInlineLinks :: Bool, -- ^ Parse html links as inline stateHeaderTable :: [HeaderType] -- ^ List of header types used, -- in what order (rst only) } @@ -155,6 +156,7 @@ defaultParserState = stateStrict = False, stateSmart = False, stateColumns = 80, + stateInlineLinks = False, stateHeaderTable = [] } -- | Indent string as a block. |