aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2021-05-24 10:17:37 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2021-05-27 10:38:25 -0700
commit834da53058069fe50da510fa86e0807a7ff7868f (patch)
tree791cdc1a5a2e459efac2b68a97285e2a5e737b70
parent0226d2320f5a57475ec260b9d0ad3ad4260ecf38 (diff)
downloadpandoc-834da53058069fe50da510fa86e0807a7ff7868f.tar.gz
Add `rebase_relative_paths` extension.
- Add manual entry for (non-default) extension `rebase_relative_paths`. - Add constructor `Ext_rebase_relative_paths` to `Extensions` in Text.Pandoc.Extensions [API change]. When enabled, this extension rewrites relative image and link paths by prepending the (relative) directory of the containing file. - Make Markdown reader sensitive to the new extension. - Add tests for #3752. Closes #3752. NB. currently the extension applies to markdown and associated readers but not commonmark/gfm.
-rw-r--r--MANUAL.txt28
-rw-r--r--pandoc.cabal4
-rw-r--r--src/Text/Pandoc/Extensions.hs3
-rw-r--r--src/Text/Pandoc/Readers/Markdown.hs36
-rw-r--r--test/command/3752.md35
-rw-r--r--test/command/chap1/spider.pngbin0 -> 63531 bytes
-rw-r--r--test/command/chap1/text.md11
-rw-r--r--test/command/chap2/spider.pngbin0 -> 9861 bytes
-rw-r--r--test/command/chap2/text.md3
9 files changed, 111 insertions, 9 deletions
diff --git a/MANUAL.txt b/MANUAL.txt
index 6dc783e8c..48bf60d6e 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3755,7 +3755,7 @@ definition:
Note that space between items in a definition list is required.
(A variant that loosens this requirement, but disallows "lazy"
hard wrapping, can be activated with `compact_definition_lists`: see
-[Non-pandoc extensions], below.)
+[Non-default extensions], below.)
[^3]: I have been influenced by the suggestions of [David
Wheeler](https://justatheory.com/2009/02/modest-markdown-proposal/).
@@ -5051,13 +5051,37 @@ author-in-text style inside notes when using a note style.
[finding and editing styles]: https://citationstyles.org/authors/
[CSL locale files]: https://github.com/citation-style-language/locales
-## Non-pandoc extensions
+## Non-default extensions
The following Markdown syntax extensions are not enabled by default
in pandoc, but may be enabled by adding `+EXTENSION` to the format
name, where `EXTENSION` is the name of the extension. Thus, for
example, `markdown+hard_line_breaks` is Markdown with hard line breaks.
+#### Extension: `rebase_relative_paths` ####
+
+Rewrite relative paths for Markdown links and images, depending
+on the path of the file containing the link or image link. For
+each link or image, pandoc will compute the directory of the
+containing file, relative to the working directory, and prepend
+the resulting path to the link or image path.
+
+The use of this extension is best understood by example.
+Suppose you have a subdirectory for each chapter of a book,
+`chap1`, `chap2`, `chap3`. Each contains a file `text.md` and a
+number of images used in the chapter. You would like to have
+`![image](spider.jpg)` in `chap1/text.md` refer to
+`chap1/spider.jpg` and `![image](spider.jpg)` in `chap2/text.md`
+refer to `chap2/spider.jpg`. To do this, use
+
+ pandoc chap*/*.md -f markdown+rebase_relative_paths
+
+Without this extension, you would have to use
+`![image](chap1/spider.jpg)` in `chap1/text.md` and
+`![image](chap2/spider.jpg)` in `chap2/text.md`. Links with
+relative paths will be rewritten in the same way as images.
+*This option currently only affects Markdown input.*
+
#### Extension: `attributes` ####
Allows attributes to be attached to any inline or block-level
diff --git a/pandoc.cabal b/pandoc.cabal
index c74df3e69..c8343d16e 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -214,6 +214,10 @@ extra-source-files:
test/command/C.txt
test/command/D.txt
test/command/01.csv
+ test/command/chap1/spider.png
+ test/command/chap2/spider.png
+ test/command/chap1/text.md
+ test/command/chap2/text.md
test/command/defaults1.yaml
test/command/defaults2.yaml
test/command/defaults3.yaml
diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs
index 6423d5f56..c4d54c06e 100644
--- a/src/Text/Pandoc/Extensions.hs
+++ b/src/Text/Pandoc/Extensions.hs
@@ -136,6 +136,8 @@ data Extension =
| Ext_raw_html -- ^ Allow raw HTML
| Ext_raw_tex -- ^ Allow raw TeX (other than math)
| Ext_raw_markdown -- ^ Parse markdown in ipynb as raw markdown
+ | Ext_rebase_relative_paths -- ^ Rebase relative image and link paths,
+ -- relative to directory of containing file
| Ext_shortcut_reference_links -- ^ Shortcut reference links
| Ext_simple_tables -- ^ Pandoc-style simple tables
| Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes
@@ -462,6 +464,7 @@ getAllExtensions f = universalExtensions <> getAll f
, Ext_gutenberg
, Ext_smart
, Ext_literate_haskell
+ , Ext_rebase_relative_paths
]
getAll "markdown_strict" = allMarkdownExtensions
getAll "markdown_phpextra" = allMarkdownExtensions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 34f16ab4e..968c6c165 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -29,7 +29,7 @@ import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.ByteString.Lazy as BL
-import System.FilePath (addExtension, takeExtension)
+import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory)
import Text.HTML.TagSoup hiding (Row)
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
@@ -1836,9 +1836,12 @@ regLink :: PandocMonad m
-> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
+ rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+ pos <- getPosition
+ let src' = if rebase then rebasePath pos src else src
attr <- option nullAttr $
guardEnabled Ext_link_attributes >> attributes
- return $ constructor attr src tit <$> lab
+ return $ constructor attr src' tit <$> lab
-- a link like [this][ref] or [this][] or [this]
referenceLink :: PandocMonad m
@@ -1854,6 +1857,8 @@ referenceLink constructor (lab, raw) = do
return (mempty, "")))
<|>
try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
+ rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
+ pos <- getPosition
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
@@ -1878,7 +1883,9 @@ referenceLink constructor (lab, raw) = do
Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
Nothing -> makeFallback
else makeFallback
- Just ((src,tit), attr) -> constructor attr src tit <$> lab
+ Just ((src,tit), attr) ->
+ let src' = if rebase then rebasePath pos src else src
+ in constructor attr src' tit <$> lab
dropBrackets :: Text -> Text
dropBrackets = dropRB . dropLB
@@ -1911,15 +1918,30 @@ autoLink = try $ do
return $ return $ B.linkWith attr (src <> escapeURI extra) ""
(B.str $ orig <> extra)
+-- | Rebase a relative path by prepending the directory component of
+-- the source name recorded in the given source position.
+--
+-- The path is returned unchanged when it is empty, when it consists
+-- only of a fragment (starts with @#@), when it is absolute, or when
+-- it is a URI.  It is also returned unchanged when the containing
+-- source name has no directory component, i.e. 'takeDirectory'
+-- yields @""@ or @"."@.
+rebasePath :: SourcePos -> Text -> Text
+rebasePath pos path
+  | T.null path                = path  -- nothing to rebase
+  | "#" `T.isPrefixOf` path    = path  -- fragment points into the current document
+  | isAbsolute (T.unpack path) = path
+  | isURI path                 = path
+  | otherwise                  =
+      case takeDirectory (sourceName pos) of
+        ""  -> path
+        "." -> path
+        dir -> T.pack dir <> "/" <> path
+
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
- let constructor attr' src = case takeExtension (T.unpack src) of
- "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
- $ T.unpack defaultExt)
- _ -> B.imageWith attr' src
+ let constructor attr' src =
+ case takeExtension (T.unpack src) of
+ "" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
+ $ T.unpack defaultExt)
+ _ -> B.imageWith attr' src
regLink constructor lab <|> referenceLink constructor (lab,raw)
note :: PandocMonad m => MarkdownParser m (F Inlines)
diff --git a/test/command/3752.md b/test/command/3752.md
new file mode 100644
index 000000000..76d51989b
--- /dev/null
+++ b/test/command/3752.md
@@ -0,0 +1,35 @@
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain
+^D
+[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png
+[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
+[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png
+Chapter one
+
+A spider: [spider]
+
+The moon: [moon]
+
+Link to spider picture.
+
+URL left alone: manual.
+
+Absolute path left alone: absolute.
+
+Chapter two
+
+A spider: [spider]
+```
+
+```
+% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths -t html
+^D
+<h1 id="chapter-one">Chapter one</h1>
+<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
+<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
+<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
+<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
+<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
+<h1 id="chapter-two">Chapter two</h1>
+<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
+```
diff --git a/test/command/chap1/spider.png b/test/command/chap1/spider.png
new file mode 100644
index 000000000..7ee9fe339
--- /dev/null
+++ b/test/command/chap1/spider.png
Binary files differ
diff --git a/test/command/chap1/text.md b/test/command/chap1/text.md
new file mode 100644
index 000000000..88b30313d
--- /dev/null
+++ b/test/command/chap1/text.md
@@ -0,0 +1,11 @@
+# Chapter one
+
+A spider: ![spider](spider.png)
+
+The moon: ![moon](../../lalune.jpg)
+
+Link to [spider picture](spider.png).
+
+URL left alone: [manual](https://pandoc.org/MANUAL.html).
+
+Absolute path left alone: [absolute](/foo/bar/baz.png).
diff --git a/test/command/chap2/spider.png b/test/command/chap2/spider.png
new file mode 100644
index 000000000..5377e940b
--- /dev/null
+++ b/test/command/chap2/spider.png
Binary files differ
diff --git a/test/command/chap2/text.md b/test/command/chap2/text.md
new file mode 100644
index 000000000..435a266d7
--- /dev/null
+++ b/test/command/chap2/text.md
@@ -0,0 +1,3 @@
+# Chapter two
+
+A spider: ![spider](spider.png)