Markdown reader: Made implicit header references case-insensitive.

Added `stateHeaderKeys` to `ParserState`; this is a `KeyTable` like `stateKeys`, but it only gets consulted if we don't find a match in `stateKeys`, and if `Ext_implicit_header_references` is enabled. Closes #1606.
author: John MacFarlane <jgm@berkeley.edu> 2015-05-13 23:02:54 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2015-05-13 23:12:58 -0700
commit: 24ee1ab4f739d9f89e3d2741d841a99ebaaafabf (patch)
tree: a07826e6f6499c4279a9742c57f2d6c1c16083a2
parent: dc51114320e49945be191cbc3886ad6b4970c72a (diff)
download: pandoc-24ee1ab4f739d9f89e3d2741d841a99ebaaafabf.tar.gz
6 files changed, 45 insertions, 16 deletions
diff --git a/README b/README
index 4225da89e..9afacf637 100644
--- a/README
+++ b/README
@@ -1269,10 +1269,17 @@ If there are multiple headers with identical text, the corresponding
 reference will link to the first one only, and you will need to use explicit
 links to link to the others, as described above.
 
-Unlike regular reference links, these references are case-sensitive.
+Like regular reference links, these references are case-insensitive.
 
-Note:  if you have defined an explicit identifier for a header,
-then implicit references to it will not work.
+Explicit link reference definitions always take priority over
+implicit header references.  So, in the following example, the
+link will point to `bar`, not to `#foo`:
+
+    # Foo
+
+    [foo]: bar
+
+    See [foo]
 
 Block quotations
 ----------------
diff --git a/changelog b/changelog
index 2de22f487..2f00ca093 100644
--- a/changelog
+++ b/changelog
@@ -35,6 +35,8 @@ pandoc (1.14)
 
   * Markdown reader:
 
+    + Reference links with `implicit_header_references` are no longer
+      case-sensitive (#1606).
     + Definition lists:  don't require indent for first line (#2087).
       Previously the body of the definition (after the `:` or `~` marker)
       needed to be in column 4.  This commit relaxes that requirement,
@@ -439,7 +441,10 @@ pandoc (1.14)
     `extra-source-files`.  This should make maintenance of these components
     easier going forward.
 
-  * `Text.Pandoc.Parsing`: Added new `<+?>` combinator (Nikolay Yakimov).
+  * `Text.Pandoc.Parsing`:
+
+    + Added new `<+?>` combinator (Nikolay Yakimov).
+    + Added `stateHeaderKeys` to `ParserState`.
 
   * `make_deb.sh` fixes:
 
diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 91385eae8..82e7e2c33 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -903,7 +903,8 @@ data ParserState = ParserState
       stateAllowLinks      :: Bool,          -- ^ Allow parsing of links
       stateMaxNestingLevel :: Int,           -- ^ Max # of nested Strong/Emph
       stateLastStrPos      :: Maybe SourcePos, -- ^ Position after last str parsed
-      stateKeys            :: KeyTable,      -- ^ List of reference keys (with fallbacks)
+      stateKeys            :: KeyTable,      -- ^ List of reference keys
+      stateHeaderKeys      :: KeyTable,      -- ^ List of implicit header ref keys
       stateSubstitutions   :: SubstTable,    -- ^ List of substitution references
       stateNotes           :: NoteTable,     -- ^ List of notes (raw bodies)
       stateNotes'          :: NoteTable',    -- ^ List of notes (parsed bodies)
@@ -1001,6 +1002,7 @@ defaultParserState =
                   stateMaxNestingLevel = 6,
                   stateLastStrPos      = Nothing,
                   stateKeys            = M.empty,
+                  stateHeaderKeys      = M.empty,
                   stateSubstitutions   = M.empty,
                   stateNotes           = [],
                   stateNotes'          = [],
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index e697febf6..3b5ae0978 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -509,9 +509,12 @@ atxHeader = try $ do
   notFollowedBy $ guardEnabled Ext_fancy_lists >>
                   (char '.' <|> char ')') -- this would be a list
   skipSpaces
-  text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
+  (text, raw) <- withRaw $
+          trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline)
   attr <- atxClosing
-  attr' <- registerHeader attr (runF text defaultParserState)
+  attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+  guardDisabled Ext_implicit_header_references
+    <|> registerImplicitHeader raw ident
   return $ B.headerWith attr' level <$> text
 
 atxClosing :: MarkdownParser Attr
@@ -544,15 +547,24 @@ setextHeader = try $ do
   -- This lookahead prevents us from wasting time parsing Inlines
   -- unless necessary -- it gives a significant performance boost.
   lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline
-  text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
+  (text, raw) <- withRaw $
+       trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline)
   attr <- setextHeaderEnd
   underlineChar <- oneOf setextHChars
   many (char underlineChar)
   blanklines
   let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1
-  attr' <- registerHeader attr (runF text defaultParserState)
+  attr'@(ident,_,_) <- registerHeader attr (runF text defaultParserState)
+  guardDisabled Ext_implicit_header_references
+    <|> registerImplicitHeader raw ident
   return $ B.headerWith attr' level <$> text
 
+registerImplicitHeader :: String -> String -> MarkdownParser ()
+registerImplicitHeader raw ident = do
+  let key = toKey $ "[" ++ raw ++ "]"
+  updateState (\s -> s { stateHeaderKeys =
+                         M.insert key ('#':ident,"") (stateHeaderKeys s) })
+
 --
 -- hrule block
 --
@@ -1700,7 +1712,7 @@ referenceLink :: (String -> String -> Inlines -> Inlines)
               -> (F Inlines, String) -> MarkdownParser (F Inlines)
 referenceLink constructor (lab, raw) = do
   sp <- (True <$ lookAhead (char ' ')) <|> return False
-  (ref,raw') <- option (mempty, "") $
+  (_,raw') <- option (mempty, "") $
       lookAhead (try (spnl >> normalCite >> return (mempty, "")))
       <|>
       try (spnl >> reference)
@@ -1720,13 +1732,13 @@ referenceLink constructor (lab, raw) = do
   return $ do
     keys <- asksF stateKeys
     case M.lookup key keys of
-       Nothing        -> do
-         headers <- asksF stateHeaders
-         ref' <- if labIsRef then lab else ref
+       Nothing        ->
          if implicitHeaderRefs
-            then case M.lookup ref' headers of
-                   Just ident -> constructor ('#':ident) "" <$> lab
-                   Nothing    -> makeFallback
+            then do
+              headerKeys <- asksF stateHeaderKeys
+              case M.lookup key headerKeys of
+                   Just (src, tit) -> constructor src tit <$> lab
+                   Nothing         -> makeFallback
             else makeFallback
        Just (src,tit) -> constructor src tit <$> lab
 
diff --git a/tests/markdown-reader-more.native b/tests/markdown-reader-more.native
index 8fee4953e..b046d44d5 100644
--- a/tests/markdown-reader-more.native
+++ b/tests/markdown-reader-more.native
@@ -76,6 +76,7 @@
 ,Header 3 ("my-other-header",[],[]) [Str "My",Space,Str "other",Space,Str "header"]
 ,Para [Str "A",Space,Str "link",Space,Str "to",Space,Link [Str "My",Space,Str "header"] ("#my-header-1",""),Str "."]
 ,Para [Str "Another",Space,Str "link",Space,Str "to",Space,Link [Str "it"] ("#my-header-1",""),Str "."]
+,Para [Str "Should",Space,Str "be",Space,Link [Str "case",Space,Str "insensitive"] ("#my-header-1",""),Str "."]
 ,Para [Str "Link",Space,Str "to",Space,Link [Str "Explicit",Space,Str "header",Space,Str "attributes"] ("#foobar",""),Str "."]
 ,Para [Str "But",Space,Str "this",Space,Str "is",Space,Str "not",Space,Str "a",Space,Str "link",Space,Str "to",Space,Link [Str "My",Space,Str "other",Space,Str "header"] ("/foo",""),Str ",",Space,Str "since",Space,Str "the",Space,Str "reference",Space,Str "is",Space,Str "defined."]
 ,Header 2 ("foobar",["baz"],[("key","val")]) [Str "Explicit",Space,Str "header",Space,Str "attributes"]
diff --git a/tests/markdown-reader-more.txt b/tests/markdown-reader-more.txt
index 4a476adf0..4906a2eea 100644
--- a/tests/markdown-reader-more.txt
+++ b/tests/markdown-reader-more.txt
@@ -168,6 +168,8 @@ A link to [My header].
 
 Another link to [it][My header].
 
+Should be [case insensitive][my header].
+
 Link to [Explicit header attributes].
 
 [my other header]: /foo
author	John MacFarlane <jgm@berkeley.edu>	2015-05-13 23:02:54 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2015-05-13 23:12:58 -0700
commit	24ee1ab4f739d9f89e3d2741d841a99ebaaafabf (patch)
tree	a07826e6f6499c4279a9742c57f2d6c1c16083a2
parent	dc51114320e49945be191cbc3886ad6b4970c72a (diff)
download	pandoc-24ee1ab4f739d9f89e3d2741d841a99ebaaafabf.tar.gz