From 2f724aaaa4875a21560870d25c3d212f974c6dde Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Thu, 24 Apr 2014 17:42:01 +0200
Subject: Org reader: Read anchors as empty spans

Anchors (like <<this>>) are parsed as empty spans.
---
 src/Text/Pandoc/Readers/Org.hs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src')

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 7a50b1db9..7f1893936 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -740,6 +740,7 @@ inline =
          , linebreak
          , footnote
          , linkOrImage
+         , anchor
          , str
          , endline
          , emph
@@ -886,6 +887,30 @@ isImageFilename filename =
    imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
    protocols = [ "file", "http", "https" ]
 
+-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
+-- @anchor-id@ set as id.  Legal anchors in org-mode are defined through
+-- @org-target-regexp@, which is fairly liberal.  Since no link is created if
+-- @anchor-id@ contains spaces, we are more restrictive in what is accepted as
+-- an anchor.
+
+anchor :: OrgParser (F Inlines)
+anchor =  try $ pure <$> (B.spanWith <$> attributes <*> pure mempty)
+ where
+       name = string "<<"
+              *> many1 (noneOf "\t\n\r<>\"' ")
+              <* string ">>"
+       attributes = name >>= \n -> return (solidify n, [], [])
+
+-- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'.  This mirrors
+-- the org function @org-export-solidify-link-text@.
+
+solidify :: String -> String
+solidify = map replaceSpecialChar
+ where replaceSpecialChar c
+           | isAlphaNum c    = c
+           | c `elem` "_.-:" = c
+           | otherwise       = '-'
+
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
 
-- 
cgit v1.2.3