From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:49:30 +0200
Subject: Org reader: support Pandocs citation extension

Citations are defined via the "normal citation" syntax used in markdown,
with the sole difference that newlines are not allowed between "[...]".
This is for consistency, as org-mode generally disallows newlines
between square brackets.

The extension is turned on by default and can be turned off via the
default syntax-extension mechanism, i.e. by specifying "org-citation" as
the input format.
Move `citeKey` from Readers.Markdown into Parsing

The function can be used by other readers, so it is made accessible for
all parsers.
---
 src/Text/Pandoc.hs             |  2 +-
 src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index dd5bc18f6..130338f0e 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = pandocExtensions
+getDefaultExtensions "org"             = Set.fromList [Ext_citations]
 getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
 
@@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
 
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
-
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5dbcaee98..86dda2732 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
+         , cite
          , footnote
          , linkOrImage
          , anchor
@@ -933,6 +934,51 @@ endline = try $ do
   updateLastPreCharPos
   return . return $ B.space
 
+cite :: OrgParser (F Inlines)
+cite = try $ do
+  guardEnabled Ext_citations
+  (cs, raw) <- withRaw normalCite
+  return $ (flip B.cite (B.text raw)) <$> cs
+
+normalCite :: OrgParser (F [Citation])
+normalCite = try $  char '['
+                 *> skipSpaces
+                 *> citeList
+                 <* skipSpaces
+                 <* char ']'
+
+citeList :: OrgParser (F [Citation])
+citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: OrgParser (F Citation)
+citation = try $ do
+  pref <- prefix
+  (suppress_author, key) <- citeKey
+  suff <- suffix
+  return $ do
+    x <- pref
+    y <- suff
+    return $ Citation{ citationId      = key
+                     , citationPrefix  = B.toList x
+                     , citationSuffix  = B.toList y
+                     , citationMode    = if suppress_author
+                                            then SuppressAuthor
+                                            else NormalCitation
+                     , citationNoteNum = 0
+                     , citationHash    = 0
+                     }
+ where
+   prefix = trimInlinesF . mconcat <$>
+            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+   suffix = try $ do
+     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+     skipSpaces
+     rest <- trimInlinesF . mconcat <$>
+             many (notFollowedBy (oneOf ";]") *> inline)
+     return $ if hasSpace
+              then (B.space <>) <$> rest
+              else rest
+
 footnote :: OrgParser (F Inlines)
 footnote = try $ inlineNote <|> referencedNote
 
@@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
 
 applyCustomLinkFormat :: String -> OrgParser (F String)
 applyCustomLinkFormat link = do
@@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do
   let attrClasses = [translateLang lang, rundocBlockClass]
   let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where enclosedByPair s e p = char s *> many1Till p (char e)
+
+enclosedByPair :: Char          -- ^ opening char
+               -> Char          -- ^ closing char
+               -> OrgParser a   -- ^ parser
+               -> OrgParser [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
 
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
-- 
cgit v1.2.3