From 9df589b9c5a4f2dcb19445239dfae41b54625330 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:45:37 +0200
Subject: Introduce class HasLastStrPosition, generalize functions

Both `ParserState` and `OrgParserState` keep track of the parser position at
which the last string ended.  This patch introduces a new class
`HasLastStrPosition` and makes the above types instances of that class.  This
enables the generalization of functions updating the state or checking if one
is right after a string.
---
 src/Text/Pandoc/Parsing.hs          | 32 +++++++++++++++++++++++---------
 src/Text/Pandoc/Readers/Markdown.hs | 11 +++--------
 src/Text/Pandoc/Readers/Org.hs      | 11 ++++-------
 3 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index d1e55cbc4..344f6c7ba 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -54,7 +54,6 @@ module Text.Pandoc.Parsing ( (>>~),
                              withRaw,
                              escaped,
                              characterReference,
-                             updateLastStrPos,
                              anyOrderedListMarker,
                              orderedListMarker,
                              charRef,
@@ -66,11 +65,14 @@ module Text.Pandoc.Parsing ( (>>~),
                              testStringWith,
                              guardEnabled,
                              guardDisabled,
+                             updateLastStrPos,
+                             notAfterString,
                              ParserState (..),
                              HasReaderOptions (..),
                              HasHeaderMap (..),
                              HasIdentifierList (..),
                              HasMacros (..),
+                             HasLastStrPosition (..),
                              defaultParserState,
                              HeaderType (..),
                              ParserContext (..),
@@ -904,6 +906,14 @@ instance HasMacros ParserState where
   extractMacros        = stateMacros
   updateMacros f st    = st{ stateMacros = f $ stateMacros st }
 
+class HasLastStrPosition st where
+  setLastStrPos  :: SourcePos -> st -> st
+  getLastStrPos  :: st -> Maybe SourcePos
+
+instance HasLastStrPosition ParserState where
+  setLastStrPos pos st = st{ stateLastStrPos = Just pos }
+  getLastStrPos st     = stateLastStrPos st
+
 defaultParserState :: ParserState
 defaultParserState =
     ParserState { stateOptions         = def,
@@ -938,6 +948,17 @@ guardEnabled ext = getOption readerExtensions >>= guard . Set.member ext
 guardDisabled :: HasReaderOptions st => Extension -> Parser s st ()
 guardDisabled ext = getOption readerExtensions >>= guard . not . Set.member ext
 
+-- | Update the position on which the last string ended.
+updateLastStrPos :: HasLastStrPosition st => Parser s st ()
+updateLastStrPos = getPosition >>= updateState . setLastStrPos
+
+-- | Whether we are right after the end of a string.
+notAfterString :: HasLastStrPosition st => Parser s st Bool
+notAfterString = do
+  pos <- getPosition
+  st  <- getState
+  return $ getLastStrPos st /= Just pos
+
 data HeaderType
     = SingleHeader Char  -- ^ Single line of characters underneath
     | DoubleHeader Char  -- ^ Lines of characters above and below
@@ -1049,17 +1070,11 @@ charOrRef cs =
                        guard (c `elem` cs)
                        return c)
 
-updateLastStrPos :: Parser [Char] ParserState ()
-updateLastStrPos = getPosition >>= \p ->
-  updateState $ \s -> s{ stateLastStrPos = Just p }
-
 singleQuoteStart :: Parser [Char] ParserState ()
 singleQuoteStart = do
   failIfInQuoteContext InSingleQuote
-  pos <- getPosition
-  st <- getState
   -- single quote start can't be right after str
-  guard $ stateLastStrPos st /= Just pos
+  guard =<< notAfterString
   () <$ charOrRef "'\8216\145"
 
 singleQuoteEnd :: Parser [Char] st ()
@@ -1156,4 +1171,3 @@ applyMacros' target = do
      then do macros <- extractMacros `fmap` getState
              return $ applyMacros macros target
      else return target
-
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index d1637b701..1ac98e94c 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1474,9 +1474,7 @@ strongOrEmph =  enclosure '*' <|> (checkIntraword >> enclosure '_')
   where  checkIntraword = do
            exts <- getOption readerExtensions
            when (Ext_intraword_underscores `Set.member` exts) $ do
-             pos <- getPosition
-             lastStrPos <- stateLastStrPos <$> getState
-             guard $ lastStrPos /= Just pos
+             guard =<< notAfterString
 
 -- | Parses a list of inlines between start and end delimiters.
 inlinesBetween :: (Show b)
@@ -1518,8 +1516,7 @@ nonEndline = satisfy (/='\n')
 str :: MarkdownParser (F Inlines)
 str = do
   result <- many1 alphaNum
-  pos <- getPosition
-  updateState $ \s -> s{ stateLastStrPos = Just pos }
+  updateLastStrPos
   let spacesToNbr = map (\c -> if c == ' ' then '\160' else c)
   isSmart <- getOption readerSmart
   if isSmart
@@ -1821,9 +1818,7 @@ citeKey :: MarkdownParser (Bool, String)
 citeKey = try $ do
   -- make sure we're not right after an alphanumeric,
   -- since foo@bar.baz is probably an email address
-  lastStrPos <- stateLastStrPos <$> getState
-  pos <- getPosition
-  guard $ lastStrPos /= Just pos
+  guard =<< notAfterString
   suppress_author <- option False (char '-' >> return True)
   char '@'
   first <- letter <|> char '_'
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 2e4a29beb..5dbcaee98 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -105,6 +105,10 @@ instance HasMeta OrgParserState where
   deleteMeta field st =
     st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
 
+instance HasLastStrPosition OrgParserState where
+  getLastStrPos = orgStateLastStrPos
+  setLastStrPos pos st = st{ orgStateLastStrPos = Just pos }
+
 instance Default OrgParserState where
   def = defaultOrgParserState
 
@@ -1274,13 +1278,6 @@ afterEmphasisPreChar = do
   lastPrePos <- orgStateLastPreCharPos <$> getState
   return . fromMaybe True $ (== pos) <$> lastPrePos
 
--- | Whether we are right after the end of a string
-notAfterString :: OrgParser Bool
-notAfterString = do
-  pos <- getPosition
-  lastStrPos <- orgStateLastStrPos <$> getState
-  return $ lastStrPos /= Just pos
-
 -- | Whether the parser is right after a forbidden border char
 notAfterForbiddenBorderChar :: OrgParser Bool
 notAfterForbiddenBorderChar = do
-- 
cgit v1.2.3


From 2423f9e6b180bc6b04d222a4b574de995d296f80 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:58:05 +0200
Subject: Move `citeKey` from Readers.Markdown to Parsing

The function can be used by other readers, so it is made accessible for
all parsers.
---
 src/Text/Pandoc/Parsing.hs          | 13 +++++++++++++
 src/Text/Pandoc/Readers/Markdown.hs | 14 --------------
 2 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs
index 344f6c7ba..4cd6591c0 100644
--- a/src/Text/Pandoc/Parsing.hs
+++ b/src/Text/Pandoc/Parsing.hs
@@ -94,6 +94,7 @@ module Text.Pandoc.Parsing ( (>>~),
                              apostrophe,
                              dash,
                              nested,
+                             citeKey,
                              macro,
                              applyMacros',
                              Parser,
@@ -1144,6 +1145,18 @@ nested p = do
   updateState $ \st -> st{ stateMaxNestingLevel = nestlevel }
   return res
 
+citeKey :: HasLastStrPosition st => Parser [Char] st (Bool, String)
+citeKey = try $ do
+  guard =<< notAfterString
+  suppress_author <- option False (char '-' *> return True)
+  char '@'
+  firstChar <- letter <|> char '_'
+  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
+  let internal p = try $ p <* lookAhead regchar
+  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
+  let key = firstChar:rest
+  return (suppress_author, key)
+
 --
 -- Macros
 --
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 1ac98e94c..5129bc2e3 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -1814,20 +1814,6 @@ normalCite = try $ do
   char ']'
   return citations
 
-citeKey :: MarkdownParser (Bool, String)
-citeKey = try $ do
-  -- make sure we're not right after an alphanumeric,
-  -- since foo@bar.baz is probably an email address
-  guard =<< notAfterString
-  suppress_author <- option False (char '-' >> return True)
-  char '@'
-  first <- letter <|> char '_'
-  let regchar = satisfy (\c -> isAlphaNum c || c == '_')
-  let internal p = try $ p >>~ lookAhead regchar
-  rest <- many $ regchar <|> internal (oneOf ":.#$%&-+?<>~/")
-  let key = first:rest
-  return (suppress_author, key)
-
 suffix :: MarkdownParser (F Inlines)
 suffix = try $ do
   hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
-- 
cgit v1.2.3


From ceeb701c254c6dc4c054e10dd151d9ef6f751ad7 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Wed, 14 May 2014 14:49:30 +0200
Subject: Org reader: support Pandocs citation extension

Citations are defined via the "normal citation" syntax used in markdown,
with the sole difference that newlines are not allowed between "[...]".
This is for consistency, as org-mode generally disallows newlines
between square brackets.

The extension is turned on by default and can be turned off via the
default syntax-extension mechanism, i.e. by specifying "org-citation" as
the input format.
Move `citeKey` from Readers.Markdown into Parsing

The function can be used by other readers, so it is made accessible for
all parsers.
---
 src/Text/Pandoc.hs             |  2 +-
 src/Text/Pandoc/Readers/Org.hs | 55 ++++++++++++++++++++++++++++++++++++++++--
 tests/Tests/Readers/Org.hs     | 22 +++++++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)

(limited to 'src/Text/Pandoc')

diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs
index dd5bc18f6..130338f0e 100644
--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@@ -275,6 +275,7 @@ getDefaultExtensions "markdown_mmd" = multimarkdownExtensions
 getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = pandocExtensions
+getDefaultExtensions "org"             = Set.fromList [Ext_citations]
 getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]
 
@@ -319,4 +320,3 @@ readJSON _ = either error id . eitherDecode' . UTF8.fromStringLazy
 
 writeJSON :: WriterOptions -> Pandoc -> String
 writeJSON _ = UTF8.toStringLazy . encode
-
diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5dbcaee98..86dda2732 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -869,6 +869,7 @@ inline :: OrgParser (F Inlines)
 inline =
   choice [ whitespace
          , linebreak
+         , cite
          , footnote
          , linkOrImage
          , anchor
@@ -933,6 +934,51 @@ endline = try $ do
   updateLastPreCharPos
   return . return $ B.space
 
+cite :: OrgParser (F Inlines)
+cite = try $ do
+  guardEnabled Ext_citations
+  (cs, raw) <- withRaw normalCite
+  return $ (flip B.cite (B.text raw)) <$> cs
+
+normalCite :: OrgParser (F [Citation])
+normalCite = try $  char '['
+                 *> skipSpaces
+                 *> citeList
+                 <* skipSpaces
+                 <* char ']'
+
+citeList :: OrgParser (F [Citation])
+citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces)
+
+citation :: OrgParser (F Citation)
+citation = try $ do
+  pref <- prefix
+  (suppress_author, key) <- citeKey
+  suff <- suffix
+  return $ do
+    x <- pref
+    y <- suff
+    return $ Citation{ citationId      = key
+                     , citationPrefix  = B.toList x
+                     , citationSuffix  = B.toList y
+                     , citationMode    = if suppress_author
+                                            then SuppressAuthor
+                                            else NormalCitation
+                     , citationNoteNum = 0
+                     , citationHash    = 0
+                     }
+ where
+   prefix = trimInlinesF . mconcat <$>
+            manyTill inline (char ']' <|> (']' <$ lookAhead citeKey))
+   suffix = try $ do
+     hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
+     skipSpaces
+     rest <- trimInlinesF . mconcat <$>
+             many (notFollowedBy (oneOf ";]") *> inline)
+     return $ if hasSpace
+              then (B.space <>) <$> rest
+              else rest
+
 footnote :: OrgParser (F Inlines)
 footnote = try $ inlineNote <|> referencedNote
 
@@ -1007,7 +1053,7 @@ selfTarget :: OrgParser String
 selfTarget = try $ char '[' *> linkTarget <* char ']'
 
 linkTarget :: OrgParser String
-linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
+linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]")
 
 applyCustomLinkFormat :: String -> OrgParser (F String)
 applyCustomLinkFormat link = do
@@ -1083,7 +1129,12 @@ inlineCodeBlock = try $ do
   let attrClasses = [translateLang lang, rundocBlockClass]
   let attrKeyVal  = map toRundocAttrib (("language", lang) : opts)
   returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode
- where enclosedByPair s e p = char s *> many1Till p (char e)
+
+enclosedByPair :: Char          -- ^ opening char
+               -> Char          -- ^ closing char
+               -> OrgParser a   -- ^ parser
+               -> OrgParser [a]
+enclosedByPair s e p = char s *> many1Till p (char e)
 
 emph      :: OrgParser (F Inlines)
 emph      = fmap B.emph         <$> emphasisBetween '/'
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 4ef7a7731..ca97ba348 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -225,6 +225,28 @@ tests =
                              ]
                            )
                            "echo 'Hello, World'")
+
+      , "Citation" =:
+          "[@nonexistent]" =?>
+          let citation = Citation
+                         { citationId = "nonexistent"
+                         , citationPrefix = []
+                         , citationSuffix = []
+                         , citationMode = NormalCitation
+                         , citationNoteNum = 0
+                         , citationHash = 0}
+          in (para $ cite [citation] "[@nonexistent]")
+
+      , "Citation containing text" =:
+          "[see @item1 p. 34-35]" =?>
+          let citation = Citation
+                         { citationId = "item1"
+                         , citationPrefix = [Str "see"]
+                         , citationSuffix = [Space ,Str "p.",Space,Str "34-35"]
+                         , citationMode = NormalCitation
+                         , citationNoteNum = 0
+                         , citationHash = 0}
+          in (para $ cite [citation] "[see @item1 p. 34-35]")
       ]
 
   , testGroup "Meta Information" $
-- 
cgit v1.2.3