diff options
author | John MacFarlane <jgm@berkeley.edu> | 2016-06-05 11:58:47 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2016-06-05 11:58:47 -0700 |
commit | a184aec7076eedf01deea08a2ec64c5340d00e9c (patch) | |
tree | fb244f17220372fd41130b7d71b6a88c988582d7 | |
parent | 97f8f4ad4bc190ba0c27713eeea94f661634d418 (diff) | |
parent | 8a9f5915ab822b476c270f46e8a800982b018ba3 (diff) | |
download | pandoc-a184aec7076eedf01deea08a2ec64c5340d00e9c.tar.gz |
Merge pull request #2964 from tarleb/org-berkeley-ref
Org reader: "Berkeley style" citation support
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 135 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 1 | ||||
-rw-r--r-- | tests/Tests/Readers/Org.hs | 49 |
3 files changed, 177 insertions, 8 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 001aeb569..dc6b739fe 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -49,11 +49,13 @@ import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Monad ( guard, mplus, mzero, when ) +import Prelude hiding (sequence) +import Control.Monad ( guard, mplus, mzero, when, void ) import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( isPrefixOf ) +import Data.List ( intersperse, isPrefixOf ) import Data.Maybe ( fromMaybe ) import qualified Data.Map as M +import Data.Traversable (sequence) -- -- Functions acting on the parser state @@ -127,7 +129,7 @@ inlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" +specialChars = "\"$'()*+-,./:;<=>[\\]^_{|}~" whitespace :: OrgParser (F Inlines) @@ -166,19 +168,42 @@ endline = try $ do updateLastPreCharPos return . return $ B.softbreak + +-- +-- Citations +-- + +-- The state of citations is a bit confusing due to the lack of an official +-- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the +-- first to be implemented here and is almost identical to Markdown's citation +-- syntax. The org-ref package is in wide use to handle citations, but the +-- syntax is a bit limiting and not quite as simple to write. The +-- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc +-- sytax and Org-oriented enhancements contributed by Richard Lawrence and +-- others. It's dubbed Berkeley syntax due the place of activity of its main +-- contributors. All this should be consolidated once an official Org-mode +-- citation syntax has emerged. + cite :: OrgParser (F Inlines) -cite = try $ do +cite = try $ berkeleyCite <|> do guardEnabled Ext_citations - (cs, raw) <- withRaw (pandocOrgCite <|> orgRefCite) + (cs, raw) <- withRaw $ choice + [ pandocOrgCite + , orgRefCite + , berkeleyTextualCite + ] return $ (flip B.cite (B.text raw)) <$> cs --- | A citation in Pandoc Org-mode style (@[\@citekey]@). +-- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). pandocOrgCite :: OrgParser (F [Citation]) pandocOrgCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' orgRefCite :: OrgParser (F [Citation]) -orgRefCite = try $ normalOrgRefCite <|> (fmap (:[]) <$> linkLikeOrgRefCite) +orgRefCite = try $ choice + [ normalOrgRefCite + , fmap (:[]) <$> linkLikeOrgRefCite + ] normalOrgRefCite :: OrgParser (F [Citation]) normalOrgRefCite = try $ do @@ -199,6 +224,100 @@ normalOrgRefCite = try $ do , citationHash = 0 } +-- | Read an Berkeley-style Org-mode citation. Berkeley citation style was +-- develop and adjusted to Org-mode style by John MacFarlane and Richard +-- Lawrence, respectively, both philosophers at UC Berkeley. +berkeleyCite :: OrgParser (F Inlines) +berkeleyCite = try $ do + bcl <- berkeleyCitationList + return $ do + parens <- berkeleyCiteParens <$> bcl + prefix <- berkeleyCiteCommonPrefix <$> bcl + suffix <- berkeleyCiteCommonSuffix <$> bcl + citationList <- berkeleyCiteCitations <$> bcl + if parens + then return . toCite . addToFirstAndLast prefix suffix $ citationList + else return $ maybe mempty (<> " ") prefix + <> (toListOfCites $ map toInTextMode citationList) + <> maybe mempty (", " <>) suffix + where + toCite :: [Citation] -> Inlines + toCite cs = B.cite cs mempty + + toListOfCites :: [Citation] -> Inlines + toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) + + toInTextMode :: Citation -> Citation + toInTextMode c = c { citationMode = AuthorInText } + + addToFirstAndLast :: Maybe Inlines -> Maybe Inlines -> [Citation] -> [Citation] + addToFirstAndLast pre suf (c:cs) = + let firstCite = maybe c + (\p -> c { citationPrefix = B.toList p <> citationPrefix c }) + pre + cites = firstCite:cs + lc = last cites + lastCite = maybe lc + (\s -> lc { citationSuffix = B.toList s <> citationSuffix lc }) + suf + in init cites ++ [lastCite] + addToFirstAndLast _ _ _ = [] + +data BerkeleyCitationList = BerkeleyCitationList + { berkeleyCiteParens :: Bool + , berkeleyCiteCommonPrefix :: Maybe Inlines + , berkeleyCiteCommonSuffix :: Maybe Inlines + , berkeleyCiteCitations :: [Citation] + } +berkeleyCitationList :: OrgParser (F BerkeleyCitationList) +berkeleyCitationList = try $ do + char '[' + parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] + char ':' + skipSpaces + commonPrefix <- optionMaybe (try $ citationListPart <* char ';') + citations <- citeList + commonSuffix <- optionMaybe (try $ citationListPart) + char ']' + return (BerkeleyCitationList parens + <$> sequence commonPrefix + <*> sequence commonSuffix + <*> citations) + where + citationListPart :: OrgParser (F Inlines) + citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do + notFollowedBy' citeKey + notFollowedBy (oneOf ";]") + inline + +berkeleyBareTag :: OrgParser () +berkeleyBareTag = try $ void berkeleyBareTag' + +berkeleyParensTag :: OrgParser () +berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' + +berkeleyBareTag' :: OrgParser () +berkeleyBareTag' = try $ void (string "cite") + +berkeleyTextualCite :: OrgParser (F [Citation]) +berkeleyTextualCite = try $ do + (suppressAuthor, key) <- citeKey + returnF . return $ Citation + { citationId = key + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + +-- The following is what a Berkeley-style bracketed textual citation parser +-- would look like. However, as these citations are a subset of Pandoc's Org +-- citation style, this isn't used. +-- berkeleyBracketedTextualCite :: OrgParser (F [Citation]) +-- berkeleyBracketedTextualCite = try . (fmap head) $ +-- enclosedByPair '[' ']' berkeleyTextualCite + -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. @@ -243,7 +362,7 @@ orgRefCiteMode = ] citeList :: OrgParser (F [Citation]) -citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) +citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) citation :: OrgParser (F Citation) citation = try $ do diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 8cf0c696c..95415f823 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -97,6 +97,7 @@ module Text.Pandoc.Readers.Org.Parsing , try , sepBy , sepBy1 + , sepEndBy1 , option , optional , optionMaybe diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9bd999b01..ab50aa49c 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -336,6 +336,55 @@ tests = } in (para $ cite [citation] "[[citep:Dominik201408][See page 20::, for example]]") + , testGroup "Berkeley-style citations" $ + let pandocCite = Citation + { citationId = "Pandoc" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + pandocInText = pandocCite { citationMode = AuthorInText } + dominikCite = Citation + { citationId = "Dominik201408" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + dominikInText = dominikCite { citationMode = AuthorInText } + in [ + "Berkeley-style in-text citation" =: + "See @Dominik201408." =?> + (para $ "See " + <> cite [dominikInText] "@Dominik201408" + <> ".") + + , "Berkeley-style parenthetical citation list" =: + "[(cite): see; @Dominik201408;also @Pandoc; and others]" =?> + let pandocCite' = pandocCite { + citationPrefix = toList "also" + , citationSuffix = toList "and others" + } + dominikCite' = dominikCite { + citationPrefix = toList "see" + } + in (para $ cite [dominikCite', pandocCite'] "") + + , "Berkeley-style plain citation list" =: + "[cite: See; @Dominik201408; and @Pandoc; and others]" =?> + let pandocCite' = pandocInText { + citationPrefix = toList "and" + } + in (para $ "See " + <> cite [dominikInText] "" + <> "," <> space + <> cite [pandocCite'] "" + <> "," <> space <> "and others") + ] + , "Inline LaTeX symbol" =: "\\dots" =?> para "…" |