diff options
author | Albert Krewinkel <albert@zeitkraut.de> | 2016-05-31 12:01:48 +0200 |
---|---|---|
committer | Albert Krewinkel <albert@zeitkraut.de> | 2016-06-05 11:28:57 +0200 |
commit | 8a9f5915ab822b476c270f46e8a800982b018ba3 (patch) | |
tree | fb244f17220372fd41130b7d71b6a88c988582d7 /src/Text/Pandoc/Readers | |
parent | 06dfe3276dcec57d50ecd32d6f0df7269095584c (diff) | |
download | pandoc-8a9f5915ab822b476c270f46e8a800982b018ba3.tar.gz |
Org reader: add support for "Berkeley-style" cites
A specification for an official Org-mode citation syntax was drafted by
Richard Lawrence and enhanced with the help of others on the orgmode
mailing list. Basic support for this citation style is added to the
reader.
This closes #1978.
Diffstat (limited to 'src/Text/Pandoc/Readers')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 133 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 1 |
2 files changed, 127 insertions, 7 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 084134004..dc6b739fe 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -49,11 +49,13 @@ import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Monad ( guard, mplus, mzero, when ) +import Prelude hiding (sequence) +import Control.Monad ( guard, mplus, mzero, when, void ) import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( isPrefixOf ) +import Data.List ( intersperse, isPrefixOf ) import Data.Maybe ( fromMaybe ) import qualified Data.Map as M +import Data.Traversable (sequence) -- -- Functions acting on the parser state @@ -166,19 +168,42 @@ endline = try $ do updateLastPreCharPos return . return $ B.softbreak + +-- +-- Citations +-- + +-- The state of citations is a bit confusing due to the lack of an official +-- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the +-- first to be implemented here and is almost identical to Markdown's citation +-- syntax. The org-ref package is in wide use to handle citations, but the +-- syntax is a bit limiting and not quite as simple to write. The +-- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc +-- sytax and Org-oriented enhancements contributed by Richard Lawrence and +-- others. It's dubbed Berkeley syntax due the place of activity of its main +-- contributors. All this should be consolidated once an official Org-mode +-- citation syntax has emerged. + cite :: OrgParser (F Inlines) -cite = try $ do +cite = try $ berkeleyCite <|> do guardEnabled Ext_citations - (cs, raw) <- withRaw (pandocOrgCite <|> orgRefCite) + (cs, raw) <- withRaw $ choice + [ pandocOrgCite + , orgRefCite + , berkeleyTextualCite + ] return $ (flip B.cite (B.text raw)) <$> cs --- | A citation in Pandoc Org-mode style (@[\@citekey]@). +-- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). pandocOrgCite :: OrgParser (F [Citation]) pandocOrgCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' orgRefCite :: OrgParser (F [Citation]) -orgRefCite = try $ normalOrgRefCite <|> (fmap (:[]) <$> linkLikeOrgRefCite) +orgRefCite = try $ choice + [ normalOrgRefCite + , fmap (:[]) <$> linkLikeOrgRefCite + ] normalOrgRefCite :: OrgParser (F [Citation]) normalOrgRefCite = try $ do @@ -199,6 +224,100 @@ normalOrgRefCite = try $ do , citationHash = 0 } +-- | Read an Berkeley-style Org-mode citation. Berkeley citation style was +-- develop and adjusted to Org-mode style by John MacFarlane and Richard +-- Lawrence, respectively, both philosophers at UC Berkeley. +berkeleyCite :: OrgParser (F Inlines) +berkeleyCite = try $ do + bcl <- berkeleyCitationList + return $ do + parens <- berkeleyCiteParens <$> bcl + prefix <- berkeleyCiteCommonPrefix <$> bcl + suffix <- berkeleyCiteCommonSuffix <$> bcl + citationList <- berkeleyCiteCitations <$> bcl + if parens + then return . toCite . addToFirstAndLast prefix suffix $ citationList + else return $ maybe mempty (<> " ") prefix + <> (toListOfCites $ map toInTextMode citationList) + <> maybe mempty (", " <>) suffix + where + toCite :: [Citation] -> Inlines + toCite cs = B.cite cs mempty + + toListOfCites :: [Citation] -> Inlines + toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) + + toInTextMode :: Citation -> Citation + toInTextMode c = c { citationMode = AuthorInText } + + addToFirstAndLast :: Maybe Inlines -> Maybe Inlines -> [Citation] -> [Citation] + addToFirstAndLast pre suf (c:cs) = + let firstCite = maybe c + (\p -> c { citationPrefix = B.toList p <> citationPrefix c }) + pre + cites = firstCite:cs + lc = last cites + lastCite = maybe lc + (\s -> lc { citationSuffix = B.toList s <> citationSuffix lc }) + suf + in init cites ++ [lastCite] + addToFirstAndLast _ _ _ = [] + +data BerkeleyCitationList = BerkeleyCitationList + { berkeleyCiteParens :: Bool + , berkeleyCiteCommonPrefix :: Maybe Inlines + , berkeleyCiteCommonSuffix :: Maybe Inlines + , berkeleyCiteCitations :: [Citation] + } +berkeleyCitationList :: OrgParser (F BerkeleyCitationList) +berkeleyCitationList = try $ do + char '[' + parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] + char ':' + skipSpaces + commonPrefix <- optionMaybe (try $ citationListPart <* char ';') + citations <- citeList + commonSuffix <- optionMaybe (try $ citationListPart) + char ']' + return (BerkeleyCitationList parens + <$> sequence commonPrefix + <*> sequence commonSuffix + <*> citations) + where + citationListPart :: OrgParser (F Inlines) + citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do + notFollowedBy' citeKey + notFollowedBy (oneOf ";]") + inline + +berkeleyBareTag :: OrgParser () +berkeleyBareTag = try $ void berkeleyBareTag' + +berkeleyParensTag :: OrgParser () +berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' + +berkeleyBareTag' :: OrgParser () +berkeleyBareTag' = try $ void (string "cite") + +berkeleyTextualCite :: OrgParser (F [Citation]) +berkeleyTextualCite = try $ do + (suppressAuthor, key) <- citeKey + returnF . return $ Citation + { citationId = key + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + +-- The following is what a Berkeley-style bracketed textual citation parser +-- would look like. However, as these citations are a subset of Pandoc's Org +-- citation style, this isn't used. +-- berkeleyBracketedTextualCite :: OrgParser (F [Citation]) +-- berkeleyBracketedTextualCite = try . (fmap head) $ +-- enclosedByPair '[' ']' berkeleyTextualCite + -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. @@ -243,7 +362,7 @@ orgRefCiteMode = ] citeList :: OrgParser (F [Citation]) -citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) +citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) citation :: OrgParser (F Citation) citation = try $ do diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 8cf0c696c..95415f823 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -97,6 +97,7 @@ module Text.Pandoc.Readers.Org.Parsing , try , sepBy , sepBy1 + , sepEndBy1 , option , optional , optionMaybe |