From 06dfe3276dcec57d50ecd32d6f0df7269095584c Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 31 May 2016 11:58:40 +0200 Subject: Org reader: add semicolon to list of special chars Semicolons are used as special characters in citations syntax. This ensures the correct parsing of Pandoc-style citations: [prefix; @key; suffix] Previously, parsing would have failed unless there was a space or other special character as the last character. --- src/Text/Pandoc/Readers/Org/Inlines.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 001aeb569..084134004 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -127,7 +127,7 @@ inlines = trimInlinesF . mconcat <$> many1 inline -- treat these as potentially non-text when parsing inline: specialChars :: [Char] -specialChars = "\"$'()*+-,./:<=>[\\]^_{|}~" +specialChars = "\"$'()*+-,./:;<=>[\\]^_{|}~" whitespace :: OrgParser (F Inlines) -- cgit v1.2.3 From 8a9f5915ab822b476c270f46e8a800982b018ba3 Mon Sep 17 00:00:00 2001 From: Albert Krewinkel Date: Tue, 31 May 2016 12:01:48 +0200 Subject: Org reader: add support for "Berkeley-style" cites A specification for an official Org-mode citation syntax was drafted by Richard Lawrence and enhanced with the help of others on the orgmode mailing list. Basic support for this citation style is added to the reader. This closes #1978. --- src/Text/Pandoc/Readers/Org/Inlines.hs | 133 +++++++++++++++++++++++++++++++-- src/Text/Pandoc/Readers/Org/Parsing.hs | 1 + tests/Tests/Readers/Org.hs | 49 ++++++++++++ 3 files changed, 176 insertions(+), 7 deletions(-) diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 084134004..dc6b739fe 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -49,11 +49,13 @@ import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Control.Monad ( guard, mplus, mzero, when ) +import Prelude hiding (sequence) +import Control.Monad ( guard, mplus, mzero, when, void ) import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( isPrefixOf ) +import Data.List ( intersperse, isPrefixOf ) import Data.Maybe ( fromMaybe ) import qualified Data.Map as M +import Data.Traversable (sequence) -- -- Functions acting on the parser state @@ -166,19 +168,42 @@ endline = try $ do updateLastPreCharPos return . return $ B.softbreak + +-- +-- Citations +-- + +-- The state of citations is a bit confusing due to the lack of an official +-- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the +-- first to be implemented here and is almost identical to Markdown's citation +-- syntax. The org-ref package is in wide use to handle citations, but the +-- syntax is a bit limiting and not quite as simple to write. The +-- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc +-- sytax and Org-oriented enhancements contributed by Richard Lawrence and +-- others. It's dubbed Berkeley syntax due the place of activity of its main +-- contributors. All this should be consolidated once an official Org-mode +-- citation syntax has emerged. + cite :: OrgParser (F Inlines) -cite = try $ do +cite = try $ berkeleyCite <|> do guardEnabled Ext_citations - (cs, raw) <- withRaw (pandocOrgCite <|> orgRefCite) + (cs, raw) <- withRaw $ choice + [ pandocOrgCite + , orgRefCite + , berkeleyTextualCite + ] return $ (flip B.cite (B.text raw)) <$> cs --- | A citation in Pandoc Org-mode style (@[\@citekey]@). +-- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). pandocOrgCite :: OrgParser (F [Citation]) pandocOrgCite = try $ char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' orgRefCite :: OrgParser (F [Citation]) -orgRefCite = try $ normalOrgRefCite <|> (fmap (:[]) <$> linkLikeOrgRefCite) +orgRefCite = try $ choice + [ normalOrgRefCite + , fmap (:[]) <$> linkLikeOrgRefCite + ] normalOrgRefCite :: OrgParser (F [Citation]) normalOrgRefCite = try $ do @@ -199,6 +224,100 @@ normalOrgRefCite = try $ do , citationHash = 0 } +-- | Read an Berkeley-style Org-mode citation. Berkeley citation style was +-- develop and adjusted to Org-mode style by John MacFarlane and Richard +-- Lawrence, respectively, both philosophers at UC Berkeley. +berkeleyCite :: OrgParser (F Inlines) +berkeleyCite = try $ do + bcl <- berkeleyCitationList + return $ do + parens <- berkeleyCiteParens <$> bcl + prefix <- berkeleyCiteCommonPrefix <$> bcl + suffix <- berkeleyCiteCommonSuffix <$> bcl + citationList <- berkeleyCiteCitations <$> bcl + if parens + then return . toCite . addToFirstAndLast prefix suffix $ citationList + else return $ maybe mempty (<> " ") prefix + <> (toListOfCites $ map toInTextMode citationList) + <> maybe mempty (", " <>) suffix + where + toCite :: [Citation] -> Inlines + toCite cs = B.cite cs mempty + + toListOfCites :: [Citation] -> Inlines + toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) + + toInTextMode :: Citation -> Citation + toInTextMode c = c { citationMode = AuthorInText } + + addToFirstAndLast :: Maybe Inlines -> Maybe Inlines -> [Citation] -> [Citation] + addToFirstAndLast pre suf (c:cs) = + let firstCite = maybe c + (\p -> c { citationPrefix = B.toList p <> citationPrefix c }) + pre + cites = firstCite:cs + lc = last cites + lastCite = maybe lc + (\s -> lc { citationSuffix = B.toList s <> citationSuffix lc }) + suf + in init cites ++ [lastCite] + addToFirstAndLast _ _ _ = [] + +data BerkeleyCitationList = BerkeleyCitationList + { berkeleyCiteParens :: Bool + , berkeleyCiteCommonPrefix :: Maybe Inlines + , berkeleyCiteCommonSuffix :: Maybe Inlines + , berkeleyCiteCitations :: [Citation] + } +berkeleyCitationList :: OrgParser (F BerkeleyCitationList) +berkeleyCitationList = try $ do + char '[' + parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] + char ':' + skipSpaces + commonPrefix <- optionMaybe (try $ citationListPart <* char ';') + citations <- citeList + commonSuffix <- optionMaybe (try $ citationListPart) + char ']' + return (BerkeleyCitationList parens + <$> sequence commonPrefix + <*> sequence commonSuffix + <*> citations) + where + citationListPart :: OrgParser (F Inlines) + citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do + notFollowedBy' citeKey + notFollowedBy (oneOf ";]") + inline + +berkeleyBareTag :: OrgParser () +berkeleyBareTag = try $ void berkeleyBareTag' + +berkeleyParensTag :: OrgParser () +berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' + +berkeleyBareTag' :: OrgParser () +berkeleyBareTag' = try $ void (string "cite") + +berkeleyTextualCite :: OrgParser (F [Citation]) +berkeleyTextualCite = try $ do + (suppressAuthor, key) <- citeKey + returnF . return $ Citation + { citationId = key + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText + , citationNoteNum = 0 + , citationHash = 0 + } + +-- The following is what a Berkeley-style bracketed textual citation parser +-- would look like. However, as these citations are a subset of Pandoc's Org +-- citation style, this isn't used. +-- berkeleyBracketedTextualCite :: OrgParser (F [Citation]) +-- berkeleyBracketedTextualCite = try . (fmap head) $ +-- enclosedByPair '[' ']' berkeleyTextualCite + -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. @@ -243,7 +362,7 @@ orgRefCiteMode = ] citeList :: OrgParser (F [Citation]) -citeList = sequence <$> sepBy1 citation (try $ char ';' *> skipSpaces) +citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) citation :: OrgParser (F Citation) citation = try $ do diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 8cf0c696c..95415f823 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -97,6 +97,7 @@ module Text.Pandoc.Readers.Org.Parsing , try , sepBy , sepBy1 + , sepEndBy1 , option , optional , optionMaybe diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs index 9bd999b01..ab50aa49c 100644 --- a/tests/Tests/Readers/Org.hs +++ b/tests/Tests/Readers/Org.hs @@ -336,6 +336,55 @@ tests = } in (para $ cite [citation] "[[citep:Dominik201408][See page 20::, for example]]") + , testGroup "Berkeley-style citations" $ + let pandocCite = Citation + { citationId = "Pandoc" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + pandocInText = pandocCite { citationMode = AuthorInText } + dominikCite = Citation + { citationId = "Dominik201408" + , citationPrefix = mempty + , citationSuffix = mempty + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + dominikInText = dominikCite { citationMode = AuthorInText } + in [ + "Berkeley-style in-text citation" =: + "See @Dominik201408." =?> + (para $ "See " + <> cite [dominikInText] "@Dominik201408" + <> ".") + + , "Berkeley-style parenthetical citation list" =: + "[(cite): see; @Dominik201408;also @Pandoc; and others]" =?> + let pandocCite' = pandocCite { + citationPrefix = toList "also" + , citationSuffix = toList "and others" + } + dominikCite' = dominikCite { + citationPrefix = toList "see" + } + in (para $ cite [dominikCite', pandocCite'] "") + + , "Berkeley-style plain citation list" =: + "[cite: See; @Dominik201408; and @Pandoc; and others]" =?> + let pandocCite' = pandocInText { + citationPrefix = toList "and" + } + in (para $ "See " + <> cite [dominikInText] "" + <> "," <> space + <> cite [pandocCite'] "" + <> "," <> space <> "and others") + ] + , "Inline LaTeX symbol" =: "\\dots" =?> para "…" -- cgit v1.2.3