aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org/Inlines.hs
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2021-12-29 15:00:59 +0200
committerIgor Pashev <pashev.igor@gmail.com>2021-12-29 15:00:59 +0200
commitb4361712899fd0183fea5513180cb383979616de (patch)
tree688ab7ee2ab3a8cd32b4e37b506099aec95388f7 /src/Text/Pandoc/Readers/Org/Inlines.hs
parent726ad97faebe59e024d68d293e663c02bbe423c8 (diff)
parentd960282b105a6469c760b4308a3b81da723b7256 (diff)
downloadpandoc-b4361712899fd0183fea5513180cb383979616de.tar.gz
Merge https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/Org/Inlines.hs')
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs314
1 files changed, 166 insertions, 148 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 6862dd71e..617f98a10 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -31,11 +31,10 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
import Text.Pandoc.Sources (ToSources(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-
-import Control.Monad (guard, mplus, mzero, unless, void, when)
+import Safe (lastMay)
+import Control.Monad (guard, mplus, mzero, unless, when, void)
import Control.Monad.Trans (lift)
import Data.Char (isAlphaNum, isSpace)
-import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
@@ -148,31 +147,177 @@ endline = try $ do
-- Citations
--
--- The state of citations is a bit confusing due to the lack of an official
--- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the
--- first to be implemented here and is almost identical to Markdown's citation
--- syntax. The org-ref package is in wide use to handle citations, but the
--- syntax is a bit limiting and not quite as simple to write. The
--- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc
--- sytax and Org-oriented enhancements contributed by Richard Lawrence and
--- others. It's dubbed Berkeley syntax due the place of activity of its main
--- contributors. All this should be consolidated once an official Org-mode
--- citation syntax has emerged.
+-- We first try to parse official org-cite citations, then fall
+-- back to org-ref citations (which are still in wide use).
+
+-- | A citation in org-cite style
+orgCite :: PandocMonad m => OrgParser m (F [Citation])
+orgCite = try $ do
+ string "[cite"
+ (sty, _variants) <- citeStyle
+ char ':'
+ spnl
+ globalPref <- option mempty (try (citePrefix <* char ';'))
+ items <- citeItems
+ globalSuff <- option mempty (try (char ';' *> citeSuffix))
+ spnl
+ char ']'
+ return $ adjustCiteStyle sty .
+ addPrefixToFirstItem globalPref .
+ addSuffixToLastItem globalSuff $ items
+
+adjustCiteStyle :: CiteStyle -> (F [Citation]) -> (F [Citation])
+adjustCiteStyle sty cs = do
+ cs' <- cs
+ case cs' of
+ [] -> return []
+ (d:ds) -- TODO needs refinement
+ -> case sty of
+ TextStyle -> return $ d{ citationMode = AuthorInText
+ , citationSuffix = dropWhile (== Space)
+ (citationSuffix d)} : ds
+ NoAuthorStyle -> return $ d{ citationMode = SuppressAuthor } : ds
+ _ -> return (d:ds)
+
+addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addPrefixToFirstItem aff cs = do
+ cs' <- cs
+ aff' <- aff
+ case cs' of
+ [] -> return []
+ (d:ds) -> return (d{ citationPrefix =
+ B.toList aff' <> citationPrefix d }:ds)
+
+addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addSuffixToLastItem aff cs = do
+ cs' <- cs
+ aff' <- aff
+ case lastMay cs' of
+ Nothing -> return cs'
+ Just d ->
+ return (init cs' ++ [d{ citationSuffix =
+ citationSuffix d <> B.toList aff' }])
+
+citeItems :: PandocMonad m => OrgParser m (F [Citation])
+citeItems = sequence <$> citeItem `sepBy1` (char ';')
+
+citeItem :: PandocMonad m => OrgParser m (F Citation)
+citeItem = do
+ pref <- citePrefix
+ itemKey <- orgCiteKey
+ suff <- citeSuffix
+ return $ do
+ pre' <- pref
+ suf' <- suff
+ return Citation
+ { citationId = itemKey
+ , citationPrefix = B.toList pre'
+ , citationSuffix = B.toList suf'
+ , citationMode = NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+orgCiteKey :: PandocMonad m => OrgParser m Text
+orgCiteKey = do
+ char '@'
+ T.pack <$> many1 (satisfy orgCiteKeyChar)
+
+orgCiteKeyChar :: Char -> Bool
+orgCiteKeyChar c =
+ isAlphaNum c || c `elem` ['.',':','?','!','`','\'','/','*','@','+','|',
+ '(',')','{','}','<','>','&','_','^','$','#',
+ '%','~','-']
+
+rawAffix :: PandocMonad m => Bool -> OrgParser m Text
+rawAffix isPrefix = snd <$> withRaw
+ (many
+ (affixChar
+ <|>
+ try (void (char '[' >> rawAffix isPrefix >> char ']'))))
+ where
+ affixChar = void $ satisfy $ \c ->
+ not (c == '^' || c == ';' || c == '[' || c == ']') &&
+ (not isPrefix || c /= '@')
+
+citePrefix :: PandocMonad m => OrgParser m (F Inlines)
+citePrefix =
+ rawAffix True >>= parseFromString (trimInlinesF . mconcat <$> many inline)
+
+citeSuffix :: PandocMonad m => OrgParser m (F Inlines)
+citeSuffix =
+ rawAffix False >>= parseFromString parseSuffix
+ where
+ parseSuffix = do
+ hasSpace <- option False
+ (True <$ try (spaceChar >> skipSpaces >> lookAhead nonspaceChar))
+ ils <- trimInlinesF . mconcat <$> many inline
+ return $ if hasSpace
+ then (B.space <>) <$> ils
+ else ils
+
+citeStyle :: PandocMonad m => OrgParser m (CiteStyle, [CiteVariant])
+citeStyle = option (DefStyle, []) $ do
+ sty <- option DefStyle $ try $ char '/' *> orgCiteStyle
+ variants <- option [] $ try $ char '/' *> orgCiteVariants
+ return (sty, variants)
+
+orgCiteStyle :: PandocMonad m => OrgParser m CiteStyle
+orgCiteStyle = choice $ map try
+ [ NoAuthorStyle <$ string "noauthor"
+ , NoAuthorStyle <$ string "na"
+ , LocatorsStyle <$ string "locators"
+ , LocatorsStyle <$ char 'l'
+ , NociteStyle <$ string "nocite"
+ , NociteStyle <$ char 'n'
+ , TextStyle <$ string "text"
+ , TextStyle <$ char 't'
+ ]
+
+orgCiteVariants :: PandocMonad m => OrgParser m [CiteVariant]
+orgCiteVariants =
+ (fullnameVariant `sepBy1` (char '-')) <|> (many1 onecharVariant)
+ where
+ fullnameVariant = choice $ map try
+ [ Bare <$ string "bare"
+ , Caps <$ string "caps"
+ , Full <$ string "full"
+ ]
+ onecharVariant = choice
+ [ Bare <$ char 'b'
+ , Caps <$ char 'c'
+ , Full <$ char 'f'
+ ]
+
+data CiteStyle =
+ NoAuthorStyle
+ | LocatorsStyle
+ | NociteStyle
+ | TextStyle
+ | DefStyle
+ deriving Show
+
+data CiteVariant =
+ Caps
+ | Bare
+ | Full
+ deriving Show
+
+
+spnl :: PandocMonad m => OrgParser m ()
+spnl =
+ skipSpaces *> optional (newline *> notFollowedBy blankline *> skipSpaces)
cite :: PandocMonad m => OrgParser m (F Inlines)
-cite = try $ berkeleyCite <|> do
+cite = do
guardEnabled Ext_citations
- (cs, raw) <- withRaw $ choice
- [ pandocOrgCite
+ (cs, raw) <- withRaw $ try $ choice
+ [ orgCite
, orgRefCite
- , berkeleyTextualCite
]
return $ flip B.cite (B.text raw) <$> cs
--- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
-pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
-pandocOrgCite = try $
- char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
+-- org-ref
orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
orgRefCite = try $ choice
@@ -201,100 +346,6 @@ normalOrgRefCite = try $ do
, citationHash = 0
}
--- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
--- develop and adjusted to Org-mode style by John MacFarlane and Richard
--- Lawrence, respectively, both philosophers at UC Berkeley.
-berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
-berkeleyCite = try $ do
- bcl <- berkeleyCitationList
- return $ do
- parens <- berkeleyCiteParens <$> bcl
- prefix <- berkeleyCiteCommonPrefix <$> bcl
- suffix <- berkeleyCiteCommonSuffix <$> bcl
- citationList <- berkeleyCiteCitations <$> bcl
- return $
- if parens
- then toCite
- . maybe id (alterFirst . prependPrefix) prefix
- . maybe id (alterLast . appendSuffix) suffix
- $ citationList
- else maybe mempty (<> " ") prefix
- <> toListOfCites (map toInTextMode citationList)
- <> maybe mempty (", " <>) suffix
- where
- toCite :: [Citation] -> Inlines
- toCite cs = B.cite cs mempty
-
- toListOfCites :: [Citation] -> Inlines
- toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty)
-
- toInTextMode :: Citation -> Citation
- toInTextMode c = c { citationMode = AuthorInText }
-
- alterFirst, alterLast :: (a -> a) -> [a] -> [a]
- alterFirst _ [] = []
- alterFirst f (c:cs) = f c : cs
- alterLast f = reverse . alterFirst f . reverse
-
- prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
- prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c }
- appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
-
-data BerkeleyCitationList = BerkeleyCitationList
- { berkeleyCiteParens :: Bool
- , berkeleyCiteCommonPrefix :: Maybe Inlines
- , berkeleyCiteCommonSuffix :: Maybe Inlines
- , berkeleyCiteCitations :: [Citation]
- }
-berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
-berkeleyCitationList = try $ do
- char '['
- parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
- char ':'
- skipSpaces
- commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
- citations <- citeList
- commonSuffix <- optionMaybe (try citationListPart)
- char ']'
- return (BerkeleyCitationList parens
- <$> sequence commonPrefix
- <*> sequence commonSuffix
- <*> citations)
- where
- citationListPart :: PandocMonad m => OrgParser m (F Inlines)
- citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
- notFollowedBy' $ citeKey False
- notFollowedBy (oneOf ";]")
- inline
-
-berkeleyBareTag :: PandocMonad m => OrgParser m ()
-berkeleyBareTag = try $ void berkeleyBareTag'
-
-berkeleyParensTag :: PandocMonad m => OrgParser m ()
-berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag'
-
-berkeleyBareTag' :: PandocMonad m => OrgParser m ()
-berkeleyBareTag' = try $ void (string "cite")
-
-berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-berkeleyTextualCite = try $ do
- (suppressAuthor, key) <- citeKey False
- returnF . return $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- The following is what a Berkeley-style bracketed textual citation parser
--- would look like. However, as these citations are a subset of Pandoc's Org
--- citation style, this isn't used.
--- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
--- berkeleyBracketedTextualCite = try . (fmap head) $
--- enclosedByPair1 '[' ']' berkeleyTextualCite
-
-- | Read a link-like org-ref style citation. The citation includes pre and
-- post text. However, multiple citations are not possible due to limitations
-- in the syntax.
@@ -345,39 +396,6 @@ orgRefCiteMode =
, ("citeyear", SuppressAuthor)
]
-citeList :: PandocMonad m => OrgParser m (F [Citation])
-citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: PandocMonad m => OrgParser m (F Citation)
-citation = try $ do
- pref <- prefix
- (suppress_author, key) <- citeKey False
- suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return Citation
- { citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
- where
- prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
- suffix = try $ do
- hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
- skipSpaces
- rest <- trimInlinesF . mconcat <$>
- many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
-
footnote :: PandocMonad m => OrgParser m (F Inlines)
footnote = try $ do
note <- inlineNote <|> referencedNote