diff options
author | Igor Pashev <pashev.igor@gmail.com> | 2021-12-29 15:00:59 +0200 |
---|---|---|
committer | Igor Pashev <pashev.igor@gmail.com> | 2021-12-29 15:00:59 +0200 |
commit | b4361712899fd0183fea5513180cb383979616de (patch) | |
tree | 688ab7ee2ab3a8cd32b4e37b506099aec95388f7 /src/Text/Pandoc/Readers/Org/Inlines.hs | |
parent | 726ad97faebe59e024d68d293e663c02bbe423c8 (diff) | |
parent | d960282b105a6469c760b4308a3b81da723b7256 (diff) | |
download | pandoc-b4361712899fd0183fea5513180cb383979616de.tar.gz |
Merge https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/Org/Inlines.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 314 |
1 files changed, 166 insertions, 148 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 6862dd71e..617f98a10 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -31,11 +31,10 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) import Text.Pandoc.Sources (ToSources(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap - -import Control.Monad (guard, mplus, mzero, unless, void, when) +import Safe (lastMay) +import Control.Monad (guard, mplus, mzero, unless, when, void) import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) -import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -148,31 +147,177 @@ endline = try $ do -- Citations -- --- The state of citations is a bit confusing due to the lack of an official --- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the --- first to be implemented here and is almost identical to Markdown's citation --- syntax. The org-ref package is in wide use to handle citations, but the --- syntax is a bit limiting and not quite as simple to write. The --- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc --- sytax and Org-oriented enhancements contributed by Richard Lawrence and --- others. It's dubbed Berkeley syntax due the place of activity of its main --- contributors. All this should be consolidated once an official Org-mode --- citation syntax has emerged. +-- We first try to parse official org-cite citations, then fall +-- back to org-ref citations (which are still in wide use). + +-- | A citation in org-cite style +orgCite :: PandocMonad m => OrgParser m (F [Citation]) +orgCite = try $ do + string "[cite" + (sty, _variants) <- citeStyle + char ':' + spnl + globalPref <- option mempty (try (citePrefix <* char ';')) + items <- citeItems + globalSuff <- option mempty (try (char ';' *> citeSuffix)) + spnl + char ']' + return $ adjustCiteStyle sty . + addPrefixToFirstItem globalPref . + addSuffixToLastItem globalSuff $ items + +adjustCiteStyle :: CiteStyle -> (F [Citation]) -> (F [Citation]) +adjustCiteStyle sty cs = do + cs' <- cs + case cs' of + [] -> return [] + (d:ds) -- TODO needs refinement + -> case sty of + TextStyle -> return $ d{ citationMode = AuthorInText + , citationSuffix = dropWhile (== Space) + (citationSuffix d)} : ds + NoAuthorStyle -> return $ d{ citationMode = SuppressAuthor } : ds + _ -> return (d:ds) + +addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) +addPrefixToFirstItem aff cs = do + cs' <- cs + aff' <- aff + case cs' of + [] -> return [] + (d:ds) -> return (d{ citationPrefix = + B.toList aff' <> citationPrefix d }:ds) + +addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) +addSuffixToLastItem aff cs = do + cs' <- cs + aff' <- aff + case lastMay cs' of + Nothing -> return cs' + Just d -> + return (init cs' ++ [d{ citationSuffix = + citationSuffix d <> B.toList aff' }]) + +citeItems :: PandocMonad m => OrgParser m (F [Citation]) +citeItems = sequence <$> citeItem `sepBy1` (char ';') + +citeItem :: PandocMonad m => OrgParser m (F Citation) +citeItem = do + pref <- citePrefix + itemKey <- orgCiteKey + suff <- citeSuffix + return $ do + pre' <- pref + suf' <- suff + return Citation + { citationId = itemKey + , citationPrefix = B.toList pre' + , citationSuffix = B.toList suf' + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +orgCiteKey :: PandocMonad m => OrgParser m Text +orgCiteKey = do + char '@' + T.pack <$> many1 (satisfy orgCiteKeyChar) + +orgCiteKeyChar :: Char -> Bool +orgCiteKeyChar c = + isAlphaNum c || c `elem` ['.',':','?','!','`','\'','/','*','@','+','|', + '(',')','{','}','<','>','&','_','^','$','#', + '%','~','-'] + +rawAffix :: PandocMonad m => Bool -> OrgParser m Text +rawAffix isPrefix = snd <$> withRaw + (many + (affixChar + <|> + try (void (char '[' >> rawAffix isPrefix >> char ']')))) + where + affixChar = void $ satisfy $ \c -> + not (c == '^' || c == ';' || c == '[' || c == ']') && + (not isPrefix || c /= '@') + +citePrefix :: PandocMonad m => OrgParser m (F Inlines) +citePrefix = + rawAffix True >>= parseFromString (trimInlinesF . mconcat <$> many inline) + +citeSuffix :: PandocMonad m => OrgParser m (F Inlines) +citeSuffix = + rawAffix False >>= parseFromString parseSuffix + where + parseSuffix = do + hasSpace <- option False + (True <$ try (spaceChar >> skipSpaces >> lookAhead nonspaceChar)) + ils <- trimInlinesF . mconcat <$> many inline + return $ if hasSpace + then (B.space <>) <$> ils + else ils + +citeStyle :: PandocMonad m => OrgParser m (CiteStyle, [CiteVariant]) +citeStyle = option (DefStyle, []) $ do + sty <- option DefStyle $ try $ char '/' *> orgCiteStyle + variants <- option [] $ try $ char '/' *> orgCiteVariants + return (sty, variants) + +orgCiteStyle :: PandocMonad m => OrgParser m CiteStyle +orgCiteStyle = choice $ map try + [ NoAuthorStyle <$ string "noauthor" + , NoAuthorStyle <$ string "na" + , LocatorsStyle <$ string "locators" + , LocatorsStyle <$ char 'l' + , NociteStyle <$ string "nocite" + , NociteStyle <$ char 'n' + , TextStyle <$ string "text" + , TextStyle <$ char 't' + ] + +orgCiteVariants :: PandocMonad m => OrgParser m [CiteVariant] +orgCiteVariants = + (fullnameVariant `sepBy1` (char '-')) <|> (many1 onecharVariant) + where + fullnameVariant = choice $ map try + [ Bare <$ string "bare" + , Caps <$ string "caps" + , Full <$ string "full" + ] + onecharVariant = choice + [ Bare <$ char 'b' + , Caps <$ char 'c' + , Full <$ char 'f' + ] + +data CiteStyle = + NoAuthorStyle + | LocatorsStyle + | NociteStyle + | TextStyle + | DefStyle + deriving Show + +data CiteVariant = + Caps + | Bare + | Full + deriving Show + + +spnl :: PandocMonad m => OrgParser m () +spnl = + skipSpaces *> optional (newline *> notFollowedBy blankline *> skipSpaces) cite :: PandocMonad m => OrgParser m (F Inlines) -cite = try $ berkeleyCite <|> do +cite = do guardEnabled Ext_citations - (cs, raw) <- withRaw $ choice - [ pandocOrgCite + (cs, raw) <- withRaw $ try $ choice + [ orgCite , orgRefCite - , berkeleyTextualCite ] return $ flip B.cite (B.text raw) <$> cs --- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) -pandocOrgCite = try $ - char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' +-- org-ref orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) orgRefCite = try $ choice @@ -201,100 +346,6 @@ normalOrgRefCite = try $ do , citationHash = 0 } --- | Read an Berkeley-style Org-mode citation. Berkeley citation style was --- develop and adjusted to Org-mode style by John MacFarlane and Richard --- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) -berkeleyCite = try $ do - bcl <- berkeleyCitationList - return $ do - parens <- berkeleyCiteParens <$> bcl - prefix <- berkeleyCiteCommonPrefix <$> bcl - suffix <- berkeleyCiteCommonSuffix <$> bcl - citationList <- berkeleyCiteCitations <$> bcl - return $ - if parens - then toCite - . maybe id (alterFirst . prependPrefix) prefix - . maybe id (alterLast . appendSuffix) suffix - $ citationList - else maybe mempty (<> " ") prefix - <> toListOfCites (map toInTextMode citationList) - <> maybe mempty (", " <>) suffix - where - toCite :: [Citation] -> Inlines - toCite cs = B.cite cs mempty - - toListOfCites :: [Citation] -> Inlines - toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) - - toInTextMode :: Citation -> Citation - toInTextMode c = c { citationMode = AuthorInText } - - alterFirst, alterLast :: (a -> a) -> [a] -> [a] - alterFirst _ [] = [] - alterFirst f (c:cs) = f c : cs - alterLast f = reverse . alterFirst f . reverse - - prependPrefix, appendSuffix :: Inlines -> Citation -> Citation - prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c } - appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } - -data BerkeleyCitationList = BerkeleyCitationList - { berkeleyCiteParens :: Bool - , berkeleyCiteCommonPrefix :: Maybe Inlines - , berkeleyCiteCommonSuffix :: Maybe Inlines - , berkeleyCiteCitations :: [Citation] - } -berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) -berkeleyCitationList = try $ do - char '[' - parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] - char ':' - skipSpaces - commonPrefix <- optionMaybe (try $ citationListPart <* char ';') - citations <- citeList - commonSuffix <- optionMaybe (try citationListPart) - char ']' - return (BerkeleyCitationList parens - <$> sequence commonPrefix - <*> sequence commonSuffix - <*> citations) - where - citationListPart :: PandocMonad m => OrgParser m (F Inlines) - citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' $ citeKey False - notFollowedBy (oneOf ";]") - inline - -berkeleyBareTag :: PandocMonad m => OrgParser m () -berkeleyBareTag = try $ void berkeleyBareTag' - -berkeleyParensTag :: PandocMonad m => OrgParser m () -berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag' - -berkeleyBareTag' :: PandocMonad m => OrgParser m () -berkeleyBareTag' = try $ void (string "cite") - -berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey False - returnF . return $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText - , citationNoteNum = 0 - , citationHash = 0 - } - --- The following is what a Berkeley-style bracketed textual citation parser --- would look like. However, as these citations are a subset of Pandoc's Org --- citation style, this isn't used. --- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) --- berkeleyBracketedTextualCite = try . (fmap head) $ --- enclosedByPair1 '[' ']' berkeleyTextualCite - -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. @@ -345,39 +396,6 @@ orgRefCiteMode = , ("citeyear", SuppressAuthor) ] -citeList :: PandocMonad m => OrgParser m (F [Citation]) -citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) - -citation :: PandocMonad m => OrgParser m (F Citation) -citation = try $ do - pref <- prefix - (suppress_author, key) <- citeKey False - suff <- suffix - return $ do - x <- pref - y <- suff - return Citation - { citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - where - prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False))) - suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - skipSpaces - rest <- trimInlinesF . mconcat <$> - many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest - footnote :: PandocMonad m => OrgParser m (F Inlines) footnote = try $ do note <- inlineNote <|> referencedNote |