aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org
diff options
context:
space:
mode:
authorIgor Pashev <pashev.igor@gmail.com>2021-12-29 15:00:59 +0200
committerIgor Pashev <pashev.igor@gmail.com>2021-12-29 15:00:59 +0200
commitb4361712899fd0183fea5513180cb383979616de (patch)
tree688ab7ee2ab3a8cd32b4e37b506099aec95388f7 /src/Text/Pandoc/Readers/Org
parent726ad97faebe59e024d68d293e663c02bbe423c8 (diff)
parentd960282b105a6469c760b4308a3b81da723b7256 (diff)
downloadpandoc-b4361712899fd0183fea5513180cb383979616de.tar.gz
Merge https://github.com/jgm/pandoc
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs24
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs12
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs314
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs4
4 files changed, 192 insertions, 162 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index f18d2f9a7..9a689b0e8 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -474,15 +474,16 @@ figure = try $ do
figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
figKeyVals = blockAttrKeyValues figAttrs
attr = (figLabel, mempty, figKeyVals)
- figTitle = (if isFigure then withFigPrefix else id) figName
- in
- B.para . B.imageWith attr imgSrc figTitle <$> figCaption
-
- withFigPrefix :: Text -> Text
- withFigPrefix cs =
- if "fig:" `T.isPrefixOf` cs
- then cs
- else "fig:" <> cs
+ in if isFigure
+ then (\c ->
+ B.simpleFigureWith
+ attr c imgSrc (unstackFig figName)) <$> figCaption
+ else B.para . B.imageWith attr imgSrc figName <$> figCaption
+ unstackFig :: Text -> Text
+ unstackFig figName =
+ if "fig:" `T.isPrefixOf` figName
+ then T.drop 4 figName
+ else figName
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
@@ -889,7 +890,10 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
rest <- T.concat <$> many (listContinuation markerLength)
- contents <- parseFromString blocks $ firstLine <> blank <> rest
+ contents <- parseFromString (do initial <- paraOrPlain <|> pure mempty
+ subsequent <- blocks
+ return $ initial <> subsequent)
+ (firstLine <> blank <> rest)
return (maybe id (prependInlines . checkboxToInlines) box <$> contents)
-- | Prepend inlines to blocks, adding them to the first paragraph or
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index 2dcbecb1d..1c4f253cc 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -41,6 +41,7 @@ documentTree :: PandocMonad m
-> OrgParser m (F Inlines)
-> OrgParser m (F Headline)
documentTree blocks inline = do
+ properties <- option mempty propertiesDrawer
initialBlocks <- blocks
headlines <- sequence <$> manyTill (headline blocks inline 1) eof
title <- fmap docTitle . orgStateMeta <$> getState
@@ -54,7 +55,7 @@ documentTree blocks inline = do
, headlineText = B.fromList title'
, headlineTags = mempty
, headlinePlanning = emptyPlanning
- , headlineProperties = mempty
+ , headlineProperties = properties
, headlineContents = initialBlocks'
, headlineChildren = headlines'
}
@@ -163,8 +164,15 @@ unprunedHeadlineToBlocks hdln st =
in if not usingSelectedTags ||
any (`Set.member` orgStateSelectTags st) (headlineTags rootNode')
then do headlineBlocks <- headlineToBlocks rootNode'
+ -- add metadata from root node :PROPERTIES:
+ updateState $ \s ->
+ s{ orgStateMeta = foldr
+ (\(PropertyKey k, PropertyValue v) m ->
+ B.setMeta k v <$> m)
+ (orgStateMeta s)
+ (headlineProperties rootNode') }
-- ignore first headline, it's the document's title
- return . drop 1 . B.toList $ headlineBlocks
+ return $ drop 1 $ B.toList headlineBlocks
else do headlineBlocks <- mconcat <$> mapM headlineToBlocks
(headlineChildren rootNode')
return . B.toList $ headlineBlocks
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index 6862dd71e..617f98a10 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -31,11 +31,10 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline)
import Text.TeXMath (DisplayType (..), readTeX, writePandoc)
import Text.Pandoc.Sources (ToSources(..))
import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
-
-import Control.Monad (guard, mplus, mzero, unless, void, when)
+import Safe (lastMay)
+import Control.Monad (guard, mplus, mzero, unless, when, void)
import Control.Monad.Trans (lift)
import Data.Char (isAlphaNum, isSpace)
-import Data.List (intersperse)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
@@ -148,31 +147,177 @@ endline = try $ do
-- Citations
--
--- The state of citations is a bit confusing due to the lack of an official
--- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the
--- first to be implemented here and is almost identical to Markdown's citation
--- syntax. The org-ref package is in wide use to handle citations, but the
--- syntax is a bit limiting and not quite as simple to write. The
--- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc
--- sytax and Org-oriented enhancements contributed by Richard Lawrence and
--- others. It's dubbed Berkeley syntax due the place of activity of its main
--- contributors. All this should be consolidated once an official Org-mode
--- citation syntax has emerged.
+-- We first try to parse official org-cite citations, then fall
+-- back to org-ref citations (which are still in wide use).
+
+-- | A citation in org-cite style
+orgCite :: PandocMonad m => OrgParser m (F [Citation])
+orgCite = try $ do
+ string "[cite"
+ (sty, _variants) <- citeStyle
+ char ':'
+ spnl
+ globalPref <- option mempty (try (citePrefix <* char ';'))
+ items <- citeItems
+ globalSuff <- option mempty (try (char ';' *> citeSuffix))
+ spnl
+ char ']'
+ return $ adjustCiteStyle sty .
+ addPrefixToFirstItem globalPref .
+ addSuffixToLastItem globalSuff $ items
+
+adjustCiteStyle :: CiteStyle -> (F [Citation]) -> (F [Citation])
+adjustCiteStyle sty cs = do
+ cs' <- cs
+ case cs' of
+ [] -> return []
+ (d:ds) -- TODO needs refinement
+ -> case sty of
+ TextStyle -> return $ d{ citationMode = AuthorInText
+ , citationSuffix = dropWhile (== Space)
+ (citationSuffix d)} : ds
+ NoAuthorStyle -> return $ d{ citationMode = SuppressAuthor } : ds
+ _ -> return (d:ds)
+
+addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addPrefixToFirstItem aff cs = do
+ cs' <- cs
+ aff' <- aff
+ case cs' of
+ [] -> return []
+ (d:ds) -> return (d{ citationPrefix =
+ B.toList aff' <> citationPrefix d }:ds)
+
+addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation])
+addSuffixToLastItem aff cs = do
+ cs' <- cs
+ aff' <- aff
+ case lastMay cs' of
+ Nothing -> return cs'
+ Just d ->
+ return (init cs' ++ [d{ citationSuffix =
+ citationSuffix d <> B.toList aff' }])
+
+citeItems :: PandocMonad m => OrgParser m (F [Citation])
+citeItems = sequence <$> citeItem `sepBy1` (char ';')
+
+citeItem :: PandocMonad m => OrgParser m (F Citation)
+citeItem = do
+ pref <- citePrefix
+ itemKey <- orgCiteKey
+ suff <- citeSuffix
+ return $ do
+ pre' <- pref
+ suf' <- suff
+ return Citation
+ { citationId = itemKey
+ , citationPrefix = B.toList pre'
+ , citationSuffix = B.toList suf'
+ , citationMode = NormalCitation
+ , citationNoteNum = 0
+ , citationHash = 0
+ }
+
+orgCiteKey :: PandocMonad m => OrgParser m Text
+orgCiteKey = do
+ char '@'
+ T.pack <$> many1 (satisfy orgCiteKeyChar)
+
+orgCiteKeyChar :: Char -> Bool
+orgCiteKeyChar c =
+ isAlphaNum c || c `elem` ['.',':','?','!','`','\'','/','*','@','+','|',
+ '(',')','{','}','<','>','&','_','^','$','#',
+ '%','~','-']
+
+rawAffix :: PandocMonad m => Bool -> OrgParser m Text
+rawAffix isPrefix = snd <$> withRaw
+ (many
+ (affixChar
+ <|>
+ try (void (char '[' >> rawAffix isPrefix >> char ']'))))
+ where
+ affixChar = void $ satisfy $ \c ->
+ not (c == '^' || c == ';' || c == '[' || c == ']') &&
+ (not isPrefix || c /= '@')
+
+citePrefix :: PandocMonad m => OrgParser m (F Inlines)
+citePrefix =
+ rawAffix True >>= parseFromString (trimInlinesF . mconcat <$> many inline)
+
+citeSuffix :: PandocMonad m => OrgParser m (F Inlines)
+citeSuffix =
+ rawAffix False >>= parseFromString parseSuffix
+ where
+ parseSuffix = do
+ hasSpace <- option False
+ (True <$ try (spaceChar >> skipSpaces >> lookAhead nonspaceChar))
+ ils <- trimInlinesF . mconcat <$> many inline
+ return $ if hasSpace
+ then (B.space <>) <$> ils
+ else ils
+
+citeStyle :: PandocMonad m => OrgParser m (CiteStyle, [CiteVariant])
+citeStyle = option (DefStyle, []) $ do
+ sty <- option DefStyle $ try $ char '/' *> orgCiteStyle
+ variants <- option [] $ try $ char '/' *> orgCiteVariants
+ return (sty, variants)
+
+orgCiteStyle :: PandocMonad m => OrgParser m CiteStyle
+orgCiteStyle = choice $ map try
+ [ NoAuthorStyle <$ string "noauthor"
+ , NoAuthorStyle <$ string "na"
+ , LocatorsStyle <$ string "locators"
+ , LocatorsStyle <$ char 'l'
+ , NociteStyle <$ string "nocite"
+ , NociteStyle <$ char 'n'
+ , TextStyle <$ string "text"
+ , TextStyle <$ char 't'
+ ]
+
+orgCiteVariants :: PandocMonad m => OrgParser m [CiteVariant]
+orgCiteVariants =
+ (fullnameVariant `sepBy1` (char '-')) <|> (many1 onecharVariant)
+ where
+ fullnameVariant = choice $ map try
+ [ Bare <$ string "bare"
+ , Caps <$ string "caps"
+ , Full <$ string "full"
+ ]
+ onecharVariant = choice
+ [ Bare <$ char 'b'
+ , Caps <$ char 'c'
+ , Full <$ char 'f'
+ ]
+
+data CiteStyle =
+ NoAuthorStyle
+ | LocatorsStyle
+ | NociteStyle
+ | TextStyle
+ | DefStyle
+ deriving Show
+
+data CiteVariant =
+ Caps
+ | Bare
+ | Full
+ deriving Show
+
+
+spnl :: PandocMonad m => OrgParser m ()
+spnl =
+ skipSpaces *> optional (newline *> notFollowedBy blankline *> skipSpaces)
cite :: PandocMonad m => OrgParser m (F Inlines)
-cite = try $ berkeleyCite <|> do
+cite = do
guardEnabled Ext_citations
- (cs, raw) <- withRaw $ choice
- [ pandocOrgCite
+ (cs, raw) <- withRaw $ try $ choice
+ [ orgCite
, orgRefCite
- , berkeleyTextualCite
]
return $ flip B.cite (B.text raw) <$> cs
--- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@).
-pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation])
-pandocOrgCite = try $
- char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']'
+-- org-ref
orgRefCite :: PandocMonad m => OrgParser m (F [Citation])
orgRefCite = try $ choice
@@ -201,100 +346,6 @@ normalOrgRefCite = try $ do
, citationHash = 0
}
--- | Read an Berkeley-style Org-mode citation. Berkeley citation style was
--- develop and adjusted to Org-mode style by John MacFarlane and Richard
--- Lawrence, respectively, both philosophers at UC Berkeley.
-berkeleyCite :: PandocMonad m => OrgParser m (F Inlines)
-berkeleyCite = try $ do
- bcl <- berkeleyCitationList
- return $ do
- parens <- berkeleyCiteParens <$> bcl
- prefix <- berkeleyCiteCommonPrefix <$> bcl
- suffix <- berkeleyCiteCommonSuffix <$> bcl
- citationList <- berkeleyCiteCitations <$> bcl
- return $
- if parens
- then toCite
- . maybe id (alterFirst . prependPrefix) prefix
- . maybe id (alterLast . appendSuffix) suffix
- $ citationList
- else maybe mempty (<> " ") prefix
- <> toListOfCites (map toInTextMode citationList)
- <> maybe mempty (", " <>) suffix
- where
- toCite :: [Citation] -> Inlines
- toCite cs = B.cite cs mempty
-
- toListOfCites :: [Citation] -> Inlines
- toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty)
-
- toInTextMode :: Citation -> Citation
- toInTextMode c = c { citationMode = AuthorInText }
-
- alterFirst, alterLast :: (a -> a) -> [a] -> [a]
- alterFirst _ [] = []
- alterFirst f (c:cs) = f c : cs
- alterLast f = reverse . alterFirst f . reverse
-
- prependPrefix, appendSuffix :: Inlines -> Citation -> Citation
- prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c }
- appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf }
-
-data BerkeleyCitationList = BerkeleyCitationList
- { berkeleyCiteParens :: Bool
- , berkeleyCiteCommonPrefix :: Maybe Inlines
- , berkeleyCiteCommonSuffix :: Maybe Inlines
- , berkeleyCiteCitations :: [Citation]
- }
-berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList)
-berkeleyCitationList = try $ do
- char '['
- parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ]
- char ':'
- skipSpaces
- commonPrefix <- optionMaybe (try $ citationListPart <* char ';')
- citations <- citeList
- commonSuffix <- optionMaybe (try citationListPart)
- char ']'
- return (BerkeleyCitationList parens
- <$> sequence commonPrefix
- <*> sequence commonSuffix
- <*> citations)
- where
- citationListPart :: PandocMonad m => OrgParser m (F Inlines)
- citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do
- notFollowedBy' $ citeKey False
- notFollowedBy (oneOf ";]")
- inline
-
-berkeleyBareTag :: PandocMonad m => OrgParser m ()
-berkeleyBareTag = try $ void berkeleyBareTag'
-
-berkeleyParensTag :: PandocMonad m => OrgParser m ()
-berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag'
-
-berkeleyBareTag' :: PandocMonad m => OrgParser m ()
-berkeleyBareTag' = try $ void (string "cite")
-
-berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation])
-berkeleyTextualCite = try $ do
- (suppressAuthor, key) <- citeKey False
- returnF . return $ Citation
- { citationId = key
- , citationPrefix = mempty
- , citationSuffix = mempty
- , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText
- , citationNoteNum = 0
- , citationHash = 0
- }
-
--- The following is what a Berkeley-style bracketed textual citation parser
--- would look like. However, as these citations are a subset of Pandoc's Org
--- citation style, this isn't used.
--- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation])
--- berkeleyBracketedTextualCite = try . (fmap head) $
--- enclosedByPair1 '[' ']' berkeleyTextualCite
-
-- | Read a link-like org-ref style citation. The citation includes pre and
-- post text. However, multiple citations are not possible due to limitations
-- in the syntax.
@@ -345,39 +396,6 @@ orgRefCiteMode =
, ("citeyear", SuppressAuthor)
]
-citeList :: PandocMonad m => OrgParser m (F [Citation])
-citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces)
-
-citation :: PandocMonad m => OrgParser m (F Citation)
-citation = try $ do
- pref <- prefix
- (suppress_author, key) <- citeKey False
- suff <- suffix
- return $ do
- x <- pref
- y <- suff
- return Citation
- { citationId = key
- , citationPrefix = B.toList x
- , citationSuffix = B.toList y
- , citationMode = if suppress_author
- then SuppressAuthor
- else NormalCitation
- , citationNoteNum = 0
- , citationHash = 0
- }
- where
- prefix = trimInlinesF . mconcat <$>
- manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False)))
- suffix = try $ do
- hasSpace <- option False (notFollowedBy nonspaceChar >> return True)
- skipSpaces
- rest <- trimInlinesF . mconcat <$>
- many (notFollowedBy (oneOf ";]") *> inline)
- return $ if hasSpace
- then (B.space <>) <$> rest
- else rest
-
footnote :: PandocMonad m => OrgParser m (F Inlines)
footnote = try $ do
note <- inlineNote <|> referencedNote
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index a1b21046a..ccb6744e7 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -27,13 +27,13 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Shared (blocksToInlines, safeRead)
+import Text.Pandoc.Network.HTTP (urlEncode)
import Control.Monad (mzero, void)
import Data.List (intercalate, intersperse)
import Data.Map (Map)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
-import Network.HTTP (urlEncode)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as T
@@ -188,7 +188,7 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
-- inefficient
replacePlain = try $ (\x -> T.concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
+ replaceUrl = try $ (\x -> T.concat . flip intersperse x . urlEncode)
<$> sequence [tillSpecifier 'h', rest]
justAppend = try $ (<>) <$> rest