diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 24 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/DocumentTree.hs | 12 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 314 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Meta.hs | 4 |
4 files changed, 192 insertions, 162 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index f18d2f9a7..9a689b0e8 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -474,15 +474,16 @@ figure = try $ do figCaption = fromMaybe mempty $ blockAttrCaption figAttrs figKeyVals = blockAttrKeyValues figAttrs attr = (figLabel, mempty, figKeyVals) - figTitle = (if isFigure then withFigPrefix else id) figName - in - B.para . B.imageWith attr imgSrc figTitle <$> figCaption - - withFigPrefix :: Text -> Text - withFigPrefix cs = - if "fig:" `T.isPrefixOf` cs - then cs - else "fig:" <> cs + in if isFigure + then (\c -> + B.simpleFigureWith + attr c imgSrc (unstackFig figName)) <$> figCaption + else B.para . B.imageWith attr imgSrc figName <$> figCaption + unstackFig :: Text -> Text + unstackFig figName = + if "fig:" `T.isPrefixOf` figName + then T.drop 4 figName + else figName -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () @@ -889,7 +890,10 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) rest <- T.concat <$> many (listContinuation markerLength) - contents <- parseFromString blocks $ firstLine <> blank <> rest + contents <- parseFromString (do initial <- paraOrPlain <|> pure mempty + subsequent <- blocks + return $ initial <> subsequent) + (firstLine <> blank <> rest) return (maybe id (prependInlines . checkboxToInlines) box <$> contents) -- | Prepend inlines to blocks, adding them to the first paragraph or diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index 2dcbecb1d..1c4f253cc 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -41,6 +41,7 @@ documentTree :: PandocMonad m -> OrgParser m (F Inlines) -> OrgParser m (F Headline) documentTree blocks inline = do + properties <- option mempty propertiesDrawer initialBlocks <- blocks headlines <- sequence <$> manyTill (headline blocks inline 1) eof title <- fmap docTitle . orgStateMeta <$> getState @@ -54,7 +55,7 @@ documentTree blocks inline = do , headlineText = B.fromList title' , headlineTags = mempty , headlinePlanning = emptyPlanning - , headlineProperties = mempty + , headlineProperties = properties , headlineContents = initialBlocks' , headlineChildren = headlines' } @@ -163,8 +164,15 @@ unprunedHeadlineToBlocks hdln st = in if not usingSelectedTags || any (`Set.member` orgStateSelectTags st) (headlineTags rootNode') then do headlineBlocks <- headlineToBlocks rootNode' + -- add metadata from root node :PROPERTIES: + updateState $ \s -> + s{ orgStateMeta = foldr + (\(PropertyKey k, PropertyValue v) m -> + B.setMeta k v <$> m) + (orgStateMeta s) + (headlineProperties rootNode') } -- ignore first headline, it's the document's title - return . drop 1 . B.toList $ headlineBlocks + return $ drop 1 $ B.toList headlineBlocks else do headlineBlocks <- mconcat <$> mapM headlineToBlocks (headlineChildren rootNode') return . B.toList $ headlineBlocks diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index 6862dd71e..617f98a10 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -31,11 +31,10 @@ import Text.Pandoc.Readers.LaTeX (inlineCommand, rawLaTeXInline) import Text.TeXMath (DisplayType (..), readTeX, writePandoc) import Text.Pandoc.Sources (ToSources(..)) import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap - -import Control.Monad (guard, mplus, mzero, unless, void, when) +import Safe (lastMay) +import Control.Monad (guard, mplus, mzero, unless, when, void) import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) -import Data.List (intersperse) import qualified Data.Map as M import Data.Text (Text) import qualified Data.Text as T @@ -148,31 +147,177 @@ endline = try $ do -- Citations -- --- The state of citations is a bit confusing due to the lack of an official --- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the --- first to be implemented here and is almost identical to Markdown's citation --- syntax. The org-ref package is in wide use to handle citations, but the --- syntax is a bit limiting and not quite as simple to write. The --- semi-official Org-mode citation syntax is based on John MacFarlane's Pandoc --- sytax and Org-oriented enhancements contributed by Richard Lawrence and --- others. It's dubbed Berkeley syntax due the place of activity of its main --- contributors. All this should be consolidated once an official Org-mode --- citation syntax has emerged. +-- We first try to parse official org-cite citations, then fall +-- back to org-ref citations (which are still in wide use). + +-- | A citation in org-cite style +orgCite :: PandocMonad m => OrgParser m (F [Citation]) +orgCite = try $ do + string "[cite" + (sty, _variants) <- citeStyle + char ':' + spnl + globalPref <- option mempty (try (citePrefix <* char ';')) + items <- citeItems + globalSuff <- option mempty (try (char ';' *> citeSuffix)) + spnl + char ']' + return $ adjustCiteStyle sty . + addPrefixToFirstItem globalPref . + addSuffixToLastItem globalSuff $ items + +adjustCiteStyle :: CiteStyle -> (F [Citation]) -> (F [Citation]) +adjustCiteStyle sty cs = do + cs' <- cs + case cs' of + [] -> return [] + (d:ds) -- TODO needs refinement + -> case sty of + TextStyle -> return $ d{ citationMode = AuthorInText + , citationSuffix = dropWhile (== Space) + (citationSuffix d)} : ds + NoAuthorStyle -> return $ d{ citationMode = SuppressAuthor } : ds + _ -> return (d:ds) + +addPrefixToFirstItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) +addPrefixToFirstItem aff cs = do + cs' <- cs + aff' <- aff + case cs' of + [] -> return [] + (d:ds) -> return (d{ citationPrefix = + B.toList aff' <> citationPrefix d }:ds) + +addSuffixToLastItem :: (F Inlines) -> (F [Citation]) -> (F [Citation]) +addSuffixToLastItem aff cs = do + cs' <- cs + aff' <- aff + case lastMay cs' of + Nothing -> return cs' + Just d -> + return (init cs' ++ [d{ citationSuffix = + citationSuffix d <> B.toList aff' }]) + +citeItems :: PandocMonad m => OrgParser m (F [Citation]) +citeItems = sequence <$> citeItem `sepBy1` (char ';') + +citeItem :: PandocMonad m => OrgParser m (F Citation) +citeItem = do + pref <- citePrefix + itemKey <- orgCiteKey + suff <- citeSuffix + return $ do + pre' <- pref + suf' <- suff + return Citation + { citationId = itemKey + , citationPrefix = B.toList pre' + , citationSuffix = B.toList suf' + , citationMode = NormalCitation + , citationNoteNum = 0 + , citationHash = 0 + } + +orgCiteKey :: PandocMonad m => OrgParser m Text +orgCiteKey = do + char '@' + T.pack <$> many1 (satisfy orgCiteKeyChar) + +orgCiteKeyChar :: Char -> Bool +orgCiteKeyChar c = + isAlphaNum c || c `elem` ['.',':','?','!','`','\'','/','*','@','+','|', + '(',')','{','}','<','>','&','_','^','$','#', + '%','~','-'] + +rawAffix :: PandocMonad m => Bool -> OrgParser m Text +rawAffix isPrefix = snd <$> withRaw + (many + (affixChar + <|> + try (void (char '[' >> rawAffix isPrefix >> char ']')))) + where + affixChar = void $ satisfy $ \c -> + not (c == '^' || c == ';' || c == '[' || c == ']') && + (not isPrefix || c /= '@') + +citePrefix :: PandocMonad m => OrgParser m (F Inlines) +citePrefix = + rawAffix True >>= parseFromString (trimInlinesF . mconcat <$> many inline) + +citeSuffix :: PandocMonad m => OrgParser m (F Inlines) +citeSuffix = + rawAffix False >>= parseFromString parseSuffix + where + parseSuffix = do + hasSpace <- option False + (True <$ try (spaceChar >> skipSpaces >> lookAhead nonspaceChar)) + ils <- trimInlinesF . mconcat <$> many inline + return $ if hasSpace + then (B.space <>) <$> ils + else ils + +citeStyle :: PandocMonad m => OrgParser m (CiteStyle, [CiteVariant]) +citeStyle = option (DefStyle, []) $ do + sty <- option DefStyle $ try $ char '/' *> orgCiteStyle + variants <- option [] $ try $ char '/' *> orgCiteVariants + return (sty, variants) + +orgCiteStyle :: PandocMonad m => OrgParser m CiteStyle +orgCiteStyle = choice $ map try + [ NoAuthorStyle <$ string "noauthor" + , NoAuthorStyle <$ string "na" + , LocatorsStyle <$ string "locators" + , LocatorsStyle <$ char 'l' + , NociteStyle <$ string "nocite" + , NociteStyle <$ char 'n' + , TextStyle <$ string "text" + , TextStyle <$ char 't' + ] + +orgCiteVariants :: PandocMonad m => OrgParser m [CiteVariant] +orgCiteVariants = + (fullnameVariant `sepBy1` (char '-')) <|> (many1 onecharVariant) + where + fullnameVariant = choice $ map try + [ Bare <$ string "bare" + , Caps <$ string "caps" + , Full <$ string "full" + ] + onecharVariant = choice + [ Bare <$ char 'b' + , Caps <$ char 'c' + , Full <$ char 'f' + ] + +data CiteStyle = + NoAuthorStyle + | LocatorsStyle + | NociteStyle + | TextStyle + | DefStyle + deriving Show + +data CiteVariant = + Caps + | Bare + | Full + deriving Show + + +spnl :: PandocMonad m => OrgParser m () +spnl = + skipSpaces *> optional (newline *> notFollowedBy blankline *> skipSpaces) cite :: PandocMonad m => OrgParser m (F Inlines) -cite = try $ berkeleyCite <|> do +cite = do guardEnabled Ext_citations - (cs, raw) <- withRaw $ choice - [ pandocOrgCite + (cs, raw) <- withRaw $ try $ choice + [ orgCite , orgRefCite - , berkeleyTextualCite ] return $ flip B.cite (B.text raw) <$> cs --- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) -pandocOrgCite = try $ - char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' +-- org-ref orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) orgRefCite = try $ choice @@ -201,100 +346,6 @@ normalOrgRefCite = try $ do , citationHash = 0 } --- | Read an Berkeley-style Org-mode citation. Berkeley citation style was --- develop and adjusted to Org-mode style by John MacFarlane and Richard --- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) -berkeleyCite = try $ do - bcl <- berkeleyCitationList - return $ do - parens <- berkeleyCiteParens <$> bcl - prefix <- berkeleyCiteCommonPrefix <$> bcl - suffix <- berkeleyCiteCommonSuffix <$> bcl - citationList <- berkeleyCiteCitations <$> bcl - return $ - if parens - then toCite - . maybe id (alterFirst . prependPrefix) prefix - . maybe id (alterLast . appendSuffix) suffix - $ citationList - else maybe mempty (<> " ") prefix - <> toListOfCites (map toInTextMode citationList) - <> maybe mempty (", " <>) suffix - where - toCite :: [Citation] -> Inlines - toCite cs = B.cite cs mempty - - toListOfCites :: [Citation] -> Inlines - toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) - - toInTextMode :: Citation -> Citation - toInTextMode c = c { citationMode = AuthorInText } - - alterFirst, alterLast :: (a -> a) -> [a] -> [a] - alterFirst _ [] = [] - alterFirst f (c:cs) = f c : cs - alterLast f = reverse . alterFirst f . reverse - - prependPrefix, appendSuffix :: Inlines -> Citation -> Citation - prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c } - appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } - -data BerkeleyCitationList = BerkeleyCitationList - { berkeleyCiteParens :: Bool - , berkeleyCiteCommonPrefix :: Maybe Inlines - , berkeleyCiteCommonSuffix :: Maybe Inlines - , berkeleyCiteCitations :: [Citation] - } -berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) -berkeleyCitationList = try $ do - char '[' - parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] - char ':' - skipSpaces - commonPrefix <- optionMaybe (try $ citationListPart <* char ';') - citations <- citeList - commonSuffix <- optionMaybe (try citationListPart) - char ']' - return (BerkeleyCitationList parens - <$> sequence commonPrefix - <*> sequence commonSuffix - <*> citations) - where - citationListPart :: PandocMonad m => OrgParser m (F Inlines) - citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' $ citeKey False - notFollowedBy (oneOf ";]") - inline - -berkeleyBareTag :: PandocMonad m => OrgParser m () -berkeleyBareTag = try $ void berkeleyBareTag' - -berkeleyParensTag :: PandocMonad m => OrgParser m () -berkeleyParensTag = try . void $ enclosedByPair1 '(' ')' berkeleyBareTag' - -berkeleyBareTag' :: PandocMonad m => OrgParser m () -berkeleyBareTag' = try $ void (string "cite") - -berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey False - returnF . return $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText - , citationNoteNum = 0 - , citationHash = 0 - } - --- The following is what a Berkeley-style bracketed textual citation parser --- would look like. However, as these citations are a subset of Pandoc's Org --- citation style, this isn't used. --- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) --- berkeleyBracketedTextualCite = try . (fmap head) $ --- enclosedByPair1 '[' ']' berkeleyTextualCite - -- | Read a link-like org-ref style citation. The citation includes pre and -- post text. However, multiple citations are not possible due to limitations -- in the syntax. @@ -345,39 +396,6 @@ orgRefCiteMode = , ("citeyear", SuppressAuthor) ] -citeList :: PandocMonad m => OrgParser m (F [Citation]) -citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) - -citation :: PandocMonad m => OrgParser m (F Citation) -citation = try $ do - pref <- prefix - (suppress_author, key) <- citeKey False - suff <- suffix - return $ do - x <- pref - y <- suff - return Citation - { citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - where - prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead (citeKey False))) - suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - skipSpaces - rest <- trimInlinesF . mconcat <$> - many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest - footnote :: PandocMonad m => OrgParser m (F Inlines) footnote = try $ do note <- inlineNote <|> referencedNote diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index a1b21046a..ccb6744e7 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -27,13 +27,13 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class.PandocMonad (PandocMonad) import Text.Pandoc.Definition import Text.Pandoc.Shared (blocksToInlines, safeRead) +import Text.Pandoc.Network.HTTP (urlEncode) import Control.Monad (mzero, void) import Data.List (intercalate, intersperse) import Data.Map (Map) import Data.Maybe (fromMaybe) import Data.Text (Text) -import Network.HTTP (urlEncode) import qualified Data.Map as Map import qualified Data.Set as Set import qualified Data.Text as T @@ -188,7 +188,7 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend -- inefficient replacePlain = try $ (\x -> T.concat . flip intersperse x) <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack) + replaceUrl = try $ (\x -> T.concat . flip intersperse x . urlEncode) <$> sequence [tillSpecifier 'h', rest] justAppend = try $ (<>) <$> rest |