diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Org/Inlines.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 880 |
1 files changed, 0 insertions, 880 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs deleted file mode 100644 index f3671641a..000000000 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ /dev/null @@ -1,880 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -{- -Copyright (C) 2014-2016 Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --} - -{- | - Module : Text.Pandoc.Readers.Org.Options - Copyright : Copyright (C) 2014-2016 Albert Krewinkel - License : GNU GPL, version 2 or above - - Maintainer : Albert Krewinkel <tarleb+pandoc@moltkeplatz.de> - -Parsers for Org-mode inline elements. --} -module Text.Pandoc.Readers.Org.Inlines - ( inline - , inlines - , addToNotesTable - , linkTarget - ) where - -import Text.Pandoc.Readers.Org.BlockStarts ( endOfBlock, noteMarker ) -import Text.Pandoc.Readers.Org.ParserState -import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared - ( cleanLinkString, isImageFilename, rundocBlockClass - , toRundocAttrib, translateLang ) - -import qualified Text.Pandoc.Builder as B -import Text.Pandoc.Builder ( Inlines ) -import Text.Pandoc.Definition -import Text.Pandoc.Options -import Text.Pandoc.Readers.LaTeX ( inlineCommand, rawLaTeXInline ) -import Text.TeXMath ( readTeX, writePandoc, DisplayType(..) ) -import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap -import Text.Pandoc.Class (PandocMonad) - -import Prelude hiding (sequence) -import Control.Monad ( guard, mplus, mzero, when, void ) -import Control.Monad.Trans ( lift ) -import Data.Char ( isAlphaNum, isSpace ) -import Data.List ( intersperse ) -import Data.Maybe ( fromMaybe ) -import qualified Data.Map as M -import Data.Monoid ( (<>) ) -import Data.Traversable (sequence) - --- --- Functions acting on the parser state --- -recordAnchorId :: PandocMonad m => String -> OrgParser m () -recordAnchorId i = updateState $ \s -> - s{ orgStateAnchorIds = i : (orgStateAnchorIds s) } - -pushToInlineCharStack :: PandocMonad m => Char -> OrgParser m () -pushToInlineCharStack c = updateState $ \s -> - s{ orgStateEmphasisCharStack = c:orgStateEmphasisCharStack s } - -popInlineCharStack :: PandocMonad m => OrgParser m () -popInlineCharStack = updateState $ \s -> - s{ orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ s } - -surroundingEmphasisChar :: PandocMonad m => OrgParser m [Char] -surroundingEmphasisChar = - take 1 . drop 1 . orgStateEmphasisCharStack <$> getState - -startEmphasisNewlinesCounting :: PandocMonad m => Int -> OrgParser m () -startEmphasisNewlinesCounting maxNewlines = updateState $ \s -> - s{ orgStateEmphasisNewlines = Just maxNewlines } - -decEmphasisNewlinesCount :: PandocMonad m => OrgParser m () -decEmphasisNewlinesCount = updateState $ \s -> - s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s } - -newlinesCountWithinLimits :: PandocMonad m => OrgParser m Bool -newlinesCountWithinLimits = do - st <- getState - return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True - -resetEmphasisNewlines :: PandocMonad m => OrgParser m () -resetEmphasisNewlines = updateState $ \s -> - s{ orgStateEmphasisNewlines = Nothing } - -addToNotesTable :: PandocMonad m => OrgNoteRecord -> OrgParser m () -addToNotesTable note = do - oldnotes <- orgStateNotes' <$> getState - updateState $ \s -> s{ orgStateNotes' = note:oldnotes } - --- | Parse a single Org-mode inline element -inline :: PandocMonad m => OrgParser m (F Inlines) -inline = - choice [ whitespace - , linebreak - , cite - , footnote - , linkOrImage - , anchor - , inlineCodeBlock - , str - , endline - , emphasizedText - , code - , math - , displayMath - , verbatim - , subscript - , superscript - , inlineLaTeX - , exportSnippet - , smart - , symbol - ] <* (guard =<< newlinesCountWithinLimits) - <?> "inline" - --- | Read the rest of the input as inlines. -inlines :: PandocMonad m => OrgParser m (F Inlines) -inlines = trimInlinesF . mconcat <$> many1 inline - --- treat these as potentially non-text when parsing inline: -specialChars :: [Char] -specialChars = "\"$'()*+-,./:;<=>@[\\]^_{|}~" - - -whitespace :: PandocMonad m => OrgParser m (F Inlines) -whitespace = pure B.space <$ skipMany1 spaceChar - <* updateLastPreCharPos - <* updateLastForbiddenCharPos - <?> "whitespace" - -linebreak :: PandocMonad m => OrgParser m (F Inlines) -linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline - -str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") - <* updateLastStrPos - --- | An endline character that can be treated as a space, not a structural --- break. This should reflect the values of the Emacs variable --- @org-element-pagaraph-separate@. -endline :: PandocMonad m => OrgParser m (F Inlines) -endline = try $ do - newline - notFollowedBy' endOfBlock - decEmphasisNewlinesCount - guard =<< newlinesCountWithinLimits - updateLastPreCharPos - return . return $ B.softbreak - - --- --- Citations --- - --- The state of citations is a bit confusing due to the lack of an official --- syntax and multiple syntaxes coexisting. The pandocOrgCite syntax was the --- first to be implemented here and is almost identical to Markdown's citation --- syntax. The org-ref package is in wide use to handle citations, but the --- syntax is a bit limiting and not quite as simple to write. The --- semi-offical Org-mode citation syntax is based on John MacFarlane's Pandoc --- sytax and Org-oriented enhancements contributed by Richard Lawrence and --- others. It's dubbed Berkeley syntax due the place of activity of its main --- contributors. All this should be consolidated once an official Org-mode --- citation syntax has emerged. - -cite :: PandocMonad m => OrgParser m (F Inlines) -cite = try $ berkeleyCite <|> do - guardEnabled Ext_citations - (cs, raw) <- withRaw $ choice - [ pandocOrgCite - , orgRefCite - , berkeleyTextualCite - ] - return $ (flip B.cite (B.text raw)) <$> cs - --- | A citation in Pandoc Org-mode style (@[prefix \@citekey suffix]@). -pandocOrgCite :: PandocMonad m => OrgParser m (F [Citation]) -pandocOrgCite = try $ - char '[' *> skipSpaces *> citeList <* skipSpaces <* char ']' - -orgRefCite :: PandocMonad m => OrgParser m (F [Citation]) -orgRefCite = try $ choice - [ normalOrgRefCite - , fmap (:[]) <$> linkLikeOrgRefCite - ] - -normalOrgRefCite :: PandocMonad m => OrgParser m (F [Citation]) -normalOrgRefCite = try $ do - mode <- orgRefCiteMode - firstCitation <- orgRefCiteList mode - moreCitations <- many (try $ char ',' *> orgRefCiteList mode) - return . sequence $ firstCitation : moreCitations - where - -- | A list of org-ref style citation keys, parsed as citation of the given - -- citation mode. - orgRefCiteList :: PandocMonad m => CitationMode -> OrgParser m (F Citation) - orgRefCiteList citeMode = try $ do - key <- orgRefCiteKey - returnF $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = citeMode - , citationNoteNum = 0 - , citationHash = 0 - } - --- | Read an Berkeley-style Org-mode citation. Berkeley citation style was --- develop and adjusted to Org-mode style by John MacFarlane and Richard --- Lawrence, respectively, both philosophers at UC Berkeley. -berkeleyCite :: PandocMonad m => OrgParser m (F Inlines) -berkeleyCite = try $ do - bcl <- berkeleyCitationList - return $ do - parens <- berkeleyCiteParens <$> bcl - prefix <- berkeleyCiteCommonPrefix <$> bcl - suffix <- berkeleyCiteCommonSuffix <$> bcl - citationList <- berkeleyCiteCitations <$> bcl - return $ - if parens - then toCite - . maybe id (\p -> alterFirst (prependPrefix p)) prefix - . maybe id (\s -> alterLast (appendSuffix s)) suffix - $ citationList - else maybe mempty (<> " ") prefix - <> (toListOfCites $ map toInTextMode citationList) - <> maybe mempty (", " <>) suffix - where - toCite :: [Citation] -> Inlines - toCite cs = B.cite cs mempty - - toListOfCites :: [Citation] -> Inlines - toListOfCites = mconcat . intersperse ", " . map (\c -> B.cite [c] mempty) - - toInTextMode :: Citation -> Citation - toInTextMode c = c { citationMode = AuthorInText } - - alterFirst, alterLast :: (a -> a) -> [a] -> [a] - alterFirst _ [] = [] - alterFirst f (c:cs) = (f c):cs - alterLast f = reverse . alterFirst f . reverse - - prependPrefix, appendSuffix :: Inlines -> Citation -> Citation - prependPrefix pre c = c { citationPrefix = B.toList pre <> citationPrefix c } - appendSuffix suf c = c { citationSuffix = citationSuffix c <> B.toList suf } - -data BerkeleyCitationList = BerkeleyCitationList - { berkeleyCiteParens :: Bool - , berkeleyCiteCommonPrefix :: Maybe Inlines - , berkeleyCiteCommonSuffix :: Maybe Inlines - , berkeleyCiteCitations :: [Citation] - } -berkeleyCitationList :: PandocMonad m => OrgParser m (F BerkeleyCitationList) -berkeleyCitationList = try $ do - char '[' - parens <- choice [ False <$ berkeleyBareTag, True <$ berkeleyParensTag ] - char ':' - skipSpaces - commonPrefix <- optionMaybe (try $ citationListPart <* char ';') - citations <- citeList - commonSuffix <- optionMaybe (try $ citationListPart) - char ']' - return (BerkeleyCitationList parens - <$> sequence commonPrefix - <*> sequence commonSuffix - <*> citations) - where - citationListPart :: PandocMonad m => OrgParser m (F Inlines) - citationListPart = fmap (trimInlinesF . mconcat) . try . many1 $ do - notFollowedBy' citeKey - notFollowedBy (oneOf ";]") - inline - -berkeleyBareTag :: PandocMonad m => OrgParser m () -berkeleyBareTag = try $ void berkeleyBareTag' - -berkeleyParensTag :: PandocMonad m => OrgParser m () -berkeleyParensTag = try . void $ enclosedByPair '(' ')' berkeleyBareTag' - -berkeleyBareTag' :: PandocMonad m => OrgParser m () -berkeleyBareTag' = try $ void (string "cite") - -berkeleyTextualCite :: PandocMonad m => OrgParser m (F [Citation]) -berkeleyTextualCite = try $ do - (suppressAuthor, key) <- citeKey - returnF . return $ Citation - { citationId = key - , citationPrefix = mempty - , citationSuffix = mempty - , citationMode = if suppressAuthor then SuppressAuthor else AuthorInText - , citationNoteNum = 0 - , citationHash = 0 - } - --- The following is what a Berkeley-style bracketed textual citation parser --- would look like. However, as these citations are a subset of Pandoc's Org --- citation style, this isn't used. --- berkeleyBracketedTextualCite :: PandocMonad m => OrgParser m (F [Citation]) --- berkeleyBracketedTextualCite = try . (fmap head) $ --- enclosedByPair '[' ']' berkeleyTextualCite - --- | Read a link-like org-ref style citation. The citation includes pre and --- post text. However, multiple citations are not possible due to limitations --- in the syntax. -linkLikeOrgRefCite :: PandocMonad m => OrgParser m (F Citation) -linkLikeOrgRefCite = try $ do - _ <- string "[[" - mode <- orgRefCiteMode - key <- orgRefCiteKey - _ <- string "][" - pre <- trimInlinesF . mconcat <$> manyTill inline (try $ string "::") - spc <- option False (True <$ spaceChar) - suf <- trimInlinesF . mconcat <$> manyTill inline (try $ string "]]") - return $ do - pre' <- pre - suf' <- suf - return Citation - { citationId = key - , citationPrefix = B.toList pre' - , citationSuffix = B.toList (if spc then B.space <> suf' else suf') - , citationMode = mode - , citationNoteNum = 0 - , citationHash = 0 - } - --- | Read a citation key. The characters allowed in citation keys are taken --- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String -orgRefCiteKey = try . many1 . satisfy $ \c -> - isAlphaNum c || c `elem` ("-_:\\./"::String) - --- | Supported citation types. Only a small subset of org-ref types is --- supported for now. TODO: rewrite this, use LaTeX reader as template. -orgRefCiteMode :: PandocMonad m => OrgParser m CitationMode -orgRefCiteMode = - choice $ map (\(s, mode) -> mode <$ try (string s <* char ':')) - [ ("cite", AuthorInText) - , ("citep", NormalCitation) - , ("citep*", NormalCitation) - , ("citet", AuthorInText) - , ("citet*", AuthorInText) - , ("citeyear", SuppressAuthor) - ] - -citeList :: PandocMonad m => OrgParser m (F [Citation]) -citeList = sequence <$> sepEndBy1 citation (try $ char ';' *> skipSpaces) - -citation :: PandocMonad m => OrgParser m (F Citation) -citation = try $ do - pref <- prefix - (suppress_author, key) <- citeKey - suff <- suffix - return $ do - x <- pref - y <- suff - return $ Citation{ citationId = key - , citationPrefix = B.toList x - , citationSuffix = B.toList y - , citationMode = if suppress_author - then SuppressAuthor - else NormalCitation - , citationNoteNum = 0 - , citationHash = 0 - } - where - prefix = trimInlinesF . mconcat <$> - manyTill inline (char ']' <|> (']' <$ lookAhead citeKey)) - suffix = try $ do - hasSpace <- option False (notFollowedBy nonspaceChar >> return True) - skipSpaces - rest <- trimInlinesF . mconcat <$> - many (notFollowedBy (oneOf ";]") *> inline) - return $ if hasSpace - then (B.space <>) <$> rest - else rest - -footnote :: PandocMonad m => OrgParser m (F Inlines) -footnote = try $ inlineNote <|> referencedNote - -inlineNote :: PandocMonad m => OrgParser m (F Inlines) -inlineNote = try $ do - string "[fn:" - ref <- many alphaNum - char ':' - note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - when (not $ null ref) $ - addToNotesTable ("fn:" ++ ref, note) - return $ B.note <$> note - -referencedNote :: PandocMonad m => OrgParser m (F Inlines) -referencedNote = try $ do - ref <- noteMarker - return $ do - notes <- asksF orgStateNotes' - case lookup ref notes of - Nothing -> return $ B.str $ "[" ++ ref ++ "]" - Just contents -> do - st <- askF - let contents' = runF contents st{ orgStateNotes' = [] } - return $ B.note contents' - -linkOrImage :: PandocMonad m => OrgParser m (F Inlines) -linkOrImage = explicitOrImageLink - <|> selflinkOrImage - <|> angleLink - <|> plainLink - <?> "link or image" - -explicitOrImageLink :: PandocMonad m => OrgParser m (F Inlines) -explicitOrImageLink = try $ do - char '[' - srcF <- applyCustomLinkFormat =<< possiblyEmptyLinkTarget - title <- enclosedRaw (char '[') (char ']') - title' <- parseFromString (mconcat <$> many inline) title - char ']' - return $ do - src <- srcF - case cleanLinkString title of - Just imgSrc | isImageFilename imgSrc -> - pure $ B.link src "" $ B.image imgSrc mempty mempty - _ -> - linkToInlinesF src =<< title' - -selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) -selflinkOrImage = try $ do - src <- char '[' *> linkTarget <* char ']' - return $ linkToInlinesF src (B.str src) - -plainLink :: PandocMonad m => OrgParser m (F Inlines) -plainLink = try $ do - (orig, src) <- uri - returnF $ B.link src "" (B.str orig) - -angleLink :: PandocMonad m => OrgParser m (F Inlines) -angleLink = try $ do - char '<' - link <- plainLink - char '>' - return link - -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair '[' ']' (noneOf "\n\r[]") - -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String -possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") - -applyCustomLinkFormat :: String -> OrgParser m (F String) -applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link - return $ do - formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter - --- | Take a link and return a function which produces new inlines when given --- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines -linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - (Just cleanedLink) -> if isImageFilename cleanedLink - then const . pure $ B.image cleanedLink "" "" - else pure . B.link cleanedLink "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines -internalLink link title = do - anchorB <- (link `elem`) <$> asksF orgStateAnchorIds - if anchorB - then return $ B.link ('#':link) "" title - else return $ B.emph title - --- | Parse an anchor like @<<anchor-id>>@ and return an empty span with --- @anchor-id@ set as id. Legal anchors in org-mode are defined through --- @org-target-regexp@, which is fairly liberal. Since no link is created if --- @anchor-id@ contains spaces, we are more restrictive in what is accepted as --- an anchor. - -anchor :: PandocMonad m => OrgParser m (F Inlines) -anchor = try $ do - anchorId <- parseAnchor - recordAnchorId anchorId - returnF $ B.spanWith (solidify anchorId, [], []) mempty - where - parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") - <* string ">>" - <* skipSpaces - --- | Replace every char but [a-zA-Z0-9_.-:] with a hypen '-'. This mirrors --- the org function @org-export-solidify-link-text@. - -solidify :: String -> String -solidify = map replaceSpecialChar - where replaceSpecialChar c - | isAlphaNum c = c - | c `elem` ("_.-:" :: String) = c - | otherwise = '-' - --- | Parses an inline code block and marks it as an babel block. -inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) -inlineCodeBlock = try $ do - string "src_" - lang <- many1 orgArgWordChar - opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair '{' '}' (noneOf "\n\r") - let attrClasses = [translateLang lang, rundocBlockClass] - let attrKeyVal = map toRundocAttrib (("language", lang) : opts) - returnF $ B.codeWith ("", attrClasses, attrKeyVal) inlineCode - where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) - inlineBlockOption = try $ do - argKey <- orgArgKey - paramValue <- option "yes" orgInlineParamValue - return (argKey, paramValue) - - orgInlineParamValue :: PandocMonad m => OrgParser m String - orgInlineParamValue = try $ - skipSpaces - *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") - <* skipSpaces - - -emphasizedText :: PandocMonad m => OrgParser m (F Inlines) -emphasizedText = do - state <- getState - guard . exportEmphasizedText . orgStateExportSettings $ state - try $ choice - [ emph - , strong - , strikeout - , underline - ] - -enclosedByPair :: PandocMonad m - => Char -- ^ opening char - -> Char -- ^ closing char - -> OrgParser m a -- ^ parser - -> OrgParser m [a] -enclosedByPair s e p = char s *> many1Till p (char e) - -emph :: PandocMonad m => OrgParser m (F Inlines) -emph = fmap B.emph <$> emphasisBetween '/' - -strong :: PandocMonad m => OrgParser m (F Inlines) -strong = fmap B.strong <$> emphasisBetween '*' - -strikeout :: PandocMonad m => OrgParser m (F Inlines) -strikeout = fmap B.strikeout <$> emphasisBetween '+' - --- There is no underline, so we use strong instead. -underline :: PandocMonad m => OrgParser m (F Inlines) -underline = fmap B.strong <$> emphasisBetween '_' - -verbatim :: PandocMonad m => OrgParser m (F Inlines) -verbatim = return . B.code <$> verbatimBetween '=' - -code :: PandocMonad m => OrgParser m (F Inlines) -code = return . B.code <$> verbatimBetween '~' - -subscript :: PandocMonad m => OrgParser m (F Inlines) -subscript = fmap B.subscript <$> try (char '_' *> subOrSuperExpr) - -superscript :: PandocMonad m => OrgParser m (F Inlines) -superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) - -math :: PandocMonad m => OrgParser m (F Inlines) -math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' - , rawMathBetween "\\(" "\\)" - ] - -displayMath :: PandocMonad m => OrgParser m (F Inlines) -displayMath = return . B.displayMath <$> choice [ rawMathBetween "\\[" "\\]" - , rawMathBetween "$$" "$$" - ] - -updatePositions :: PandocMonad m - => Char - -> OrgParser m Char -updatePositions c = do - when (c `elem` emphasisPreChars) updateLastPreCharPos - when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos - return c - -symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) - -emphasisBetween :: PandocMonad m - => Char - -> OrgParser m (F Inlines) -emphasisBetween c = try $ do - startEmphasisNewlinesCounting emphasisAllowedNewlines - res <- enclosedInlines (emphasisStart c) (emphasisEnd c) - isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState - when isTopLevelEmphasis - resetEmphasisNewlines - return res - -verbatimBetween :: PandocMonad m - => Char - -> OrgParser m String -verbatimBetween c = try $ - emphasisStart c *> - many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) - where - verbatimChar = noneOf "\n\r" >>= updatePositions - --- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m - => Char - -> OrgParser m String -mathStringBetween c = try $ do - mathStart c - body <- many1TillNOrLessNewlines mathAllowedNewlines - (noneOf (c:"\n\r")) - (lookAhead $ mathEnd c) - final <- mathEnd c - return $ body ++ [final] - --- | Parse a single character between @c@ using math rules -math1CharBetween :: PandocMonad m - => Char - -> OrgParser m String -math1CharBetween c = try $ do - char c - res <- noneOf $ c:mathForbiddenBorderChars - char c - eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] - -rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) - --- | Parses the start (opening character) of emphasis -emphasisStart :: PandocMonad m => Char -> OrgParser m Char -emphasisStart c = try $ do - guard =<< afterEmphasisPreChar - guard =<< notAfterString - char c - lookAhead (noneOf emphasisForbiddenBorderChars) - pushToInlineCharStack c - -- nested inlines are allowed, so mark this position as one which might be - -- followed by another inline. - updateLastPreCharPos - return c - --- | Parses the closing character of emphasis -emphasisEnd :: PandocMonad m => Char -> OrgParser m Char -emphasisEnd c = try $ do - guard =<< notAfterForbiddenBorderChar - char c - eof <|> () <$ lookAhead acceptablePostChars - updateLastStrPos - popInlineCharStack - return c - where acceptablePostChars = - surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars) - -mathStart :: PandocMonad m => Char -> OrgParser m Char -mathStart c = try $ - char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars)) - -mathEnd :: PandocMonad m => Char -> OrgParser m Char -mathEnd c = try $ do - res <- noneOf (c:mathForbiddenBorderChars) - char c - eof <|> () <$ lookAhead (oneOf mathPostChars) - return res - - -enclosedInlines :: PandocMonad m => OrgParser m a - -> OrgParser m b - -> OrgParser m (F Inlines) -enclosedInlines start end = try $ - trimInlinesF . mconcat <$> enclosed start end inline - -enclosedRaw :: PandocMonad m => OrgParser m a - -> OrgParser m b - -> OrgParser m String -enclosedRaw start end = try $ - start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end - spanningTwoLines = try $ - anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine - --- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume --- newlines. -many1TillNOrLessNewlines :: PandocMonad m => Int - -> OrgParser m Char - -> OrgParser m a - -> OrgParser m String -many1TillNOrLessNewlines n p end = try $ - nMoreLines (Just n) mempty >>= oneOrMore - where - nMoreLines Nothing cs = return cs - nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine - nMoreLines k cs = try $ (final k cs <|> rest k cs) - >>= uncurry nMoreLines - final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine - rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) - finalLine = try $ manyTill p end - minus1 k = k - 1 - oneOrMore cs = guard (not $ null cs) *> return cs - --- Org allows customization of the way it reads emphasis. We use the defaults --- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` --- for details). - --- | Chars allowed to occur before emphasis (spaces and newlines are ok, too) -emphasisPreChars :: [Char] -emphasisPreChars = "\t \"'({" - --- | Chars allowed at after emphasis -emphasisPostChars :: [Char] -emphasisPostChars = "\t\n !\"'),-.:;?\\}" - --- | Chars not allowed at the (inner) border of emphasis -emphasisForbiddenBorderChars :: [Char] -emphasisForbiddenBorderChars = "\t\n\r \"'," - --- | The maximum number of newlines within -emphasisAllowedNewlines :: Int -emphasisAllowedNewlines = 1 - --- LaTeX-style math: see `org-latex-regexps` for details - --- | Chars allowed after an inline ($...$) math statement -mathPostChars :: [Char] -mathPostChars = "\t\n \"'),-.:;?" - --- | Chars not allowed at the (inner) border of math -mathForbiddenBorderChars :: [Char] -mathForbiddenBorderChars = "\t\n\r ,;.$" - --- | Maximum number of newlines in an inline math statement -mathAllowedNewlines :: Int -mathAllowedNewlines = 2 - --- | Whether we are right behind a char allowed before emphasis -afterEmphasisPreChar :: PandocMonad m => OrgParser m Bool -afterEmphasisPreChar = do - pos <- getPosition - lastPrePos <- orgStateLastPreCharPos <$> getState - return . fromMaybe True $ (== pos) <$> lastPrePos - --- | Whether the parser is right after a forbidden border char -notAfterForbiddenBorderChar :: PandocMonad m => OrgParser m Bool -notAfterForbiddenBorderChar = do - pos <- getPosition - lastFBCPos <- orgStateLastForbiddenCharPos <$> getState - return $ lastFBCPos /= Just pos - --- | Read a sub- or superscript expression -subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) -subOrSuperExpr = try $ - choice [ id <$> charsInBalanced '{' '}' (noneOf "\n\r") - , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString - ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] - -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do - state <- getState - guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum - ] - -inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) -inlineLaTeX = try $ do - cmd <- inlineLaTeXCommand - ils <- (lift . lift) $ parseAsInlineLaTeX cmd - maybe mzero returnF $ - parseAsMath cmd `mplus` parseAsMathMLSym cmd `mplus` ils - where - parseAsMath :: String -> Maybe Inlines - parseAsMath cs = B.fromList <$> texMathToPandoc cs - - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) - parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - - parseAsMathMLSym :: String -> Maybe Inlines - parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) - -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 - - state :: ParserState - state = def{ stateOptions = def{ readerExtensions = - enableExtension Ext_raw_tex (readerExtensions def) } } - - texMathToPandoc :: String -> Maybe [Inline] - texMathToPandoc cs = (maybeRight $ readTeX cs) >>= writePandoc DisplayInline - -maybeRight :: Either a b -> Maybe b -maybeRight = either (const Nothing) Just - -inlineLaTeXCommand :: PandocMonad m => OrgParser m String -inlineLaTeXCommand = try $ do - rest <- getInput - parsed <- (lift . lift) $ runParserT rawLaTeXInline def "source" rest - case parsed of - Right (RawInline _ cs) -> do - -- drop any trailing whitespace, those are not be part of the command as - -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc - count len anyChar - return cmdNoSpc - _ -> mzero - --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - -exportSnippet :: PandocMonad m => OrgParser m (F Inlines) -exportSnippet = try $ do - string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") - returnF $ B.rawInline format snippet - -smart :: PandocMonad m => OrgParser m (F Inlines) -smart = do - guardEnabled Ext_smart - doubleQuoted <|> singleQuoted <|> - choice (map (return <$>) [orgApostrophe, orgDash, orgEllipses]) - where - orgDash = do - guard =<< getExportSetting exportSpecialStrings - dash <* updatePositions '-' - orgEllipses = do - guard =<< getExportSetting exportSpecialStrings - ellipses <* updatePositions '.' - orgApostrophe = - (char '\'' <|> char '\8217') <* updateLastPreCharPos - <* updateLastForbiddenCharPos - *> return (B.str "\x2019") - -singleQuoted :: PandocMonad m => OrgParser m (F Inlines) -singleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes - singleQuoteStart - updatePositions '\'' - withQuoteContext InSingleQuote $ - fmap B.singleQuoted . trimInlinesF . mconcat <$> - many1Till inline (singleQuoteEnd <* updatePositions '\'') - --- doubleQuoted will handle regular double-quoted sections, as well --- as dialogues with an open double-quote without a close double-quote --- in the same paragraph. -doubleQuoted :: PandocMonad m => OrgParser m (F Inlines) -doubleQuoted = try $ do - guard =<< getExportSetting exportSmartQuotes - doubleQuoteStart - updatePositions '"' - contents <- mconcat <$> many (try $ notFollowedBy doubleQuoteEnd >> inline) - (withQuoteContext InDoubleQuote $ (doubleQuoteEnd <* updateLastForbiddenCharPos) >> return - (fmap B.doubleQuoted . trimInlinesF $ contents)) - <|> (return $ return (B.str "\8220") <> contents) |