diff options
Diffstat (limited to 'src/Text/Pandoc/Readers/Org/Inlines.hs')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 154 |
1 files changed, 76 insertions, 78 deletions
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index cae590c5f..da638f717 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -20,7 +20,7 @@ import Prelude import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Inlines) @@ -38,12 +38,14 @@ import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T import Data.Maybe (fromMaybe) -- -- Functions acting on the parser state -- -recordAnchorId :: PandocMonad m => String -> OrgParser m () +recordAnchorId :: PandocMonad m => Text -> OrgParser m () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : orgStateAnchorIds s } @@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural @@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String +orgRefCiteKey :: PandocMonad m => OrgParser m Text orgRefCiteKey = let citeKeySpecialChars = "-_:\\./," :: String isCiteKeySpecialChar c = c `elem` citeKeySpecialChars @@ -329,7 +331,7 @@ orgRefCiteKey = endOfCitation = try $ do many $ satisfy isCiteKeySpecialChar satisfy $ not . isCiteKeyChar - in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation -- | Supported citation types. Only a small subset of org-ref types is @@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" - ref <- many alphaNum + ref <- manyChar alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - unless (null ref) $ - addToNotesTable ("fn:" ++ ref, note) + unless (T.null ref) $ + addToNotesTable ("fn:" <> ref, note) return $ B.note <$> note referencedNote :: PandocMonad m => OrgParser m (F Inlines) @@ -397,7 +399,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return . B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" <> ref <> "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -420,7 +422,7 @@ explicitOrImageLink = try $ do return $ do src <- srcF title <- titleF - case cleanLinkString descr of + case cleanLinkText descr of Just imgSrc | isImageFilename imgSrc -> return . B.link src "" $ B.image imgSrc mempty mempty _ -> @@ -429,10 +431,10 @@ explicitOrImageLink = try $ do selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do target <- char '[' *> linkTarget <* char ']' - case cleanLinkString target of - Nothing -> case target of - '#':_ -> returnF $ B.link target "" (B.str target) - _ -> return $ internalLink target (B.str target) + case cleanLinkText target of + Nothing -> case T.uncons target of + Just ('#', _) -> returnF $ B.link target "" (B.str target) + _ -> return $ internalLink target (B.str target) Just nonDocTgt -> if isImageFilename nonDocTgt then returnF $ B.image nonDocTgt "" "" else returnF $ B.link nonDocTgt "" (B.str target) @@ -449,35 +451,35 @@ angleLink = try $ do char '>' return link -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]") +linkTarget :: PandocMonad m => OrgParser m Text +linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser m (F String) +applyCustomLinkFormat :: Text -> OrgParser m (F Text) applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link + let (linkType, rest) = T.break (== ':') link return $ do formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + return $ maybe link ($ T.drop 1 rest) formatter -- | Take a link and return a function which produces new inlines when given -- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF :: Text -> Inlines -> F Inlines linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - Just extTgt -> return . B.link extTgt "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines + case T.uncons linkStr of + Nothing -> pure . B.link mempty "" -- wiki link (empty by convention) + Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction + _ -> case cleanLinkText linkStr of + Just extTgt -> return . B.link extTgt "" + Nothing -> internalLink linkStr -- other internal link + +internalLink :: Text -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds if anchorB - then return $ B.link ('#':link) "" title + then return $ B.link ("#" <> link) "" title else return $ B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with @@ -493,15 +495,15 @@ anchor = try $ do returnF $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") + *> many1Char (noneOf "\t\n\r<>\"' ") <* string ">>" <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. -solidify :: String -> String -solidify = map replaceSpecialChar +solidify :: Text -> Text +solidify = T.map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c | c `elem` ("_.-:" :: String) = c @@ -511,25 +513,25 @@ solidify = map replaceSpecialChar inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" - lang <- many1 orgArgWordChar + lang <- many1Char orgArgWordChar opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r") + inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang] let attrKeyVal = originalLang lang <> opts let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode returnF $ if exportsCode opts then codeInlineBlck else mempty where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: PandocMonad m => OrgParser m String + orgInlineParamValue :: PandocMonad m => OrgParser m Text orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") + *> many1Char (noneOf "\t\n\r ]") <* skipSpaces @@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' + , mathTextBetween '$' , rawMathBetween "\\(" "\\)" ] @@ -604,7 +606,7 @@ updatePositions c = do return c symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions) emphasisBetween :: PandocMonad m => Char @@ -619,7 +621,7 @@ emphasisBetween c = try $ do verbatimBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -627,33 +629,33 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m +mathTextBetween :: PandocMonad m => Char - -> OrgParser m String -mathStringBetween c = try $ do + -> OrgParser m Text +mathTextBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines (noneOf (c:"\n\r")) (lookAhead $ mathEnd c) final <- mathEnd c - return $ body ++ [final] + return $ T.snoc body final -- | Parse a single character between @c@ using math rules math1CharBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] + return $ T.singleton res rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + => Text + -> Text + -> OrgParser m Text +rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) -- | Parses the start (opening character) of emphasis emphasisStart :: PandocMonad m => Char -> OrgParser m Char @@ -702,10 +704,10 @@ enclosedInlines start end = try $ enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b - -> OrgParser m String + -> OrgParser m Text enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end + where onSingleLine = try $ many1TillChar (noneOf "\n\r") end spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine @@ -714,7 +716,7 @@ enclosedRaw start end = try $ many1TillNOrLessNewlines :: PandocMonad m => Int -> OrgParser m Char -> OrgParser m a - -> OrgParser m String + -> OrgParser m Text many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) finalLine = try $ manyTill p end minus1 k = k - 1 - oneOrMore cs = cs <$ guard (not $ null cs) + oneOrMore cs = T.pack cs <$ guard (not $ null cs) -- Org allows customization of the way it reads emphasis. We use the defaults -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` @@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString + , simpleSubOrSuperText ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] + where enclosing (left, right) s = T.cons left $ T.snoc s right -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do +simpleSubOrSuperText :: PandocMonad m => OrgParser m Text +simpleSubOrSuperText = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum + choice [ textStr "*" + , mappend <$> option "" (T.singleton <$> oneOf "+-") + <*> many1Char alphaNum ] inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) @@ -793,28 +795,28 @@ inlineLaTeX = try $ do maybe mzero returnF $ parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils where - parseAsMath :: String -> Maybe Inlines + parseAsMath :: Text -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines) parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym :: Text -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 + where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1 state :: ParserState state = def{ stateOptions = def{ readerExtensions = enableExtension Ext_raw_tex (readerExtensions def) } } - texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc :: Text -> Maybe [Inline] texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: PandocMonad m => OrgParser m String +inlineLaTeXCommand :: PandocMonad m => OrgParser m Text inlineLaTeXCommand = try $ do rest <- getInput st <- getState @@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc + let cmdNoSpc = T.dropWhileEnd isSpace cs + let len = T.length cmdNoSpc count len anyChar return cmdNoSpc _ -> mzero --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") + format <- many1TillChar (alphaNum <|> char '-') (char ':') + snippet <- manyTillChar anyChar (try $ string "@@") returnF $ B.rawInline format snippet macro :: PandocMonad m => OrgParser m (F Inlines) @@ -845,7 +843,7 @@ macro = try $ do recursionDepth <- orgStateMacroDepth <$> getState guard $ recursionDepth < 15 string "{{{" - name <- many alphaNum + name <- manyChar alphaNum args <- ([] <$ string "}}}") <|> char '(' *> argument `sepBy` char ',' <* eoa expander <- lookupMacro name <$> getState @@ -857,7 +855,7 @@ macro = try $ do updateState $ \s -> s { orgStateMacroDepth = recursionDepth } return res where - argument = many $ notFollowedBy eoa *> noneOf "," + argument = manyChar $ notFollowedBy eoa *> noneOf "," eoa = string ")}}}" smart :: PandocMonad m => OrgParser m (F Inlines) |