diff options
author | despresc <christian.j.j.despres@gmail.com> | 2019-11-04 16:12:37 -0500 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2019-11-12 16:03:45 -0800 |
commit | 90e436d49604e3fd1ef9432fb23f6d7f6245c7fd (patch) | |
tree | 4e7f0692f989643189f1fc6786050d95e239a0ea /src/Text/Pandoc/Readers/Org | |
parent | d3966372f5049eea56213b069fc4d70d8af9144c (diff) | |
download | pandoc-90e436d49604e3fd1ef9432fb23f6d7f6245c7fd.tar.gz |
Switch to new pandoc-types and use Text instead of String [API change].
PR #5884.
+ Use pandoc-types 1.20 and texmath 0.12.
+ Text is now used instead of String, with a few exceptions.
+ In the MediaBag module, some of the types using Strings
were switched to use FilePath instead (not Text).
+ In the Parsing module, new parsers `manyChar`, `many1Char`,
`manyTillChar`, `many1TillChar`, `many1Till`, `manyUntil`,
`mantyUntilChar` have been added: these are like their
unsuffixed counterparts but pack some or all of their output.
+ `glob` in Text.Pandoc.Class still takes String since it seems
to be intended as an interface to Glob, which uses strings.
It seems to be used only once in the package, in the EPUB writer,
so that is not hard to change.
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r-- | src/Text/Pandoc/Readers/Org/BlockStarts.hs | 25 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Blocks.hs | 219 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/DocumentTree.hs | 44 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ExportSettings.hs | 40 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Inlines.hs | 154 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Meta.hs | 72 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/ParserState.hs | 27 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Parsing.hs | 27 | ||||
-rw-r--r-- | src/Text/Pandoc/Readers/Org/Shared.hs | 50 |
9 files changed, 338 insertions, 320 deletions
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs index 58db4f46c..b4f3cc0d8 100644 --- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs +++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.BlockStarts Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts import Prelude import Control.Monad (void) +import Data.Text (Text) +import qualified Data.Text as T import Text.Pandoc.Readers.Org.Parsing -- | Horizontal Line (five -- dashes or more) @@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m () gridTableStart = try $ skipSpaces <* char '+' <* char '-' -latexEnvStart :: Monad m => OrgParser m String +latexEnvStart :: Monad m => OrgParser m Text latexEnvStart = try $ skipSpaces *> string "\\begin{" *> latexEnvName <* string "}" <* blankline where - latexEnvName :: Monad m => OrgParser m String - latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*") + latexEnvName :: Monad m => OrgParser m Text + latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*") bulletListStart :: Monad m => OrgParser m Int bulletListStart = try $ do @@ -68,7 +71,7 @@ bulletListStart = try $ do return (ind + 1) genericListStart :: Monad m - => OrgParser m String + => OrgParser m Text -> OrgParser m Int genericListStart listMarker = try $ do ind <- length <$> many spaceChar @@ -82,11 +85,11 @@ eol = void (char '\n') orderedListStart :: Monad m => OrgParser m Int orderedListStart = genericListStart orderedListMarker -- Ordered list markers allowed in org-mode - where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)") + where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)" -drawerStart :: Monad m => OrgParser m String +drawerStart :: Monad m => OrgParser m Text drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline - where drawerName = char ':' *> manyTill nonspaceChar (char ':') + where drawerName = char ':' *> manyTillChar nonspaceChar (char ':') metaLineStart :: Monad m => OrgParser m () metaLineStart = try $ skipSpaces <* string "#+" @@ -99,12 +102,12 @@ commentLineStart = try $ exampleLineStart :: Monad m => OrgParser m () exampleLineStart = () <$ try (skipSpaces *> string ": ") -noteMarker :: Monad m => OrgParser m String +noteMarker :: Monad m => OrgParser m Text noteMarker = try $ do char '[' - choice [ many1Till digit (char ']') - , (++) <$> string "fn:" - <*> many1Till (noneOf "\n\r\t ") (char ']') + choice [ many1TillChar digit (char ']') + , (<>) <$> textStr "fn:" + <*> many1TillChar (noneOf "\n\r\t ") (char ']') ] -- | Succeeds if the parser is at the end of a block. diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs index cba876f06..de51dec3d 100644 --- a/src/Text/Pandoc/Readers/Org/Blocks.hs +++ b/src/Text/Pandoc/Readers/Org/Blocks.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Blocks Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Blocks, Inlines) @@ -33,11 +34,13 @@ import Text.Pandoc.Options import Text.Pandoc.Shared (compactify, compactifyDL, safeRead) import Control.Monad (foldM, guard, mzero, void) -import Data.Char (isSpace, toLower, toUpper) +import Data.Char (isSpace) import Data.Default (Default) -import Data.List (foldl', isPrefixOf) +import Data.List (foldl') import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Text (Text) +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Walk as Walk @@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline -- | Attributes that may be added to figures (like a name or caption). data BlockAttributes = BlockAttributes - { blockAttrName :: Maybe String - , blockAttrLabel :: Maybe String + { blockAttrName :: Maybe Text + , blockAttrLabel :: Maybe Text , blockAttrCaption :: Maybe (F Inlines) - , blockAttrKeyValues :: [(String, String)] + , blockAttrKeyValues :: [(Text, Text)] } -- | Convert BlockAttributes into pandoc Attr @@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} = ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues classes = case lookup "class" blockAttrKeyValues of Nothing -> [] - Just clsStr -> words clsStr + Just clsStr -> T.words clsStr kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues in (ident, classes, kv) -stringyMetaAttribute :: Monad m => OrgParser m (String, String) +stringyMetaAttribute :: Monad m => OrgParser m (Text, Text) stringyMetaAttribute = try $ do metaLineStart - attrName <- map toUpper <$> many1Till nonspaceChar (char ':') + attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':') skipSpaces attrValue <- anyLine <|> ("" <$ newline) return (attrName, attrValue) @@ -129,8 +132,8 @@ blockAttributes = try $ do let label = lookup "LABEL" kv caption' <- case caption of Nothing -> return Nothing - Just s -> Just <$> parseFromString inlines (s ++ "\n") - kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs + Just s -> Just <$> parseFromString inlines (s <> "\n") + kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs return BlockAttributes { blockAttrName = name , blockAttrLabel = label @@ -138,31 +141,31 @@ blockAttributes = try $ do , blockAttrKeyValues = kvAttrs' } where - isBlockAttr :: String -> Bool + isBlockAttr :: Text -> Bool isBlockAttr = flip elem [ "NAME", "LABEL", "CAPTION" , "ATTR_HTML", "ATTR_LATEX" , "RESULTS" ] - appendValues :: String -> Maybe String -> (String, String) -> Maybe String + appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text appendValues attrName accValue (key, value) = if key /= attrName then accValue else case accValue of - Just acc -> Just $ acc ++ ' ':value + Just acc -> Just $ acc <> " " <> value Nothing -> Just value -- | Parse key-value pairs for HTML attributes -keyValues :: Monad m => OrgParser m [(String, String)] +keyValues :: Monad m => OrgParser m [(Text, Text)] keyValues = try $ manyTill ((,) <$> key <*> value) newline where - key :: Monad m => OrgParser m String - key = try $ skipSpaces *> char ':' *> many1 nonspaceChar + key :: Monad m => OrgParser m Text + key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar - value :: Monad m => OrgParser m String - value = skipSpaces *> manyTill anyChar endOfValue + value :: Monad m => OrgParser m Text + value = skipSpaces *> manyTillChar anyChar endOfValue endOfValue :: Monad m => OrgParser m () endOfValue = @@ -180,7 +183,7 @@ orgBlock = try $ do blockAttrs <- blockAttributes blkType <- blockHeaderStart ($ blkType) $ - case map toLower blkType of + case T.toLower blkType of "export" -> exportBlock "comment" -> rawBlockLines (const mempty) "html" -> rawBlockLines (return . B.rawBlock (lowercase blkType)) @@ -194,13 +197,13 @@ orgBlock = try $ do let (ident, classes, kv) = attrFromBlockAttributes blockAttrs in fmap $ B.divWith (ident, classes ++ [blkType], kv) where - blockHeaderStart :: Monad m => OrgParser m String + blockHeaderStart :: Monad m => OrgParser m Text blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord - lowercase :: String -> String - lowercase = map toLower + lowercase :: Text -> Text + lowercase = T.toLower -exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) exampleBlock blockAttrs _label = do skipSpaces (classes, kv) <- switchesAsAttributes @@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do let codeBlck = B.codeBlockWith (id', "example":classes, kv) content return . return $ codeBlck -rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks) +rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks) rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType) -parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks) +parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks) parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent) where parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks) parsedBlockContent = try $ do raw <- rawBlockContent blockType - parseFromString blocks (raw ++ "\n") + parseFromString blocks (raw <> "\n") -- | Read the raw string content of a block -rawBlockContent :: Monad m => String -> OrgParser m String +rawBlockContent :: Monad m => Text -> OrgParser m Text rawBlockContent blockType = try $ do blkLines <- manyTill rawLine blockEnder tabLen <- getOption readerTabStop trimP <- orgStateTrimLeadBlkIndent <$> getState - let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs - (unlines + let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs + (T.unlines . stripIndent . map (tabsToSpaces tabLen . commaEscaped) $ blkLines) <$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True }) where - rawLine :: Monad m => OrgParser m String + rawLine :: Monad m => OrgParser m Text rawLine = try $ ("" <$ blankline) <|> anyLine blockEnder :: Monad m => OrgParser m () blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType) - shortestIndent :: [String] -> Int - shortestIndent = foldr (min . length . takeWhile isSpace) maxBound - . filter (not . null) - - tabsToSpaces :: Int -> String -> String - tabsToSpaces _ [] = [] - tabsToSpaces tabLen cs'@(c:cs) = - case c of - ' ' -> ' ':tabsToSpaces tabLen cs - '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs - _ -> cs' - - commaEscaped :: String -> String - commaEscaped (',':cs@('*':_)) = cs - commaEscaped (',':cs@('#':'+':_)) = cs - commaEscaped (' ':cs) = ' ':commaEscaped cs - commaEscaped ('\t':cs) = '\t':commaEscaped cs - commaEscaped cs = cs + shortestIndent :: [Text] -> Int + shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound + . filter (not . T.null) + + tabsToSpaces :: Int -> Text -> Text + tabsToSpaces tabStop t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + tabNum = T.length $ T.filter (== '\n') ind + spaceNum = T.length ind - tabNum + in T.replicate (spaceNum + tabStop * tabNum) " " <> suff + + commaEscaped t = + let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t + in case T.uncons suff of + Just (',', cs) + | "*" <- T.take 1 cs -> ind <> cs + | "#+" <- T.take 2 cs -> ind <> cs + _ -> t -- | Read but ignore all remaining block headers. ignHeaders :: Monad m => OrgParser m () @@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine) -- | Read a block containing code intended for export in specific backends -- only. -exportBlock :: Monad m => String -> OrgParser m (F Blocks) +exportBlock :: Monad m => Text -> OrgParser m (F Blocks) exportBlock blockType = try $ do exportType <- skipSpaces *> orgArgWord <* ignHeaders contents <- rawBlockContent blockType - returnF (B.rawBlock (map toLower exportType) contents) + returnF (B.rawBlock (T.toLower exportType) contents) -verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks) +verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks) verseBlock blockType = try $ do ignHeaders content <- rawBlockContent blockType fmap B.lineBlock . sequence - <$> mapM parseVerseLine (lines content) + <$> mapM parseVerseLine (T.lines content) where -- replace initial spaces with nonbreaking spaces to preserve -- indentation, parse the rest as normal inline - parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines) + parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines) parseVerseLine cs = do - let (initialSpaces, indentedLine) = span isSpace cs - let nbspIndent = if null initialSpaces + let (initialSpaces, indentedLine) = T.span isSpace cs + let nbspIndent = if T.null initialSpaces then mempty - else B.str $ map (const '\160') initialSpaces - line <- parseFromString inlines (indentedLine ++ "\n") + else B.str $ T.map (const '\160') initialSpaces + line <- parseFromString inlines (indentedLine <> "\n") return (trimInlinesF $ pure nbspIndent <> line) -- | Read a code block and the associated results block if present. Which of -- boths blocks is included in the output is determined using the "exports" -- argument in the block header. -codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks) +codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks) codeBlock blockAttrs blockType = do skipSpaces (classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders) @@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do labelledBlock :: F Inlines -> F Blocks labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], [])) - exportsResults :: [(String, String)] -> Bool + exportsResults :: [(Text, Text)] -> Bool exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports" -- | Parse the result of an evaluated babel code block. @@ -329,7 +332,7 @@ babelResultsBlock = try $ do resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline -- | Parse code block arguments -codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)]) +codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)]) codeHeaderArgs = try $ do language <- skipSpaces *> orgArgWord (switchClasses, switchKv) <- switchesAsAttributes @@ -338,14 +341,14 @@ codeHeaderArgs = try $ do , originalLang language <> switchKv <> parameters ) -switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)]) +switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)]) switchesAsAttributes = try $ do switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar) return $ foldr addToAttr ([], []) switches where - addToAttr :: (Char, Maybe String, SwitchPolarity) - -> ([String], [(String, String)]) - -> ([String], [(String, String)]) + addToAttr :: (Char, Maybe Text, SwitchPolarity) + -> ([Text], [(Text, Text)]) + -> ([Text], [(Text, Text)]) addToAttr ('n', lineNum, pol) (cls, kv) = let kv' = case lineNum of Just num -> ("startFrom", num):kv @@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+') -- | Parses a source block switch option. -switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) switch = try $ lineNumberSwitch <|> labelSwitch <|> whitespaceSwitch <|> simpleSwitch where simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter labelSwitch = genericSwitch 'l' $ - char '"' *> many1Till nonspaceChar (char '"') + char '"' *> many1TillChar nonspaceChar (char '"') -whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) +whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) whitespaceSwitch = do string "-i" updateState $ \s -> s { orgStateTrimLeadBlkIndent = False } @@ -382,8 +385,8 @@ whitespaceSwitch = do -- | Generic source block switch-option parser. genericSwitch :: Monad m => Char - -> OrgParser m String - -> OrgParser m (Char, Maybe String, SwitchPolarity) + -> OrgParser m Text + -> OrgParser m (Char, Maybe Text, SwitchPolarity) genericSwitch c p = try $ do polarity <- switchPolarity <* char c <* skipSpaces arg <- optionMaybe p @@ -391,17 +394,17 @@ genericSwitch c p = try $ do -- | Reads a line number switch option. The line number switch can be used with -- example and source blocks. -lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity) -lineNumberSwitch = genericSwitch 'n' (many digit) +lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity) +lineNumberSwitch = genericSwitch 'n' (manyChar digit) -blockOption :: Monad m => OrgParser m (String, String) +blockOption :: Monad m => OrgParser m (Text, Text) blockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgParamValue return (argKey, paramValue) -orgParamValue :: Monad m => OrgParser m String -orgParamValue = try $ +orgParamValue :: Monad m => OrgParser m Text +orgParamValue = try $ fmap T.pack $ skipSpaces *> notFollowedBy orgArgKey *> noneOf "\n\r" `many1Till` endOfValue @@ -420,7 +423,7 @@ orgParamValue = try $ -- export setting. genericDrawer :: PandocMonad m => OrgParser m (F Blocks) genericDrawer = try $ do - name <- map toUpper <$> drawerStart + name <- T.toUpper <$> drawerStart content <- manyTill drawerLine (try drawerEnd) state <- getState -- Include drawer if it is explicitly included in or not explicitly excluded @@ -432,16 +435,16 @@ genericDrawer = try $ do Right names | name `notElem` names -> return mempty _ -> drawerDiv name <$> parseLines content where - parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks) - parseLines = parseFromString blocks . (++ "\n") . unlines + parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks) + parseLines = parseFromString blocks . (<> "\n") . T.unlines - drawerDiv :: String -> F Blocks -> F Blocks + drawerDiv :: Text -> F Blocks -> F Blocks drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty) -drawerLine :: Monad m => OrgParser m String +drawerLine :: Monad m => OrgParser m Text drawerLine = anyLine -drawerEnd :: Monad m => OrgParser m String +drawerEnd :: Monad m => OrgParser m Text drawerEnd = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline @@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks) figure = try $ do figAttrs <- blockAttributes src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph - case cleanLinkString src of + case cleanLinkText src of Nothing -> mzero Just imgSrc -> do guard (isImageFilename imgSrc) let isFigure = isJust $ blockAttrCaption figAttrs return $ imageBlock isFigure figAttrs imgSrc where - selfTarget :: PandocMonad m => OrgParser m String + selfTarget :: PandocMonad m => OrgParser m Text selfTarget = try $ char '[' *> linkTarget <* char ']' - imageBlock :: Bool -> BlockAttributes -> String -> F Blocks + imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks imageBlock isFigure figAttrs imgSrc = let figName = fromMaybe mempty $ blockAttrName figAttrs @@ -478,11 +481,11 @@ figure = try $ do in B.para . B.imageWith attr imgSrc figTitle <$> figCaption - withFigPrefix :: String -> String + withFigPrefix :: Text -> Text withFigPrefix cs = - if "fig:" `isPrefixOf` cs + if "fig:" `T.isPrefixOf` cs then cs - else "fig:" ++ cs + else "fig:" <> cs -- | Succeeds if looking at the end of the current paragraph endOfParagraph :: Monad m => OrgParser m () @@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock -- | Example code marked up by a leading colon. example :: Monad m => OrgParser m (F Blocks) -example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine +example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine where - exampleLine :: Monad m => OrgParser m String + exampleLine :: Monad m => OrgParser m Text exampleLine = try $ exampleLineStart *> anyLine -exampleCode :: String -> Blocks +exampleCode :: Text -> Blocks exampleCode = B.codeBlockWith ("", ["example"], []) @@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks) include = try $ do metaLineStart <* stringAnyCase "include:" <* skipSpaces filename <- includeTarget - includeArgs <- many (try $ skipSpaces *> many1 alphaNum) + includeArgs <- many (try $ skipSpaces *> many1Char alphaNum) params <- keyValues blocksParser <- case includeArgs of ("example" : _) -> return $ pure . B.codeBlock <$> parseRaw @@ -535,10 +538,10 @@ include = try $ do char '"' manyTill (noneOf "\n\r\t") (char '"') - parseRaw :: PandocMonad m => OrgParser m String - parseRaw = many anyChar + parseRaw :: PandocMonad m => OrgParser m Text + parseRaw = manyChar anyChar - blockFilter :: [(String, String)] -> [Block] -> [Block] + blockFilter :: [(Text, Text)] -> [Block] -> [Block] blockFilter params blks = let minlvl = lookup "minlevel" params in case (minlvl >>= safeRead :: Maybe Int) of @@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info" <$> (skipSpaces *> char '<' *> optionMaybe tableAlignFromChar) - <*> (optionMaybe (many1 digit >>= safeRead) + <*> (optionMaybe (many1Char digit >>= safeRead) <* char '>' <* emptyCell) @@ -739,10 +742,10 @@ latexFragment = try $ do , "\\end{", e, "}\n" ] -latexEnd :: Monad m => String -> OrgParser m () +latexEnd :: Monad m => Text -> OrgParser m () latexEnd envName = try $ () <$ skipSpaces - <* string ("\\end{" ++ envName ++ "}") + <* textStr ("\\end{" <> envName <> "}") <* blankline @@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m -> OrgParser m (F (Inlines, [Blocks])) definitionListItem parseIndentedMarker = try $ do markerLength <- parseIndentedMarker - term <- manyTill (noneOf "\n\r") (try definitionMarker) + term <- manyTillChar (noneOf "\n\r") (try definitionMarker) line1 <- anyLineNewline blank <- option "" ("\n" <$ blankline) - cont <- concat <$> many (listContinuation markerLength) + cont <- T.concat <$> many (listContinuation markerLength) term' <- parseFromString inlines term - contents' <- parseFromString blocks $ line1 ++ blank ++ cont + contents' <- parseFromString blocks $ line1 <> blank <> cont return $ (,) <$> term' <*> fmap (:[]) contents' where definitionMarker = @@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do markerLength <- try parseIndentedMarker firstLine <- anyLineNewline blank <- option "" ("\n" <$ blankline) - rest <- concat <$> many (listContinuation markerLength) - parseFromString blocks $ firstLine ++ blank ++ rest + rest <- T.concat <$> many (listContinuation markerLength) + parseFromString blocks $ firstLine <> blank <> rest -- continuation of a list item - indented and separated by blankline or endline. -- Note: nested lists are parsed as continuations. -listContinuation :: PandocMonad m => Int -> OrgParser m String +listContinuation :: PandocMonad m => Int -> OrgParser m Text listContinuation markerLength = try $ do notFollowedBy' blankline - mappend <$> (concat <$> many1 (listContinuation' markerLength)) - <*> many blankline + mappend <$> (T.concat <$> many1 (listContinuation' markerLength)) + <*> manyChar blankline where listContinuation' indentation = blockLines indentation <|> listLine indentation @@ -853,6 +856,6 @@ listContinuation markerLength = try $ do >> blockAttributes >>= (\blockAttrs -> case attrFromBlockAttributes blockAttrs of - ("", [], []) -> count 1 anyChar + ("", [], []) -> countChar 1 anyChar _ -> indentWith indentation)) >> (snd <$> withRaw orgBlock) diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs index c96087be7..09a501b68 100644 --- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs +++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs @@ -1,5 +1,6 @@ -{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.DocumentTree Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree import Prelude import Control.Arrow ((***)) import Control.Monad (guard, void) -import Data.Char (toLower, toUpper) import Data.List (intersperse) import Data.Maybe (mapMaybe) +import Data.Text (Text) import Text.Pandoc.Builder (Blocks, Inlines) import Text.Pandoc.Class (PandocMonad) import Text.Pandoc.Definition @@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing import qualified Data.Set as Set +import qualified Data.Text as T import qualified Text.Pandoc.Builder as B -- @@ -59,28 +61,28 @@ documentTree blocks inline = do } -- | Create a tag containing the given string. -toTag :: String -> Tag +toTag :: Text -> Tag toTag = Tag -- | The key (also called name or type) of a property. -newtype PropertyKey = PropertyKey { fromKey :: String } +newtype PropertyKey = PropertyKey { fromKey :: Text } deriving (Show, Eq, Ord) -- | Create a property key containing the given string. Org mode keys are -- case insensitive and are hence converted to lower case. -toPropertyKey :: String -> PropertyKey -toPropertyKey = PropertyKey . map toLower +toPropertyKey :: Text -> PropertyKey +toPropertyKey = PropertyKey . T.toLower -- | The value assigned to a property. -newtype PropertyValue = PropertyValue { fromValue :: String } +newtype PropertyValue = PropertyValue { fromValue :: Text } -- | Create a property value containing the given string. -toPropertyValue :: String -> PropertyValue +toPropertyValue :: Text -> PropertyValue toPropertyValue = PropertyValue -- | Check whether the property value is non-nil (i.e. truish). isNonNil :: PropertyValue -> Bool -isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"] +isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"] -- | Key/value pairs from a PROPERTIES drawer type Properties = [(PropertyKey, PropertyValue)] @@ -273,7 +275,7 @@ headlineToHeader hdln = do todoKeyword :: Monad m => OrgParser m TodoMarker todoKeyword = try $ do taskStates <- activeTodoMarkers <$> getState - let kwParser tdm = try (tdm <$ string (todoMarkerName tdm) + let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm) <* spaceChar <* updateLastPreCharPos) choice (map kwParser taskStates) @@ -281,26 +283,26 @@ todoKeyword = try $ do todoKeywordToInlines :: TodoMarker -> Inlines todoKeywordToInlines tdm = let todoText = todoMarkerName tdm - todoState = map toLower . show $ todoMarkerState tdm + todoState = T.toLower . T.pack . show $ todoMarkerState tdm classes = [todoState, todoText] in B.spanWith (mempty, classes, mempty) (B.str todoText) propertiesToAttr :: Properties -> Attr propertiesToAttr properties = let - toStringPair = fromKey *** fromValue + toTextPair = fromKey *** fromValue customIdKey = toPropertyKey "custom_id" classKey = toPropertyKey "class" unnumberedKey = toPropertyKey "unnumbered" specialProperties = [customIdKey, classKey, unnumberedKey] id' = maybe mempty fromValue . lookup customIdKey $ properties cls = maybe mempty fromValue . lookup classKey $ properties - kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst) + kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst) $ properties isUnnumbered = maybe False isNonNil . lookup unnumberedKey $ properties in - (id', words cls ++ ["unnumbered" | isUnnumbered], kvs') + (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs') tagsToInlines :: [Tag] -> Inlines tagsToInlines [] = mempty @@ -336,15 +338,15 @@ planningToBlock planning = do <> B.emph (B.str time) -- | An Org timestamp, including repetition marks. TODO: improve -type Timestamp = String +type Timestamp = Text timestamp :: Monad m => OrgParser m Timestamp timestamp = try $ do openChar <- oneOf "<[" let isActive = openChar == '<' let closeChar = if isActive then '>' else ']' - content <- many1Till anyChar (char closeChar) - return (openChar : content ++ [closeChar]) + content <- many1TillChar anyChar (char closeChar) + return $ T.cons openChar $ content `T.snoc` closeChar -- | Planning information for a subtree/headline. data PlanningInfo = PlanningInfo @@ -374,7 +376,7 @@ planningInfo = try $ do propertiesDrawer :: Monad m => OrgParser m Properties propertiesDrawer = try $ do drawerType <- drawerStart - guard $ map toUpper drawerType == "PROPERTIES" + guard $ T.toUpper drawerType == "PROPERTIES" manyTill property (try endOfDrawer) where property :: Monad m => OrgParser m (PropertyKey, PropertyValue) @@ -382,12 +384,12 @@ propertiesDrawer = try $ do key :: Monad m => OrgParser m PropertyKey key = fmap toPropertyKey . try $ - skipSpaces *> char ':' *> many1Till nonspaceChar (char ':') + skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':') value :: Monad m => OrgParser m PropertyValue value = fmap toPropertyValue . try $ - skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline) + skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline) - endOfDrawer :: Monad m => OrgParser m String + endOfDrawer :: Monad m => OrgParser m Text endOfDrawer = try $ skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs index f783eaa0f..f1f089273 100644 --- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs +++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs @@ -1,4 +1,5 @@ {-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ExportSettings Copyright : © 2016–2019 Albert Krewinkel @@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing import Control.Monad (mzero, void) import Data.Char (toLower) import Data.Maybe (listToMaybe) +import Data.Text (Text) -- | Read and handle space separated org-mode export settings. exportSettings :: PandocMonad m => OrgParser m () @@ -70,11 +72,11 @@ exportSetting = choice genericExportSetting :: Monad m => OrgParser m a - -> String + -> Text -> ExportSettingSetter a -> OrgParser m () genericExportSetting optionParser settingIdentifier setter = try $ do - _ <- string settingIdentifier *> char ':' + _ <- textStr settingIdentifier *> char ':' value <- optionParser updateState $ modifyExportSettings value where @@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do st { orgStateExportSettings = setter val . orgStateExportSettings $ st } -- | A boolean option, either nil (False) or non-nil (True). -booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m () +booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m () booleanSetting = genericExportSetting elispBoolean -- | An integer-valued option. -integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m () +integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m () integerSetting = genericExportSetting parseInt where parseInt = try $ @@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt -- | Either the string "headline" or an elisp boolean and treated as an -- @ArchivedTreesOption@. archivedTreeSetting :: Monad m - => String + => Text -> ExportSettingSetter ArchivedTreesOption -> OrgParser m () archivedTreeSetting = @@ -115,42 +117,42 @@ archivedTreeSetting = -- | A list or a complement list (i.e. a list starting with `not`). complementableListSetting :: Monad m - => String - -> ExportSettingSetter (Either [String] [String]) + => Text + -> ExportSettingSetter (Either [Text] [Text]) -> OrgParser m () complementableListSetting = genericExportSetting $ choice - [ Left <$> complementStringList + [ Left <$> complementTextList , Right <$> stringList , (\b -> if b then Left [] else Right []) <$> elispBoolean ] where -- Read a plain list of strings. - stringList :: Monad m => OrgParser m [String] + stringList :: Monad m => OrgParser m [Text] stringList = try $ char '(' - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' -- Read an emacs lisp list specifying a complement set. - complementStringList :: Monad m => OrgParser m [String] - complementStringList = try $ + complementTextList :: Monad m => OrgParser m [Text] + complementTextList = try $ string "(not " - *> sepBy elispString spaces + *> sepBy elispText spaces <* char ')' - elispString :: Monad m => OrgParser m String - elispString = try $ + elispText :: Monad m => OrgParser m Text + elispText = try $ char '"' - *> manyTill alphaNum (char '"') + *> manyTillChar alphaNum (char '"') -- | Read but ignore the export setting. -ignoredSetting :: Monad m => String -> OrgParser m () -ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar) +ignoredSetting :: Monad m => Text -> OrgParser m () +ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar) -- | Read any setting string, but ignore it and emit a warning. ignoreAndWarn :: PandocMonad m => OrgParser m () ignoreAndWarn = try $ do - opt <- many1 nonspaceChar + opt <- many1Char nonspaceChar report (UnknownOrgExportOption opt) return () diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs index cae590c5f..da638f717 100644 --- a/src/Text/Pandoc/Readers/Org/Inlines.hs +++ b/src/Text/Pandoc/Readers/Org/Inlines.hs @@ -20,7 +20,7 @@ import Prelude import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Readers.Org.Parsing -import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename, +import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename, originalLang, translateLang, exportsCode) import Text.Pandoc.Builder (Inlines) @@ -38,12 +38,14 @@ import Control.Monad.Trans (lift) import Data.Char (isAlphaNum, isSpace) import Data.List (intersperse) import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T import Data.Maybe (fromMaybe) -- -- Functions acting on the parser state -- -recordAnchorId :: PandocMonad m => String -> OrgParser m () +recordAnchorId :: PandocMonad m => Text -> OrgParser m () recordAnchorId i = updateState $ \s -> s{ orgStateAnchorIds = i : orgStateAnchorIds s } @@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines) linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline str :: PandocMonad m => OrgParser m (F Inlines) -str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ") +str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ") <* updateLastStrPos -- | An endline character that can be treated as a space, not a structural @@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do -- | Read a citation key. The characters allowed in citation keys are taken -- from the `org-ref-cite-re` variable in `org-ref.el`. -orgRefCiteKey :: PandocMonad m => OrgParser m String +orgRefCiteKey :: PandocMonad m => OrgParser m Text orgRefCiteKey = let citeKeySpecialChars = "-_:\\./," :: String isCiteKeySpecialChar c = c `elem` citeKeySpecialChars @@ -329,7 +331,7 @@ orgRefCiteKey = endOfCitation = try $ do many $ satisfy isCiteKeySpecialChar satisfy $ not . isCiteKeyChar - in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation + in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation -- | Supported citation types. Only a small subset of org-ref types is @@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote inlineNote :: PandocMonad m => OrgParser m (F Inlines) inlineNote = try $ do string "[fn:" - ref <- many alphaNum + ref <- manyChar alphaNum char ':' note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']') - unless (null ref) $ - addToNotesTable ("fn:" ++ ref, note) + unless (T.null ref) $ + addToNotesTable ("fn:" <> ref, note) return $ B.note <$> note referencedNote :: PandocMonad m => OrgParser m (F Inlines) @@ -397,7 +399,7 @@ referencedNote = try $ do return $ do notes <- asksF orgStateNotes' case lookup ref notes of - Nothing -> return . B.str $ "[" ++ ref ++ "]" + Nothing -> return . B.str $ "[" <> ref <> "]" Just contents -> do st <- askF let contents' = runF contents st{ orgStateNotes' = [] } @@ -420,7 +422,7 @@ explicitOrImageLink = try $ do return $ do src <- srcF title <- titleF - case cleanLinkString descr of + case cleanLinkText descr of Just imgSrc | isImageFilename imgSrc -> return . B.link src "" $ B.image imgSrc mempty mempty _ -> @@ -429,10 +431,10 @@ explicitOrImageLink = try $ do selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines) selflinkOrImage = try $ do target <- char '[' *> linkTarget <* char ']' - case cleanLinkString target of - Nothing -> case target of - '#':_ -> returnF $ B.link target "" (B.str target) - _ -> return $ internalLink target (B.str target) + case cleanLinkText target of + Nothing -> case T.uncons target of + Just ('#', _) -> returnF $ B.link target "" (B.str target) + _ -> return $ internalLink target (B.str target) Just nonDocTgt -> if isImageFilename nonDocTgt then returnF $ B.image nonDocTgt "" "" else returnF $ B.link nonDocTgt "" (B.str target) @@ -449,35 +451,35 @@ angleLink = try $ do char '>' return link -linkTarget :: PandocMonad m => OrgParser m String -linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]") +linkTarget :: PandocMonad m => OrgParser m Text +linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]") -possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String +possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]") -applyCustomLinkFormat :: String -> OrgParser m (F String) +applyCustomLinkFormat :: Text -> OrgParser m (F Text) applyCustomLinkFormat link = do - let (linkType, rest) = break (== ':') link + let (linkType, rest) = T.break (== ':') link return $ do formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters - return $ maybe link ($ drop 1 rest) formatter + return $ maybe link ($ T.drop 1 rest) formatter -- | Take a link and return a function which produces new inlines when given -- description inlines. -linkToInlinesF :: String -> Inlines -> F Inlines +linkToInlinesF :: Text -> Inlines -> F Inlines linkToInlinesF linkStr = - case linkStr of - "" -> pure . B.link mempty "" -- wiki link (empty by convention) - ('#':_) -> pure . B.link linkStr "" -- document-local fraction - _ -> case cleanLinkString linkStr of - Just extTgt -> return . B.link extTgt "" - Nothing -> internalLink linkStr -- other internal link - -internalLink :: String -> Inlines -> F Inlines + case T.uncons linkStr of + Nothing -> pure . B.link mempty "" -- wiki link (empty by convention) + Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction + _ -> case cleanLinkText linkStr of + Just extTgt -> return . B.link extTgt "" + Nothing -> internalLink linkStr -- other internal link + +internalLink :: Text -> Inlines -> F Inlines internalLink link title = do anchorB <- (link `elem`) <$> asksF orgStateAnchorIds if anchorB - then return $ B.link ('#':link) "" title + then return $ B.link ("#" <> link) "" title else return $ B.emph title -- | Parse an anchor like @<<anchor-id>>@ and return an empty span with @@ -493,15 +495,15 @@ anchor = try $ do returnF $ B.spanWith (solidify anchorId, [], []) mempty where parseAnchor = string "<<" - *> many1 (noneOf "\t\n\r<>\"' ") + *> many1Char (noneOf "\t\n\r<>\"' ") <* string ">>" <* skipSpaces -- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors -- the org function @org-export-solidify-link-text@. -solidify :: String -> String -solidify = map replaceSpecialChar +solidify :: Text -> Text +solidify = T.map replaceSpecialChar where replaceSpecialChar c | isAlphaNum c = c | c `elem` ("_.-:" :: String) = c @@ -511,25 +513,25 @@ solidify = map replaceSpecialChar inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines) inlineCodeBlock = try $ do string "src_" - lang <- many1 orgArgWordChar + lang <- many1Char orgArgWordChar opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption - inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r") + inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r") let attrClasses = [translateLang lang] let attrKeyVal = originalLang lang <> opts let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode returnF $ if exportsCode opts then codeInlineBlck else mempty where - inlineBlockOption :: PandocMonad m => OrgParser m (String, String) + inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text) inlineBlockOption = try $ do argKey <- orgArgKey paramValue <- option "yes" orgInlineParamValue return (argKey, paramValue) - orgInlineParamValue :: PandocMonad m => OrgParser m String + orgInlineParamValue :: PandocMonad m => OrgParser m Text orgInlineParamValue = try $ skipSpaces *> notFollowedBy (char ':') - *> many1 (noneOf "\t\n\r ]") + *> many1Char (noneOf "\t\n\r ]") <* skipSpaces @@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr) math :: PandocMonad m => OrgParser m (F Inlines) math = return . B.math <$> choice [ math1CharBetween '$' - , mathStringBetween '$' + , mathTextBetween '$' , rawMathBetween "\\(" "\\)" ] @@ -604,7 +606,7 @@ updatePositions c = do return c symbol :: PandocMonad m => OrgParser m (F Inlines) -symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions) +symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions) emphasisBetween :: PandocMonad m => Char @@ -619,7 +621,7 @@ emphasisBetween c = try $ do verbatimBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text verbatimBetween c = try $ emphasisStart c *> many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c) @@ -627,33 +629,33 @@ verbatimBetween c = try $ verbatimChar = noneOf "\n\r" >>= updatePositions -- | Parses a raw string delimited by @c@ using Org's math rules -mathStringBetween :: PandocMonad m +mathTextBetween :: PandocMonad m => Char - -> OrgParser m String -mathStringBetween c = try $ do + -> OrgParser m Text +mathTextBetween c = try $ do mathStart c body <- many1TillNOrLessNewlines mathAllowedNewlines (noneOf (c:"\n\r")) (lookAhead $ mathEnd c) final <- mathEnd c - return $ body ++ [final] + return $ T.snoc body final -- | Parse a single character between @c@ using math rules math1CharBetween :: PandocMonad m => Char - -> OrgParser m String + -> OrgParser m Text math1CharBetween c = try $ do char c res <- noneOf $ c:mathForbiddenBorderChars char c eof <|> () <$ lookAhead (oneOf mathPostChars) - return [res] + return $ T.singleton res rawMathBetween :: PandocMonad m - => String - -> String - -> OrgParser m String -rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e) + => Text + -> Text + -> OrgParser m Text +rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e) -- | Parses the start (opening character) of emphasis emphasisStart :: PandocMonad m => Char -> OrgParser m Char @@ -702,10 +704,10 @@ enclosedInlines start end = try $ enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a -> OrgParser m b - -> OrgParser m String + -> OrgParser m Text enclosedRaw start end = try $ start *> (onSingleLine <|> spanningTwoLines) - where onSingleLine = try $ many1Till (noneOf "\n\r") end + where onSingleLine = try $ many1TillChar (noneOf "\n\r") end spanningTwoLines = try $ anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine @@ -714,7 +716,7 @@ enclosedRaw start end = try $ many1TillNOrLessNewlines :: PandocMonad m => Int -> OrgParser m Char -> OrgParser m a - -> OrgParser m String + -> OrgParser m Text many1TillNOrLessNewlines n p end = try $ nMoreLines (Just n) mempty >>= oneOrMore where @@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $ rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline) finalLine = try $ manyTill p end minus1 k = k - 1 - oneOrMore cs = cs <$ guard (not $ null cs) + oneOrMore cs = T.pack cs <$ guard (not $ null cs) -- Org allows customization of the way it reads emphasis. We use the defaults -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components` @@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines) subOrSuperExpr = try $ choice [ charsInBalanced '{' '}' (noneOf "\n\r") , enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r") - , simpleSubOrSuperString + , simpleSubOrSuperText ] >>= parseFromString (mconcat <$> many inline) - where enclosing (left, right) s = left : s ++ [right] + where enclosing (left, right) s = T.cons left $ T.snoc s right -simpleSubOrSuperString :: PandocMonad m => OrgParser m String -simpleSubOrSuperString = try $ do +simpleSubOrSuperText :: PandocMonad m => OrgParser m Text +simpleSubOrSuperText = try $ do state <- getState guard . exportSubSuperscripts . orgStateExportSettings $ state - choice [ string "*" - , mappend <$> option [] ((:[]) <$> oneOf "+-") - <*> many1 alphaNum + choice [ textStr "*" + , mappend <$> option "" (T.singleton <$> oneOf "+-") + <*> many1Char alphaNum ] inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines) @@ -793,28 +795,28 @@ inlineLaTeX = try $ do maybe mzero returnF $ parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils where - parseAsMath :: String -> Maybe Inlines + parseAsMath :: Text -> Maybe Inlines parseAsMath cs = B.fromList <$> texMathToPandoc cs - parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines) + parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines) parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs - parseAsMathMLSym :: String -> Maybe Inlines + parseAsMathMLSym :: Text -> Maybe Inlines parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs) -- drop initial backslash and any trailing "{}" - where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1 + where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1 state :: ParserState state = def{ stateOptions = def{ readerExtensions = enableExtension Ext_raw_tex (readerExtensions def) } } - texMathToPandoc :: String -> Maybe [Inline] + texMathToPandoc :: Text -> Maybe [Inline] texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline maybeRight :: Either a b -> Maybe b maybeRight = either (const Nothing) Just -inlineLaTeXCommand :: PandocMonad m => OrgParser m String +inlineLaTeXCommand :: PandocMonad m => OrgParser m Text inlineLaTeXCommand = try $ do rest <- getInput st <- getState @@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do Right cs -> do -- drop any trailing whitespace, those are not be part of the command as -- far as org mode is concerned. - let cmdNoSpc = dropWhileEnd isSpace cs - let len = length cmdNoSpc + let cmdNoSpc = T.dropWhileEnd isSpace cs + let len = T.length cmdNoSpc count len anyChar return cmdNoSpc _ -> mzero --- Taken from Data.OldList. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) [] - exportSnippet :: PandocMonad m => OrgParser m (F Inlines) exportSnippet = try $ do string "@@" - format <- many1Till (alphaNum <|> char '-') (char ':') - snippet <- manyTill anyChar (try $ string "@@") + format <- many1TillChar (alphaNum <|> char '-') (char ':') + snippet <- manyTillChar anyChar (try $ string "@@") returnF $ B.rawInline format snippet macro :: PandocMonad m => OrgParser m (F Inlines) @@ -845,7 +843,7 @@ macro = try $ do recursionDepth <- orgStateMacroDepth <$> getState guard $ recursionDepth < 15 string "{{{" - name <- many alphaNum + name <- manyChar alphaNum args <- ([] <$ string "}}}") <|> char '(' *> argument `sepBy` char ',' <* eoa expander <- lookupMacro name <$> getState @@ -857,7 +855,7 @@ macro = try $ do updateState $ \s -> s { orgStateMacroDepth = recursionDepth } return res where - argument = many $ notFollowedBy eoa *> noneOf "," + argument = manyChar $ notFollowedBy eoa *> noneOf "," eoa = string ")}}}" smart :: PandocMonad m => OrgParser m (F Inlines) diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs index 0a388403e..811a5b974 100644 --- a/src/Text/Pandoc/Readers/Org/Meta.hs +++ b/src/Text/Pandoc/Readers/Org/Meta.hs @@ -1,6 +1,7 @@ {-# LANGUAGE NoImplicitPrelude #-} -{-# LANGUAGE FlexibleContexts #-} -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.Meta Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -30,11 +31,12 @@ import Text.Pandoc.Definition import Text.Pandoc.Shared (safeRead) import Control.Monad (mzero, void, when) -import Data.Char (toLower) import Data.List (intersperse) import Data.Maybe (fromMaybe) import qualified Data.Map as M import qualified Data.Set as Set +import Data.Text (Text) +import qualified Data.Text as T import Network.HTTP (urlEncode) -- | Returns the current meta, respecting export options. @@ -47,7 +49,7 @@ metaExport = do . (if exportWithEmail settings then id else removeMeta "email") <$> orgStateMeta st -removeMeta :: String -> Meta -> Meta +removeMeta :: Text -> Meta -> Meta removeMeta key meta' = let metaMap = unMeta meta' in Meta $ M.delete key metaMap @@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine) declarationLine :: PandocMonad m => OrgParser m () declarationLine = try $ do - key <- map toLower <$> metaKey + key <- T.toLower <$> metaKey (key', value) <- metaValue key let addMetaValue st = st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st } when (key' /= "results") $ updateState addMetaValue -metaKey :: Monad m => OrgParser m String -metaKey = map toLower <$> many1 (noneOf ": \n\r") - <* char ':' - <* skipSpaces +metaKey :: Monad m => OrgParser m Text +metaKey = T.toLower <$> many1Char (noneOf ": \n\r") + <* char ':' + <* skipSpaces -metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue) +metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue) metaValue key = let inclKey = "header-includes" in case key of @@ -88,7 +90,7 @@ metaValue key = -- Org-mode expects class options to contain the surrounding brackets, -- pandoc does not. "latex_class_options" -> ("classoption",) <$> - metaModifiedString (filter (`notElem` "[]")) + metaModifiedString (T.filter (`notElem` ("[]" :: String))) "html_head" -> (inclKey,) <$> accumulatingList inclKey (metaExportSnippet "html") _ -> (key,) <$> metaString @@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue) metaInlinesCommaSeparated = do - itemStrs <- many1 (noneOf ",\n") `sepBy1` char ',' + itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ',' newline - items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs + items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs let toMetaInlines = MetaInlines . B.toList return $ MetaList . map toMetaInlines <$> sequence items metaString :: Monad m => OrgParser m (F MetaValue) metaString = metaModifiedString id -metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue) +metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue) metaModifiedString f = return . MetaString . f <$> anyLine -- | Read an format specific meta definition -metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue) +metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue) metaExportSnippet format = return . MetaInlines . B.toList . B.rawInline format <$> anyLine -- | Accumulate the result of the @parser@ in a list under @key@. -accumulatingList :: Monad m => String +accumulatingList :: Monad m => Text -> OrgParser m (F MetaValue) -> OrgParser m (F MetaValue) accumulatingList key p = do @@ -147,33 +149,33 @@ optionLine = try $ do "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar _ -> mzero -addLinkFormat :: Monad m => String - -> (String -> String) +addLinkFormat :: Monad m => Text + -> (Text -> Text) -> OrgParser m () addLinkFormat key formatter = updateState $ \s -> let fs = orgStateLinkFormatters s in s{ orgStateLinkFormatters = M.insert key formatter fs } -parseLinkFormat :: Monad m => OrgParser m (String, String -> String) +parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text) parseLinkFormat = try $ do - linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces + linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces linkSubst <- parseFormat return (linkType, linkSubst) -- | An ad-hoc, single-argument-only implementation of a printf-style format -- parser. -parseFormat :: Monad m => OrgParser m (String -> String) +parseFormat :: Monad m => OrgParser m (Text -> Text) parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend where -- inefficient, but who cares - replacePlain = try $ (\x -> concat . flip intersperse x) + replacePlain = try $ (\x -> T.concat . flip intersperse x) <$> sequence [tillSpecifier 's', rest] - replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode) + replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack) <$> sequence [tillSpecifier 'h', rest] - justAppend = try $ (++) <$> rest + justAppend = try $ (<>) <$> rest - rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r") - tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:"")) + rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r") + tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:"")) tagList :: Monad m => OrgParser m [Tag] tagList = do @@ -231,41 +233,41 @@ todoSequence = try $ do (x:xs) -> return $ keywordsToSequence (reverse xs) [x] where - todoKeywords :: Monad m => OrgParser m [String] + todoKeywords :: Monad m => OrgParser m [Text] todoKeywords = try $ - let keyword = many1 nonspaceChar <* skipSpaces + let keyword = many1Char nonspaceChar <* skipSpaces endOfKeywords = todoDoneSep <|> void newline in manyTill keyword (lookAhead endOfKeywords) todoDoneSep :: Monad m => OrgParser m () todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1 - keywordsToSequence :: [String] -> [String] -> TodoSequence + keywordsToSequence :: [Text] -> [Text] -> TodoSequence keywordsToSequence todo done = let todoMarkers = map (TodoMarker Todo) todo doneMarkers = map (TodoMarker Done) done in todoMarkers ++ doneMarkers -macroDefinition :: Monad m => OrgParser m (String, [String] -> String) +macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text) macroDefinition = try $ do - macroName <- many1 nonspaceChar <* skipSpaces + macroName <- many1Char nonspaceChar <* skipSpaces firstPart <- expansionPart (elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart) let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder return (macroName, expander) where placeholder :: Monad m => OrgParser m Int - placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit + placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit - expansionPart :: Monad m => OrgParser m String - expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r") + expansionPart :: Monad m => OrgParser m Text + expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r") alternate :: [a] -> [a] -> [a] alternate [] ys = ys alternate xs [] = xs alternate (x:xs) (y:ys) = x : y : alternate xs ys - reorder :: [Int] -> [String] -> [String] + reorder :: [Int] -> [Text] -> [Text] reorder perm xs = let element n = take 1 $ drop (n - 1) xs in concatMap element perm diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs index d6dde8b22..cf5583b76 100644 --- a/src/Text/Pandoc/Readers/Org/ParserState.hs +++ b/src/Text/Pandoc/Readers/Org/ParserState.hs @@ -1,6 +1,7 @@ -{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} +{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Readers.Org.ParserState Copyright : Copyright (C) 2014-2019 Albert Krewinkel @@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro) type F = Future OrgParserState -- | An inline note / footnote containing the note key and its (inline) value. -type OrgNoteRecord = (String, F Blocks) +type OrgNoteRecord = (Text, F Blocks) -- | Table of footnotes type OrgNoteTable = [OrgNoteRecord] -- | Map of functions for link transformations. The map key is refers to the -- link-type, the corresponding function transforms the given link string. -type OrgLinkFormatters = M.Map String (String -> String) +type OrgLinkFormatters = M.Map Text (Text -> Text) -- | Macro expander function -type MacroExpander = [String] -> String +type MacroExpander = [Text] -> Text -- | Tag -newtype Tag = Tag { fromTag :: String } +newtype Tag = Tag { fromTag :: Text } deriving (Show, Eq, Ord) -- | The states in which a todo item can be @@ -82,7 +83,7 @@ data TodoState = Todo | Done -- | A ToDo keyword like @TODO@ or @DONE@. data TodoMarker = TodoMarker { todoMarkerState :: TodoState - , todoMarkerName :: String + , todoMarkerName :: Text } deriving (Show, Eq) @@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker] -- | Org-mode parser state data OrgParserState = OrgParserState - { orgStateAnchorIds :: [String] + { orgStateAnchorIds :: [Text] , orgStateEmphasisCharStack :: [Char] , orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before -- emphasis; spaces and newlines are @@ -102,13 +103,13 @@ data OrgParserState = OrgParserState , orgStateExcludeTags :: Set.Set Tag , orgStateExcludeTagsChanged :: Bool , orgStateExportSettings :: ExportSettings - , orgStateIdentifiers :: Set.Set String - , orgStateIncludeFiles :: [String] + , orgStateIdentifiers :: Set.Set Text + , orgStateIncludeFiles :: [Text] , orgStateLastForbiddenCharPos :: Maybe SourcePos , orgStateLastPreCharPos :: Maybe SourcePos , orgStateLastStrPos :: Maybe SourcePos , orgStateLinkFormatters :: OrgLinkFormatters - , orgStateMacros :: M.Map String MacroExpander + , orgStateMacros :: M.Map Text MacroExpander , orgStateMacroDepth :: Int , orgStateMeta :: F Meta , orgStateNotes' :: OrgNoteTable @@ -212,10 +213,10 @@ activeTodoSequences st = activeTodoMarkers :: OrgParserState -> TodoSequence activeTodoMarkers = concat . activeTodoSequences -lookupMacro :: String -> OrgParserState -> Maybe MacroExpander +lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander lookupMacro macroName = M.lookup macroName . orgStateMacros -registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState +registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState registerMacro (name, expander) st = let curMacros = orgStateMacros st in st{ orgStateMacros = M.insert name expander curMacros } @@ -236,7 +237,7 @@ data ArchivedTreesOption = -- These settings can be changed via OPTIONS statements. data ExportSettings = ExportSettings { exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees - , exportDrawers :: Either [String] [String] + , exportDrawers :: Either [Text] [Text] -- ^ Specify drawer names which should be exported. @Left@ names are -- explicitly excluded from the resulting output while @Right@ means that -- only the listed drawer names should be included. diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs index 24aa0779d..718925120 100644 --- a/src/Text/Pandoc/Readers/Org/Parsing.hs +++ b/src/Text/Pandoc/Readers/Org/Parsing.hs @@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing , orgTagWordChar -- * Re-exports from Text.Pandoc.Parser , ParserContext (..) + , textStr + , countChar + , manyChar + , many1Char + , manyTillChar , many1Till + , many1TillChar , notFollowedBy' , spaceChar , nonspaceChar @@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing ) where import Prelude +import Data.Text (Text) import Text.Pandoc.Readers.Org.ParserState import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline, @@ -108,14 +115,14 @@ import Control.Monad (guard) import Control.Monad.Reader (ReaderT) -- | The parser used to read org files. -type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m) +type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m) -- -- Adaptions and specializations of parsing utilities -- -- | Parse any line of text -anyLine :: Monad m => OrgParser m String +anyLine :: Monad m => OrgParser m Text anyLine = P.anyLine <* updateLastPreCharPos @@ -123,7 +130,7 @@ anyLine = -- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character -- allowed before emphasised text. -parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a +parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a parseFromString parser str' = do updateState $ \s -> s{ orgStateLastPreCharPos = Nothing } result <- P.parseFromString parser str' @@ -142,7 +149,7 @@ newline = <* updateLastForbiddenCharPos -- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes. -blanklines :: Monad m => OrgParser m [Char] +blanklines :: Monad m => OrgParser m Text blanklines = P.blanklines <* updateLastPreCharPos @@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p -> -- -- | Read the key of a plist style key-value list. -orgArgKey :: Monad m => OrgParser m String +orgArgKey :: Monad m => OrgParser m Text orgArgKey = try $ skipSpaces *> char ':' - *> many1 orgArgWordChar + *> many1Char orgArgWordChar -- | Read the value of a plist style key-value list. -orgArgWord :: Monad m => OrgParser m String -orgArgWord = many1 orgArgWordChar +orgArgWord :: Monad m => OrgParser m Text +orgArgWord = many1Char orgArgWordChar -- | Chars treated as part of a word in plists. orgArgWordChar :: Monad m => OrgParser m Char orgArgWordChar = alphaNum <|> oneOf "-_" -orgTagWord :: Monad m => OrgParser m String -orgTagWord = many1 orgTagWordChar +orgTagWord :: Monad m => OrgParser m Text +orgTagWord = many1Char orgTagWordChar orgTagWordChar :: Monad m => OrgParser m Char orgTagWordChar = alphaNum <|> oneOf "@%#_" diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs index 34f958373..be0a2068e 100644 --- a/src/Text/Pandoc/Readers/Org/Shared.hs +++ b/src/Text/Pandoc/Readers/Org/Shared.hs @@ -10,7 +10,7 @@ Utility functions used in other Pandoc Org modules. -} module Text.Pandoc.Readers.Org.Shared - ( cleanLinkString + ( cleanLinkText , isImageFilename , originalLang , translateLang @@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared import Prelude import Data.Char (isAlphaNum) -import Data.List (isPrefixOf) +import Data.Text (Text) +import qualified Data.Text as T import System.FilePath (isValid, takeExtension) - +import Text.Pandoc.Shared (elemText) -- | Check whether the given string looks like the path to of URL of an image. -isImageFilename :: String -> Bool -isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri) +isImageFilename :: Text -> Bool +isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri) where - hasImageExtension = takeExtension fp `elem` imageExtensions - isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols + hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions + isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ] protocols = [ "file", "http", "https" ] -- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if -- the string does not appear to be a link. -cleanLinkString :: String -> Maybe String -cleanLinkString s = - case s of - '/':_ -> Just $ "file://" ++ s -- absolute path - '.':'/':_ -> Just s -- relative path - '.':'.':'/':_ -> Just s -- relative path - -- Relative path or URL (file schema) - 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s' - _ -> if isUrl s then Just s else Nothing - where - isUrl :: String -> Bool - isUrl cs = - let (scheme, path) = break (== ':') cs - in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme - && not (null path) +cleanLinkText :: Text -> Maybe Text +cleanLinkText s + | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path + | Just _ <- T.stripPrefix "./" s = Just s -- relative path + | Just _ <- T.stripPrefix "../" s = Just s -- relative path + -- Relative path or URL (file schema) + | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s' + | otherwise = if isUrl s then Just s else Nothing + where + isUrl :: Text -> Bool + isUrl cs = + let (scheme, path) = T.break (== ':') cs + in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme + && not (T.null path) -- | Creates an key-value pair marking the original language name specified for -- a piece of source code. -- | Creates an key-value attributes marking the original language name -- specified for a piece of source code. -originalLang :: String -> [(String, String)] +originalLang :: Text -> [(Text, Text)] originalLang lang = let transLang = translateLang lang in if transLang == lang @@ -66,7 +66,7 @@ originalLang lang = -- | Translate from Org-mode's programming language identifiers to those used -- by Pandoc. This is useful to allow for proper syntax highlighting in -- Pandoc output. -translateLang :: String -> String +translateLang :: Text -> Text translateLang cs = case cs of "C" -> "c" @@ -79,5 +79,5 @@ translateLang cs = "sqlite" -> "sql" _ -> cs -exportsCode :: [(String, String)] -> Bool +exportsCode :: [(Text, Text)] -> Bool exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports" |