aboutsummaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Readers/Org
diff options
context:
space:
mode:
Diffstat (limited to 'src/Text/Pandoc/Readers/Org')
-rw-r--r--src/Text/Pandoc/Readers/Org/BlockStarts.hs25
-rw-r--r--src/Text/Pandoc/Readers/Org/Blocks.hs219
-rw-r--r--src/Text/Pandoc/Readers/Org/DocumentTree.hs44
-rw-r--r--src/Text/Pandoc/Readers/Org/ExportSettings.hs40
-rw-r--r--src/Text/Pandoc/Readers/Org/Inlines.hs154
-rw-r--r--src/Text/Pandoc/Readers/Org/Meta.hs72
-rw-r--r--src/Text/Pandoc/Readers/Org/ParserState.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Parsing.hs27
-rw-r--r--src/Text/Pandoc/Readers/Org/Shared.hs50
9 files changed, 338 insertions, 320 deletions
diff --git a/src/Text/Pandoc/Readers/Org/BlockStarts.hs b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
index 58db4f46c..b4f3cc0d8 100644
--- a/src/Text/Pandoc/Readers/Org/BlockStarts.hs
+++ b/src/Text/Pandoc/Readers/Org/BlockStarts.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.BlockStarts
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -25,6 +26,8 @@ module Text.Pandoc.Readers.Org.BlockStarts
import Prelude
import Control.Monad (void)
+import Data.Text (Text)
+import qualified Data.Text as T
import Text.Pandoc.Readers.Org.Parsing
-- | Horizontal Line (five -- dashes or more)
@@ -49,15 +52,15 @@ gridTableStart :: Monad m => OrgParser m ()
gridTableStart = try $ skipSpaces <* char '+' <* char '-'
-latexEnvStart :: Monad m => OrgParser m String
+latexEnvStart :: Monad m => OrgParser m Text
latexEnvStart = try $
skipSpaces *> string "\\begin{"
*> latexEnvName
<* string "}"
<* blankline
where
- latexEnvName :: Monad m => OrgParser m String
- latexEnvName = try $ mappend <$> many1 alphaNum <*> option "" (string "*")
+ latexEnvName :: Monad m => OrgParser m Text
+ latexEnvName = try $ mappend <$> many1Char alphaNum <*> option "" (textStr "*")
bulletListStart :: Monad m => OrgParser m Int
bulletListStart = try $ do
@@ -68,7 +71,7 @@ bulletListStart = try $ do
return (ind + 1)
genericListStart :: Monad m
- => OrgParser m String
+ => OrgParser m Text
-> OrgParser m Int
genericListStart listMarker = try $ do
ind <- length <$> many spaceChar
@@ -82,11 +85,11 @@ eol = void (char '\n')
orderedListStart :: Monad m => OrgParser m Int
orderedListStart = genericListStart orderedListMarker
-- Ordered list markers allowed in org-mode
- where orderedListMarker = mappend <$> many1 digit <*> (pure <$> oneOf ".)")
+ where orderedListMarker = T.snoc <$> many1Char digit <*> oneOf ".)"
-drawerStart :: Monad m => OrgParser m String
+drawerStart :: Monad m => OrgParser m Text
drawerStart = try $ skipSpaces *> drawerName <* skipSpaces <* newline
- where drawerName = char ':' *> manyTill nonspaceChar (char ':')
+ where drawerName = char ':' *> manyTillChar nonspaceChar (char ':')
metaLineStart :: Monad m => OrgParser m ()
metaLineStart = try $ skipSpaces <* string "#+"
@@ -99,12 +102,12 @@ commentLineStart = try $
exampleLineStart :: Monad m => OrgParser m ()
exampleLineStart = () <$ try (skipSpaces *> string ": ")
-noteMarker :: Monad m => OrgParser m String
+noteMarker :: Monad m => OrgParser m Text
noteMarker = try $ do
char '['
- choice [ many1Till digit (char ']')
- , (++) <$> string "fn:"
- <*> many1Till (noneOf "\n\r\t ") (char ']')
+ choice [ many1TillChar digit (char ']')
+ , (<>) <$> textStr "fn:"
+ <*> many1TillChar (noneOf "\n\r\t ") (char ']')
]
-- | Succeeds if the parser is at the end of a block.
diff --git a/src/Text/Pandoc/Readers/Org/Blocks.hs b/src/Text/Pandoc/Readers/Org/Blocks.hs
index cba876f06..de51dec3d 100644
--- a/src/Text/Pandoc/Readers/Org/Blocks.hs
+++ b/src/Text/Pandoc/Readers/Org/Blocks.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Blocks
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -23,7 +24,7 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.Meta (metaExport, metaKey, metaLine)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Blocks, Inlines)
@@ -33,11 +34,13 @@ import Text.Pandoc.Options
import Text.Pandoc.Shared (compactify, compactifyDL, safeRead)
import Control.Monad (foldM, guard, mzero, void)
-import Data.Char (isSpace, toLower, toUpper)
+import Data.Char (isSpace)
import Data.Default (Default)
-import Data.List (foldl', isPrefixOf)
+import Data.List (foldl')
import Data.Maybe (fromMaybe, isJust, isNothing)
+import Data.Text (Text)
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import qualified Text.Pandoc.Walk as Walk
@@ -90,10 +93,10 @@ horizontalRule = return B.horizontalRule <$ try hline
-- | Attributes that may be added to figures (like a name or caption).
data BlockAttributes = BlockAttributes
- { blockAttrName :: Maybe String
- , blockAttrLabel :: Maybe String
+ { blockAttrName :: Maybe Text
+ , blockAttrLabel :: Maybe Text
, blockAttrCaption :: Maybe (F Inlines)
- , blockAttrKeyValues :: [(String, String)]
+ , blockAttrKeyValues :: [(Text, Text)]
}
-- | Convert BlockAttributes into pandoc Attr
@@ -103,14 +106,14 @@ attrFromBlockAttributes BlockAttributes{..} =
ident = fromMaybe mempty $ lookup "id" blockAttrKeyValues
classes = case lookup "class" blockAttrKeyValues of
Nothing -> []
- Just clsStr -> words clsStr
+ Just clsStr -> T.words clsStr
kv = filter ((`notElem` ["id", "class"]) . fst) blockAttrKeyValues
in (ident, classes, kv)
-stringyMetaAttribute :: Monad m => OrgParser m (String, String)
+stringyMetaAttribute :: Monad m => OrgParser m (Text, Text)
stringyMetaAttribute = try $ do
metaLineStart
- attrName <- map toUpper <$> many1Till nonspaceChar (char ':')
+ attrName <- T.toUpper <$> many1TillChar nonspaceChar (char ':')
skipSpaces
attrValue <- anyLine <|> ("" <$ newline)
return (attrName, attrValue)
@@ -129,8 +132,8 @@ blockAttributes = try $ do
let label = lookup "LABEL" kv
caption' <- case caption of
Nothing -> return Nothing
- Just s -> Just <$> parseFromString inlines (s ++ "\n")
- kvAttrs' <- parseFromString keyValues . (++ "\n") $ fromMaybe mempty kvAttrs
+ Just s -> Just <$> parseFromString inlines (s <> "\n")
+ kvAttrs' <- parseFromString keyValues . (<> "\n") $ fromMaybe mempty kvAttrs
return BlockAttributes
{ blockAttrName = name
, blockAttrLabel = label
@@ -138,31 +141,31 @@ blockAttributes = try $ do
, blockAttrKeyValues = kvAttrs'
}
where
- isBlockAttr :: String -> Bool
+ isBlockAttr :: Text -> Bool
isBlockAttr = flip elem
[ "NAME", "LABEL", "CAPTION"
, "ATTR_HTML", "ATTR_LATEX"
, "RESULTS"
]
- appendValues :: String -> Maybe String -> (String, String) -> Maybe String
+ appendValues :: Text -> Maybe Text -> (Text, Text) -> Maybe Text
appendValues attrName accValue (key, value) =
if key /= attrName
then accValue
else case accValue of
- Just acc -> Just $ acc ++ ' ':value
+ Just acc -> Just $ acc <> " " <> value
Nothing -> Just value
-- | Parse key-value pairs for HTML attributes
-keyValues :: Monad m => OrgParser m [(String, String)]
+keyValues :: Monad m => OrgParser m [(Text, Text)]
keyValues = try $
manyTill ((,) <$> key <*> value) newline
where
- key :: Monad m => OrgParser m String
- key = try $ skipSpaces *> char ':' *> many1 nonspaceChar
+ key :: Monad m => OrgParser m Text
+ key = try $ skipSpaces *> char ':' *> many1Char nonspaceChar
- value :: Monad m => OrgParser m String
- value = skipSpaces *> manyTill anyChar endOfValue
+ value :: Monad m => OrgParser m Text
+ value = skipSpaces *> manyTillChar anyChar endOfValue
endOfValue :: Monad m => OrgParser m ()
endOfValue =
@@ -180,7 +183,7 @@ orgBlock = try $ do
blockAttrs <- blockAttributes
blkType <- blockHeaderStart
($ blkType) $
- case map toLower blkType of
+ case T.toLower blkType of
"export" -> exportBlock
"comment" -> rawBlockLines (const mempty)
"html" -> rawBlockLines (return . B.rawBlock (lowercase blkType))
@@ -194,13 +197,13 @@ orgBlock = try $ do
let (ident, classes, kv) = attrFromBlockAttributes blockAttrs
in fmap $ B.divWith (ident, classes ++ [blkType], kv)
where
- blockHeaderStart :: Monad m => OrgParser m String
+ blockHeaderStart :: Monad m => OrgParser m Text
blockHeaderStart = try $ skipSpaces *> stringAnyCase "#+begin_" *> orgArgWord
- lowercase :: String -> String
- lowercase = map toLower
+ lowercase :: Text -> Text
+ lowercase = T.toLower
-exampleBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+exampleBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
exampleBlock blockAttrs _label = do
skipSpaces
(classes, kv) <- switchesAsAttributes
@@ -210,54 +213,54 @@ exampleBlock blockAttrs _label = do
let codeBlck = B.codeBlockWith (id', "example":classes, kv) content
return . return $ codeBlck
-rawBlockLines :: Monad m => (String -> F Blocks) -> String -> OrgParser m (F Blocks)
+rawBlockLines :: Monad m => (Text -> F Blocks) -> Text -> OrgParser m (F Blocks)
rawBlockLines f blockType = ignHeaders *> (f <$> rawBlockContent blockType)
-parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> String -> OrgParser m (F Blocks)
+parseBlockLines :: PandocMonad m => (F Blocks -> F Blocks) -> Text -> OrgParser m (F Blocks)
parseBlockLines f blockType = ignHeaders *> (f <$> parsedBlockContent)
where
parsedBlockContent :: PandocMonad m => OrgParser m (F Blocks)
parsedBlockContent = try $ do
raw <- rawBlockContent blockType
- parseFromString blocks (raw ++ "\n")
+ parseFromString blocks (raw <> "\n")
-- | Read the raw string content of a block
-rawBlockContent :: Monad m => String -> OrgParser m String
+rawBlockContent :: Monad m => Text -> OrgParser m Text
rawBlockContent blockType = try $ do
blkLines <- manyTill rawLine blockEnder
tabLen <- getOption readerTabStop
trimP <- orgStateTrimLeadBlkIndent <$> getState
- let stripIndent strs = if trimP then map (drop (shortestIndent strs)) strs else strs
- (unlines
+ let stripIndent strs = if trimP then map (T.drop (shortestIndent strs)) strs else strs
+ (T.unlines
. stripIndent
. map (tabsToSpaces tabLen . commaEscaped)
$ blkLines)
<$ updateState (\s -> s { orgStateTrimLeadBlkIndent = True })
where
- rawLine :: Monad m => OrgParser m String
+ rawLine :: Monad m => OrgParser m Text
rawLine = try $ ("" <$ blankline) <|> anyLine
blockEnder :: Monad m => OrgParser m ()
blockEnder = try $ skipSpaces <* stringAnyCase ("#+end_" <> blockType)
- shortestIndent :: [String] -> Int
- shortestIndent = foldr (min . length . takeWhile isSpace) maxBound
- . filter (not . null)
-
- tabsToSpaces :: Int -> String -> String
- tabsToSpaces _ [] = []
- tabsToSpaces tabLen cs'@(c:cs) =
- case c of
- ' ' -> ' ':tabsToSpaces tabLen cs
- '\t' -> replicate tabLen ' ' ++ tabsToSpaces tabLen cs
- _ -> cs'
-
- commaEscaped :: String -> String
- commaEscaped (',':cs@('*':_)) = cs
- commaEscaped (',':cs@('#':'+':_)) = cs
- commaEscaped (' ':cs) = ' ':commaEscaped cs
- commaEscaped ('\t':cs) = '\t':commaEscaped cs
- commaEscaped cs = cs
+ shortestIndent :: [Text] -> Int
+ shortestIndent = foldr (min . T.length . T.takeWhile isSpace) maxBound
+ . filter (not . T.null)
+
+ tabsToSpaces :: Int -> Text -> Text
+ tabsToSpaces tabStop t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ tabNum = T.length $ T.filter (== '\n') ind
+ spaceNum = T.length ind - tabNum
+ in T.replicate (spaceNum + tabStop * tabNum) " " <> suff
+
+ commaEscaped t =
+ let (ind, suff) = T.span (\c -> c == ' ' || c == '\t') t
+ in case T.uncons suff of
+ Just (',', cs)
+ | "*" <- T.take 1 cs -> ind <> cs
+ | "#+" <- T.take 2 cs -> ind <> cs
+ _ -> t
-- | Read but ignore all remaining block headers.
ignHeaders :: Monad m => OrgParser m ()
@@ -265,34 +268,34 @@ ignHeaders = (() <$ newline) <|> (() <$ anyLine)
-- | Read a block containing code intended for export in specific backends
-- only.
-exportBlock :: Monad m => String -> OrgParser m (F Blocks)
+exportBlock :: Monad m => Text -> OrgParser m (F Blocks)
exportBlock blockType = try $ do
exportType <- skipSpaces *> orgArgWord <* ignHeaders
contents <- rawBlockContent blockType
- returnF (B.rawBlock (map toLower exportType) contents)
+ returnF (B.rawBlock (T.toLower exportType) contents)
-verseBlock :: PandocMonad m => String -> OrgParser m (F Blocks)
+verseBlock :: PandocMonad m => Text -> OrgParser m (F Blocks)
verseBlock blockType = try $ do
ignHeaders
content <- rawBlockContent blockType
fmap B.lineBlock . sequence
- <$> mapM parseVerseLine (lines content)
+ <$> mapM parseVerseLine (T.lines content)
where
-- replace initial spaces with nonbreaking spaces to preserve
-- indentation, parse the rest as normal inline
- parseVerseLine :: PandocMonad m => String -> OrgParser m (F Inlines)
+ parseVerseLine :: PandocMonad m => Text -> OrgParser m (F Inlines)
parseVerseLine cs = do
- let (initialSpaces, indentedLine) = span isSpace cs
- let nbspIndent = if null initialSpaces
+ let (initialSpaces, indentedLine) = T.span isSpace cs
+ let nbspIndent = if T.null initialSpaces
then mempty
- else B.str $ map (const '\160') initialSpaces
- line <- parseFromString inlines (indentedLine ++ "\n")
+ else B.str $ T.map (const '\160') initialSpaces
+ line <- parseFromString inlines (indentedLine <> "\n")
return (trimInlinesF $ pure nbspIndent <> line)
-- | Read a code block and the associated results block if present. Which of
-- boths blocks is included in the output is determined using the "exports"
-- argument in the block header.
-codeBlock :: PandocMonad m => BlockAttributes -> String -> OrgParser m (F Blocks)
+codeBlock :: PandocMonad m => BlockAttributes -> Text -> OrgParser m (F Blocks)
codeBlock blockAttrs blockType = do
skipSpaces
(classes, kv) <- codeHeaderArgs <|> (mempty <$ ignHeaders)
@@ -314,7 +317,7 @@ codeBlock blockAttrs blockType = do
labelledBlock :: F Inlines -> F Blocks
labelledBlock = fmap (B.plain . B.spanWith ("", ["label"], []))
- exportsResults :: [(String, String)] -> Bool
+ exportsResults :: [(Text, Text)] -> Bool
exportsResults = maybe False (`elem` ["results", "both"]) . lookup "exports"
-- | Parse the result of an evaluated babel code block.
@@ -329,7 +332,7 @@ babelResultsBlock = try $ do
resultsMarker = try . void $ stringAnyCase "#+RESULTS:" *> blankline
-- | Parse code block arguments
-codeHeaderArgs :: Monad m => OrgParser m ([String], [(String, String)])
+codeHeaderArgs :: Monad m => OrgParser m ([Text], [(Text, Text)])
codeHeaderArgs = try $ do
language <- skipSpaces *> orgArgWord
(switchClasses, switchKv) <- switchesAsAttributes
@@ -338,14 +341,14 @@ codeHeaderArgs = try $ do
, originalLang language <> switchKv <> parameters
)
-switchesAsAttributes :: Monad m => OrgParser m ([String], [(String, String)])
+switchesAsAttributes :: Monad m => OrgParser m ([Text], [(Text, Text)])
switchesAsAttributes = try $ do
switches <- skipSpaces *> try (switch `sepBy` many1 spaceChar)
return $ foldr addToAttr ([], []) switches
where
- addToAttr :: (Char, Maybe String, SwitchPolarity)
- -> ([String], [(String, String)])
- -> ([String], [(String, String)])
+ addToAttr :: (Char, Maybe Text, SwitchPolarity)
+ -> ([Text], [(Text, Text)])
+ -> ([Text], [(Text, Text)])
addToAttr ('n', lineNum, pol) (cls, kv) =
let kv' = case lineNum of
Just num -> ("startFrom", num):kv
@@ -365,15 +368,15 @@ switchPolarity :: Monad m => OrgParser m SwitchPolarity
switchPolarity = (SwitchMinus <$ char '-') <|> (SwitchPlus <$ char '+')
-- | Parses a source block switch option.
-switch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+switch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
switch = try $ lineNumberSwitch <|> labelSwitch
<|> whitespaceSwitch <|> simpleSwitch
where
simpleSwitch = (\pol c -> (c, Nothing, pol)) <$> switchPolarity <*> letter
labelSwitch = genericSwitch 'l' $
- char '"' *> many1Till nonspaceChar (char '"')
+ char '"' *> many1TillChar nonspaceChar (char '"')
-whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
+whitespaceSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
whitespaceSwitch = do
string "-i"
updateState $ \s -> s { orgStateTrimLeadBlkIndent = False }
@@ -382,8 +385,8 @@ whitespaceSwitch = do
-- | Generic source block switch-option parser.
genericSwitch :: Monad m
=> Char
- -> OrgParser m String
- -> OrgParser m (Char, Maybe String, SwitchPolarity)
+ -> OrgParser m Text
+ -> OrgParser m (Char, Maybe Text, SwitchPolarity)
genericSwitch c p = try $ do
polarity <- switchPolarity <* char c <* skipSpaces
arg <- optionMaybe p
@@ -391,17 +394,17 @@ genericSwitch c p = try $ do
-- | Reads a line number switch option. The line number switch can be used with
-- example and source blocks.
-lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe String, SwitchPolarity)
-lineNumberSwitch = genericSwitch 'n' (many digit)
+lineNumberSwitch :: Monad m => OrgParser m (Char, Maybe Text, SwitchPolarity)
+lineNumberSwitch = genericSwitch 'n' (manyChar digit)
-blockOption :: Monad m => OrgParser m (String, String)
+blockOption :: Monad m => OrgParser m (Text, Text)
blockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgParamValue
return (argKey, paramValue)
-orgParamValue :: Monad m => OrgParser m String
-orgParamValue = try $
+orgParamValue :: Monad m => OrgParser m Text
+orgParamValue = try $ fmap T.pack $
skipSpaces
*> notFollowedBy orgArgKey
*> noneOf "\n\r" `many1Till` endOfValue
@@ -420,7 +423,7 @@ orgParamValue = try $
-- export setting.
genericDrawer :: PandocMonad m => OrgParser m (F Blocks)
genericDrawer = try $ do
- name <- map toUpper <$> drawerStart
+ name <- T.toUpper <$> drawerStart
content <- manyTill drawerLine (try drawerEnd)
state <- getState
-- Include drawer if it is explicitly included in or not explicitly excluded
@@ -432,16 +435,16 @@ genericDrawer = try $ do
Right names | name `notElem` names -> return mempty
_ -> drawerDiv name <$> parseLines content
where
- parseLines :: PandocMonad m => [String] -> OrgParser m (F Blocks)
- parseLines = parseFromString blocks . (++ "\n") . unlines
+ parseLines :: PandocMonad m => [Text] -> OrgParser m (F Blocks)
+ parseLines = parseFromString blocks . (<> "\n") . T.unlines
- drawerDiv :: String -> F Blocks -> F Blocks
+ drawerDiv :: Text -> F Blocks -> F Blocks
drawerDiv drawerName = fmap $ B.divWith (mempty, [drawerName, "drawer"], mempty)
-drawerLine :: Monad m => OrgParser m String
+drawerLine :: Monad m => OrgParser m Text
drawerLine = anyLine
-drawerEnd :: Monad m => OrgParser m String
+drawerEnd :: Monad m => OrgParser m Text
drawerEnd = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
@@ -456,17 +459,17 @@ figure :: PandocMonad m => OrgParser m (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* endOfParagraph
- case cleanLinkString src of
+ case cleanLinkText src of
Nothing -> mzero
Just imgSrc -> do
guard (isImageFilename imgSrc)
let isFigure = isJust $ blockAttrCaption figAttrs
return $ imageBlock isFigure figAttrs imgSrc
where
- selfTarget :: PandocMonad m => OrgParser m String
+ selfTarget :: PandocMonad m => OrgParser m Text
selfTarget = try $ char '[' *> linkTarget <* char ']'
- imageBlock :: Bool -> BlockAttributes -> String -> F Blocks
+ imageBlock :: Bool -> BlockAttributes -> Text -> F Blocks
imageBlock isFigure figAttrs imgSrc =
let
figName = fromMaybe mempty $ blockAttrName figAttrs
@@ -478,11 +481,11 @@ figure = try $ do
in
B.para . B.imageWith attr imgSrc figTitle <$> figCaption
- withFigPrefix :: String -> String
+ withFigPrefix :: Text -> Text
withFigPrefix cs =
- if "fig:" `isPrefixOf` cs
+ if "fig:" `T.isPrefixOf` cs
then cs
- else "fig:" ++ cs
+ else "fig:" <> cs
-- | Succeeds if looking at the end of the current paragraph
endOfParagraph :: Monad m => OrgParser m ()
@@ -495,12 +498,12 @@ endOfParagraph = try $ skipSpaces *> newline *> endOfBlock
-- | Example code marked up by a leading colon.
example :: Monad m => OrgParser m (F Blocks)
-example = try $ returnF . exampleCode =<< unlines <$> many1 exampleLine
+example = try $ returnF . exampleCode =<< T.unlines <$> many1 exampleLine
where
- exampleLine :: Monad m => OrgParser m String
+ exampleLine :: Monad m => OrgParser m Text
exampleLine = try $ exampleLineStart *> anyLine
-exampleCode :: String -> Blocks
+exampleCode :: Text -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
@@ -516,7 +519,7 @@ include :: PandocMonad m => OrgParser m (F Blocks)
include = try $ do
metaLineStart <* stringAnyCase "include:" <* skipSpaces
filename <- includeTarget
- includeArgs <- many (try $ skipSpaces *> many1 alphaNum)
+ includeArgs <- many (try $ skipSpaces *> many1Char alphaNum)
params <- keyValues
blocksParser <- case includeArgs of
("example" : _) -> return $ pure . B.codeBlock <$> parseRaw
@@ -535,10 +538,10 @@ include = try $ do
char '"'
manyTill (noneOf "\n\r\t") (char '"')
- parseRaw :: PandocMonad m => OrgParser m String
- parseRaw = many anyChar
+ parseRaw :: PandocMonad m => OrgParser m Text
+ parseRaw = manyChar anyChar
- blockFilter :: [(String, String)] -> [Block] -> [Block]
+ blockFilter :: [(Text, Text)] -> [Block] -> [Block]
blockFilter params blks =
let minlvl = lookup "minlevel" params
in case (minlvl >>= safeRead :: Maybe Int) of
@@ -660,7 +663,7 @@ columnPropertyCell = emptyCell <|> propCell <?> "alignment info"
<$> (skipSpaces
*> char '<'
*> optionMaybe tableAlignFromChar)
- <*> (optionMaybe (many1 digit >>= safeRead)
+ <*> (optionMaybe (many1Char digit >>= safeRead)
<* char '>'
<* emptyCell)
@@ -739,10 +742,10 @@ latexFragment = try $ do
, "\\end{", e, "}\n"
]
-latexEnd :: Monad m => String -> OrgParser m ()
+latexEnd :: Monad m => Text -> OrgParser m ()
latexEnd envName = try $
() <$ skipSpaces
- <* string ("\\end{" ++ envName ++ "}")
+ <* textStr ("\\end{" <> envName <> "}")
<* blankline
@@ -813,12 +816,12 @@ definitionListItem :: PandocMonad m
-> OrgParser m (F (Inlines, [Blocks]))
definitionListItem parseIndentedMarker = try $ do
markerLength <- parseIndentedMarker
- term <- manyTill (noneOf "\n\r") (try definitionMarker)
+ term <- manyTillChar (noneOf "\n\r") (try definitionMarker)
line1 <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- cont <- concat <$> many (listContinuation markerLength)
+ cont <- T.concat <$> many (listContinuation markerLength)
term' <- parseFromString inlines term
- contents' <- parseFromString blocks $ line1 ++ blank ++ cont
+ contents' <- parseFromString blocks $ line1 <> blank <> cont
return $ (,) <$> term' <*> fmap (:[]) contents'
where
definitionMarker =
@@ -832,16 +835,16 @@ listItem parseIndentedMarker = try . withContext ListItemState $ do
markerLength <- try parseIndentedMarker
firstLine <- anyLineNewline
blank <- option "" ("\n" <$ blankline)
- rest <- concat <$> many (listContinuation markerLength)
- parseFromString blocks $ firstLine ++ blank ++ rest
+ rest <- T.concat <$> many (listContinuation markerLength)
+ parseFromString blocks $ firstLine <> blank <> rest
-- continuation of a list item - indented and separated by blankline or endline.
-- Note: nested lists are parsed as continuations.
-listContinuation :: PandocMonad m => Int -> OrgParser m String
+listContinuation :: PandocMonad m => Int -> OrgParser m Text
listContinuation markerLength = try $ do
notFollowedBy' blankline
- mappend <$> (concat <$> many1 (listContinuation' markerLength))
- <*> many blankline
+ mappend <$> (T.concat <$> many1 (listContinuation' markerLength))
+ <*> manyChar blankline
where
listContinuation' indentation =
blockLines indentation <|> listLine indentation
@@ -853,6 +856,6 @@ listContinuation markerLength = try $ do
>> blockAttributes
>>= (\blockAttrs ->
case attrFromBlockAttributes blockAttrs of
- ("", [], []) -> count 1 anyChar
+ ("", [], []) -> countChar 1 anyChar
_ -> indentWith indentation))
>> (snd <$> withRaw orgBlock)
diff --git a/src/Text/Pandoc/Readers/Org/DocumentTree.hs b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
index c96087be7..09a501b68 100644
--- a/src/Text/Pandoc/Readers/Org/DocumentTree.hs
+++ b/src/Text/Pandoc/Readers/Org/DocumentTree.hs
@@ -1,5 +1,6 @@
-{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.DocumentTree
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -17,9 +18,9 @@ module Text.Pandoc.Readers.Org.DocumentTree
import Prelude
import Control.Arrow ((***))
import Control.Monad (guard, void)
-import Data.Char (toLower, toUpper)
import Data.List (intersperse)
import Data.Maybe (mapMaybe)
+import Data.Text (Text)
import Text.Pandoc.Builder (Blocks, Inlines)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
@@ -28,6 +29,7 @@ import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import qualified Data.Set as Set
+import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
--
@@ -59,28 +61,28 @@ documentTree blocks inline = do
}
-- | Create a tag containing the given string.
-toTag :: String -> Tag
+toTag :: Text -> Tag
toTag = Tag
-- | The key (also called name or type) of a property.
-newtype PropertyKey = PropertyKey { fromKey :: String }
+newtype PropertyKey = PropertyKey { fromKey :: Text }
deriving (Show, Eq, Ord)
-- | Create a property key containing the given string. Org mode keys are
-- case insensitive and are hence converted to lower case.
-toPropertyKey :: String -> PropertyKey
-toPropertyKey = PropertyKey . map toLower
+toPropertyKey :: Text -> PropertyKey
+toPropertyKey = PropertyKey . T.toLower
-- | The value assigned to a property.
-newtype PropertyValue = PropertyValue { fromValue :: String }
+newtype PropertyValue = PropertyValue { fromValue :: Text }
-- | Create a property value containing the given string.
-toPropertyValue :: String -> PropertyValue
+toPropertyValue :: Text -> PropertyValue
toPropertyValue = PropertyValue
-- | Check whether the property value is non-nil (i.e. truish).
isNonNil :: PropertyValue -> Bool
-isNonNil p = map toLower (fromValue p) `notElem` ["()", "{}", "nil"]
+isNonNil p = T.toLower (fromValue p) `notElem` ["()", "{}", "nil"]
-- | Key/value pairs from a PROPERTIES drawer
type Properties = [(PropertyKey, PropertyValue)]
@@ -273,7 +275,7 @@ headlineToHeader hdln = do
todoKeyword :: Monad m => OrgParser m TodoMarker
todoKeyword = try $ do
taskStates <- activeTodoMarkers <$> getState
- let kwParser tdm = try (tdm <$ string (todoMarkerName tdm)
+ let kwParser tdm = try (tdm <$ textStr (todoMarkerName tdm)
<* spaceChar
<* updateLastPreCharPos)
choice (map kwParser taskStates)
@@ -281,26 +283,26 @@ todoKeyword = try $ do
todoKeywordToInlines :: TodoMarker -> Inlines
todoKeywordToInlines tdm =
let todoText = todoMarkerName tdm
- todoState = map toLower . show $ todoMarkerState tdm
+ todoState = T.toLower . T.pack . show $ todoMarkerState tdm
classes = [todoState, todoText]
in B.spanWith (mempty, classes, mempty) (B.str todoText)
propertiesToAttr :: Properties -> Attr
propertiesToAttr properties =
let
- toStringPair = fromKey *** fromValue
+ toTextPair = fromKey *** fromValue
customIdKey = toPropertyKey "custom_id"
classKey = toPropertyKey "class"
unnumberedKey = toPropertyKey "unnumbered"
specialProperties = [customIdKey, classKey, unnumberedKey]
id' = maybe mempty fromValue . lookup customIdKey $ properties
cls = maybe mempty fromValue . lookup classKey $ properties
- kvs' = map toStringPair . filter ((`notElem` specialProperties) . fst)
+ kvs' = map toTextPair . filter ((`notElem` specialProperties) . fst)
$ properties
isUnnumbered =
maybe False isNonNil . lookup unnumberedKey $ properties
in
- (id', words cls ++ ["unnumbered" | isUnnumbered], kvs')
+ (id', T.words cls ++ ["unnumbered" | isUnnumbered], kvs')
tagsToInlines :: [Tag] -> Inlines
tagsToInlines [] = mempty
@@ -336,15 +338,15 @@ planningToBlock planning = do
<> B.emph (B.str time)
-- | An Org timestamp, including repetition marks. TODO: improve
-type Timestamp = String
+type Timestamp = Text
timestamp :: Monad m => OrgParser m Timestamp
timestamp = try $ do
openChar <- oneOf "<["
let isActive = openChar == '<'
let closeChar = if isActive then '>' else ']'
- content <- many1Till anyChar (char closeChar)
- return (openChar : content ++ [closeChar])
+ content <- many1TillChar anyChar (char closeChar)
+ return $ T.cons openChar $ content `T.snoc` closeChar
-- | Planning information for a subtree/headline.
data PlanningInfo = PlanningInfo
@@ -374,7 +376,7 @@ planningInfo = try $ do
propertiesDrawer :: Monad m => OrgParser m Properties
propertiesDrawer = try $ do
drawerType <- drawerStart
- guard $ map toUpper drawerType == "PROPERTIES"
+ guard $ T.toUpper drawerType == "PROPERTIES"
manyTill property (try endOfDrawer)
where
property :: Monad m => OrgParser m (PropertyKey, PropertyValue)
@@ -382,12 +384,12 @@ propertiesDrawer = try $ do
key :: Monad m => OrgParser m PropertyKey
key = fmap toPropertyKey . try $
- skipSpaces *> char ':' *> many1Till nonspaceChar (char ':')
+ skipSpaces *> char ':' *> many1TillChar nonspaceChar (char ':')
value :: Monad m => OrgParser m PropertyValue
value = fmap toPropertyValue . try $
- skipSpaces *> manyTill anyChar (try $ skipSpaces *> newline)
+ skipSpaces *> manyTillChar anyChar (try $ skipSpaces *> newline)
- endOfDrawer :: Monad m => OrgParser m String
+ endOfDrawer :: Monad m => OrgParser m Text
endOfDrawer = try $
skipSpaces *> stringAnyCase ":END:" <* skipSpaces <* newline
diff --git a/src/Text/Pandoc/Readers/Org/ExportSettings.hs b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
index f783eaa0f..f1f089273 100644
--- a/src/Text/Pandoc/Readers/Org/ExportSettings.hs
+++ b/src/Text/Pandoc/Readers/Org/ExportSettings.hs
@@ -1,4 +1,5 @@
{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ExportSettings
Copyright : © 2016–2019 Albert Krewinkel
@@ -21,6 +22,7 @@ import Text.Pandoc.Readers.Org.Parsing
import Control.Monad (mzero, void)
import Data.Char (toLower)
import Data.Maybe (listToMaybe)
+import Data.Text (Text)
-- | Read and handle space separated org-mode export settings.
exportSettings :: PandocMonad m => OrgParser m ()
@@ -70,11 +72,11 @@ exportSetting = choice
genericExportSetting :: Monad m
=> OrgParser m a
- -> String
+ -> Text
-> ExportSettingSetter a
-> OrgParser m ()
genericExportSetting optionParser settingIdentifier setter = try $ do
- _ <- string settingIdentifier *> char ':'
+ _ <- textStr settingIdentifier *> char ':'
value <- optionParser
updateState $ modifyExportSettings value
where
@@ -82,11 +84,11 @@ genericExportSetting optionParser settingIdentifier setter = try $ do
st { orgStateExportSettings = setter val . orgStateExportSettings $ st }
-- | A boolean option, either nil (False) or non-nil (True).
-booleanSetting :: Monad m => String -> ExportSettingSetter Bool -> OrgParser m ()
+booleanSetting :: Monad m => Text -> ExportSettingSetter Bool -> OrgParser m ()
booleanSetting = genericExportSetting elispBoolean
-- | An integer-valued option.
-integerSetting :: Monad m => String -> ExportSettingSetter Int -> OrgParser m ()
+integerSetting :: Monad m => Text -> ExportSettingSetter Int -> OrgParser m ()
integerSetting = genericExportSetting parseInt
where
parseInt = try $
@@ -95,7 +97,7 @@ integerSetting = genericExportSetting parseInt
-- | Either the string "headline" or an elisp boolean and treated as an
-- @ArchivedTreesOption@.
archivedTreeSetting :: Monad m
- => String
+ => Text
-> ExportSettingSetter ArchivedTreesOption
-> OrgParser m ()
archivedTreeSetting =
@@ -115,42 +117,42 @@ archivedTreeSetting =
-- | A list or a complement list (i.e. a list starting with `not`).
complementableListSetting :: Monad m
- => String
- -> ExportSettingSetter (Either [String] [String])
+ => Text
+ -> ExportSettingSetter (Either [Text] [Text])
-> OrgParser m ()
complementableListSetting = genericExportSetting $ choice
- [ Left <$> complementStringList
+ [ Left <$> complementTextList
, Right <$> stringList
, (\b -> if b then Left [] else Right []) <$> elispBoolean
]
where
-- Read a plain list of strings.
- stringList :: Monad m => OrgParser m [String]
+ stringList :: Monad m => OrgParser m [Text]
stringList = try $
char '('
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
-- Read an emacs lisp list specifying a complement set.
- complementStringList :: Monad m => OrgParser m [String]
- complementStringList = try $
+ complementTextList :: Monad m => OrgParser m [Text]
+ complementTextList = try $
string "(not "
- *> sepBy elispString spaces
+ *> sepBy elispText spaces
<* char ')'
- elispString :: Monad m => OrgParser m String
- elispString = try $
+ elispText :: Monad m => OrgParser m Text
+ elispText = try $
char '"'
- *> manyTill alphaNum (char '"')
+ *> manyTillChar alphaNum (char '"')
-- | Read but ignore the export setting.
-ignoredSetting :: Monad m => String -> OrgParser m ()
-ignoredSetting s = try (() <$ string s <* char ':' <* many1 nonspaceChar)
+ignoredSetting :: Monad m => Text -> OrgParser m ()
+ignoredSetting s = try (() <$ textStr s <* char ':' <* many1 nonspaceChar)
-- | Read any setting string, but ignore it and emit a warning.
ignoreAndWarn :: PandocMonad m => OrgParser m ()
ignoreAndWarn = try $ do
- opt <- many1 nonspaceChar
+ opt <- many1Char nonspaceChar
report (UnknownOrgExportOption opt)
return ()
diff --git a/src/Text/Pandoc/Readers/Org/Inlines.hs b/src/Text/Pandoc/Readers/Org/Inlines.hs
index cae590c5f..da638f717 100644
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@@ -20,7 +20,7 @@ import Prelude
import Text.Pandoc.Readers.Org.BlockStarts (endOfBlock, noteMarker)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
-import Text.Pandoc.Readers.Org.Shared (cleanLinkString, isImageFilename,
+import Text.Pandoc.Readers.Org.Shared (cleanLinkText, isImageFilename,
originalLang, translateLang, exportsCode)
import Text.Pandoc.Builder (Inlines)
@@ -38,12 +38,14 @@ import Control.Monad.Trans (lift)
import Data.Char (isAlphaNum, isSpace)
import Data.List (intersperse)
import qualified Data.Map as M
+import Data.Text (Text)
+import qualified Data.Text as T
import Data.Maybe (fromMaybe)
--
-- Functions acting on the parser state
--
-recordAnchorId :: PandocMonad m => String -> OrgParser m ()
+recordAnchorId :: PandocMonad m => Text -> OrgParser m ()
recordAnchorId i = updateState $ \s ->
s{ orgStateAnchorIds = i : orgStateAnchorIds s }
@@ -127,7 +129,7 @@ linebreak :: PandocMonad m => OrgParser m (F Inlines)
linebreak = try $ pure B.linebreak <$ string "\\\\" <* skipSpaces <* newline
str :: PandocMonad m => OrgParser m (F Inlines)
-str = return . B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
+str = return . B.str <$> many1Char (noneOf $ specialChars ++ "\n\r ")
<* updateLastStrPos
-- | An endline character that can be treated as a space, not a structural
@@ -321,7 +323,7 @@ linkLikeOrgRefCite = try $ do
-- | Read a citation key. The characters allowed in citation keys are taken
-- from the `org-ref-cite-re` variable in `org-ref.el`.
-orgRefCiteKey :: PandocMonad m => OrgParser m String
+orgRefCiteKey :: PandocMonad m => OrgParser m Text
orgRefCiteKey =
let citeKeySpecialChars = "-_:\\./," :: String
isCiteKeySpecialChar c = c `elem` citeKeySpecialChars
@@ -329,7 +331,7 @@ orgRefCiteKey =
endOfCitation = try $ do
many $ satisfy isCiteKeySpecialChar
satisfy $ not . isCiteKeyChar
- in try $ satisfy isCiteKeyChar `many1Till` lookAhead endOfCitation
+ in try $ satisfy isCiteKeyChar `many1TillChar` lookAhead endOfCitation
-- | Supported citation types. Only a small subset of org-ref types is
@@ -384,11 +386,11 @@ footnote = try $ inlineNote <|> referencedNote
inlineNote :: PandocMonad m => OrgParser m (F Inlines)
inlineNote = try $ do
string "[fn:"
- ref <- many alphaNum
+ ref <- manyChar alphaNum
char ':'
note <- fmap B.para . trimInlinesF . mconcat <$> many1Till inline (char ']')
- unless (null ref) $
- addToNotesTable ("fn:" ++ ref, note)
+ unless (T.null ref) $
+ addToNotesTable ("fn:" <> ref, note)
return $ B.note <$> note
referencedNote :: PandocMonad m => OrgParser m (F Inlines)
@@ -397,7 +399,7 @@ referencedNote = try $ do
return $ do
notes <- asksF orgStateNotes'
case lookup ref notes of
- Nothing -> return . B.str $ "[" ++ ref ++ "]"
+ Nothing -> return . B.str $ "[" <> ref <> "]"
Just contents -> do
st <- askF
let contents' = runF contents st{ orgStateNotes' = [] }
@@ -420,7 +422,7 @@ explicitOrImageLink = try $ do
return $ do
src <- srcF
title <- titleF
- case cleanLinkString descr of
+ case cleanLinkText descr of
Just imgSrc | isImageFilename imgSrc ->
return . B.link src "" $ B.image imgSrc mempty mempty
_ ->
@@ -429,10 +431,10 @@ explicitOrImageLink = try $ do
selflinkOrImage :: PandocMonad m => OrgParser m (F Inlines)
selflinkOrImage = try $ do
target <- char '[' *> linkTarget <* char ']'
- case cleanLinkString target of
- Nothing -> case target of
- '#':_ -> returnF $ B.link target "" (B.str target)
- _ -> return $ internalLink target (B.str target)
+ case cleanLinkText target of
+ Nothing -> case T.uncons target of
+ Just ('#', _) -> returnF $ B.link target "" (B.str target)
+ _ -> return $ internalLink target (B.str target)
Just nonDocTgt -> if isImageFilename nonDocTgt
then returnF $ B.image nonDocTgt "" ""
else returnF $ B.link nonDocTgt "" (B.str target)
@@ -449,35 +451,35 @@ angleLink = try $ do
char '>'
return link
-linkTarget :: PandocMonad m => OrgParser m String
-linkTarget = enclosedByPair1 '[' ']' (noneOf "\n\r[]")
+linkTarget :: PandocMonad m => OrgParser m Text
+linkTarget = T.pack <$> enclosedByPair1 '[' ']' (noneOf "\n\r[]")
-possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m String
+possiblyEmptyLinkTarget :: PandocMonad m => OrgParser m Text
possiblyEmptyLinkTarget = try linkTarget <|> ("" <$ string "[]")
-applyCustomLinkFormat :: String -> OrgParser m (F String)
+applyCustomLinkFormat :: Text -> OrgParser m (F Text)
applyCustomLinkFormat link = do
- let (linkType, rest) = break (== ':') link
+ let (linkType, rest) = T.break (== ':') link
return $ do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
- return $ maybe link ($ drop 1 rest) formatter
+ return $ maybe link ($ T.drop 1 rest) formatter
-- | Take a link and return a function which produces new inlines when given
-- description inlines.
-linkToInlinesF :: String -> Inlines -> F Inlines
+linkToInlinesF :: Text -> Inlines -> F Inlines
linkToInlinesF linkStr =
- case linkStr of
- "" -> pure . B.link mempty "" -- wiki link (empty by convention)
- ('#':_) -> pure . B.link linkStr "" -- document-local fraction
- _ -> case cleanLinkString linkStr of
- Just extTgt -> return . B.link extTgt ""
- Nothing -> internalLink linkStr -- other internal link
-
-internalLink :: String -> Inlines -> F Inlines
+ case T.uncons linkStr of
+ Nothing -> pure . B.link mempty "" -- wiki link (empty by convention)
+ Just ('#', _) -> pure . B.link linkStr "" -- document-local fraction
+ _ -> case cleanLinkText linkStr of
+ Just extTgt -> return . B.link extTgt ""
+ Nothing -> internalLink linkStr -- other internal link
+
+internalLink :: Text -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
if anchorB
- then return $ B.link ('#':link) "" title
+ then return $ B.link ("#" <> link) "" title
else return $ B.emph title
-- | Parse an anchor like @<<anchor-id>>@ and return an empty span with
@@ -493,15 +495,15 @@ anchor = try $ do
returnF $ B.spanWith (solidify anchorId, [], []) mempty
where
parseAnchor = string "<<"
- *> many1 (noneOf "\t\n\r<>\"' ")
+ *> many1Char (noneOf "\t\n\r<>\"' ")
<* string ">>"
<* skipSpaces
-- | Replace every char but [a-zA-Z0-9_.-:] with a hyphen '-'. This mirrors
-- the org function @org-export-solidify-link-text@.
-solidify :: String -> String
-solidify = map replaceSpecialChar
+solidify :: Text -> Text
+solidify = T.map replaceSpecialChar
where replaceSpecialChar c
| isAlphaNum c = c
| c `elem` ("_.-:" :: String) = c
@@ -511,25 +513,25 @@ solidify = map replaceSpecialChar
inlineCodeBlock :: PandocMonad m => OrgParser m (F Inlines)
inlineCodeBlock = try $ do
string "src_"
- lang <- many1 orgArgWordChar
+ lang <- many1Char orgArgWordChar
opts <- option [] $ enclosedByPair '[' ']' inlineBlockOption
- inlineCode <- enclosedByPair1 '{' '}' (noneOf "\n\r")
+ inlineCode <- T.pack <$> enclosedByPair1 '{' '}' (noneOf "\n\r")
let attrClasses = [translateLang lang]
let attrKeyVal = originalLang lang <> opts
let codeInlineBlck = B.codeWith ("", attrClasses, attrKeyVal) inlineCode
returnF $ if exportsCode opts then codeInlineBlck else mempty
where
- inlineBlockOption :: PandocMonad m => OrgParser m (String, String)
+ inlineBlockOption :: PandocMonad m => OrgParser m (Text, Text)
inlineBlockOption = try $ do
argKey <- orgArgKey
paramValue <- option "yes" orgInlineParamValue
return (argKey, paramValue)
- orgInlineParamValue :: PandocMonad m => OrgParser m String
+ orgInlineParamValue :: PandocMonad m => OrgParser m Text
orgInlineParamValue = try $
skipSpaces
*> notFollowedBy (char ':')
- *> many1 (noneOf "\t\n\r ]")
+ *> many1Char (noneOf "\t\n\r ]")
<* skipSpaces
@@ -584,7 +586,7 @@ superscript = fmap B.superscript <$> try (char '^' *> subOrSuperExpr)
math :: PandocMonad m => OrgParser m (F Inlines)
math = return . B.math <$> choice [ math1CharBetween '$'
- , mathStringBetween '$'
+ , mathTextBetween '$'
, rawMathBetween "\\(" "\\)"
]
@@ -604,7 +606,7 @@ updatePositions c = do
return c
symbol :: PandocMonad m => OrgParser m (F Inlines)
-symbol = return . B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
+symbol = return . B.str . T.singleton <$> (oneOf specialChars >>= updatePositions)
emphasisBetween :: PandocMonad m
=> Char
@@ -619,7 +621,7 @@ emphasisBetween c = try $ do
verbatimBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
verbatimBetween c = try $
emphasisStart c *>
many1TillNOrLessNewlines 1 verbatimChar (emphasisEnd c)
@@ -627,33 +629,33 @@ verbatimBetween c = try $
verbatimChar = noneOf "\n\r" >>= updatePositions
-- | Parses a raw string delimited by @c@ using Org's math rules
-mathStringBetween :: PandocMonad m
+mathTextBetween :: PandocMonad m
=> Char
- -> OrgParser m String
-mathStringBetween c = try $ do
+ -> OrgParser m Text
+mathTextBetween c = try $ do
mathStart c
body <- many1TillNOrLessNewlines mathAllowedNewlines
(noneOf (c:"\n\r"))
(lookAhead $ mathEnd c)
final <- mathEnd c
- return $ body ++ [final]
+ return $ T.snoc body final
-- | Parse a single character between @c@ using math rules
math1CharBetween :: PandocMonad m
=> Char
- -> OrgParser m String
+ -> OrgParser m Text
math1CharBetween c = try $ do
char c
res <- noneOf $ c:mathForbiddenBorderChars
char c
eof <|> () <$ lookAhead (oneOf mathPostChars)
- return [res]
+ return $ T.singleton res
rawMathBetween :: PandocMonad m
- => String
- -> String
- -> OrgParser m String
-rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
+ => Text
+ -> Text
+ -> OrgParser m Text
+rawMathBetween s e = try $ textStr s *> manyTillChar anyChar (try $ textStr e)
-- | Parses the start (opening character) of emphasis
emphasisStart :: PandocMonad m => Char -> OrgParser m Char
@@ -702,10 +704,10 @@ enclosedInlines start end = try $
enclosedRaw :: (PandocMonad m, Show b) => OrgParser m a
-> OrgParser m b
- -> OrgParser m String
+ -> OrgParser m Text
enclosedRaw start end = try $
start *> (onSingleLine <|> spanningTwoLines)
- where onSingleLine = try $ many1Till (noneOf "\n\r") end
+ where onSingleLine = try $ many1TillChar (noneOf "\n\r") end
spanningTwoLines = try $
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
@@ -714,7 +716,7 @@ enclosedRaw start end = try $
many1TillNOrLessNewlines :: PandocMonad m => Int
-> OrgParser m Char
-> OrgParser m a
- -> OrgParser m String
+ -> OrgParser m Text
many1TillNOrLessNewlines n p end = try $
nMoreLines (Just n) mempty >>= oneOrMore
where
@@ -726,7 +728,7 @@ many1TillNOrLessNewlines n p end = try $
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p newline)
finalLine = try $ manyTill p end
minus1 k = k - 1
- oneOrMore cs = cs <$ guard (not $ null cs)
+ oneOrMore cs = T.pack cs <$ guard (not $ null cs)
-- Org allows customization of the way it reads emphasis. We use the defaults
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
@@ -773,17 +775,17 @@ subOrSuperExpr :: PandocMonad m => OrgParser m (F Inlines)
subOrSuperExpr = try $
choice [ charsInBalanced '{' '}' (noneOf "\n\r")
, enclosing ('(', ')') <$> charsInBalanced '(' ')' (noneOf "\n\r")
- , simpleSubOrSuperString
+ , simpleSubOrSuperText
] >>= parseFromString (mconcat <$> many inline)
- where enclosing (left, right) s = left : s ++ [right]
+ where enclosing (left, right) s = T.cons left $ T.snoc s right
-simpleSubOrSuperString :: PandocMonad m => OrgParser m String
-simpleSubOrSuperString = try $ do
+simpleSubOrSuperText :: PandocMonad m => OrgParser m Text
+simpleSubOrSuperText = try $ do
state <- getState
guard . exportSubSuperscripts . orgStateExportSettings $ state
- choice [ string "*"
- , mappend <$> option [] ((:[]) <$> oneOf "+-")
- <*> many1 alphaNum
+ choice [ textStr "*"
+ , mappend <$> option "" (T.singleton <$> oneOf "+-")
+ <*> many1Char alphaNum
]
inlineLaTeX :: PandocMonad m => OrgParser m (F Inlines)
@@ -793,28 +795,28 @@ inlineLaTeX = try $ do
maybe mzero returnF $
parseAsMathMLSym cmd `mplus` parseAsMath cmd `mplus` ils
where
- parseAsMath :: String -> Maybe Inlines
+ parseAsMath :: Text -> Maybe Inlines
parseAsMath cs = B.fromList <$> texMathToPandoc cs
- parseAsInlineLaTeX :: PandocMonad m => String -> m (Maybe Inlines)
+ parseAsInlineLaTeX :: PandocMonad m => Text -> m (Maybe Inlines)
parseAsInlineLaTeX cs = maybeRight <$> runParserT inlineCommand state "" cs
- parseAsMathMLSym :: String -> Maybe Inlines
+ parseAsMathMLSym :: Text -> Maybe Inlines
parseAsMathMLSym cs = B.str <$> MathMLEntityMap.getUnicode (clean cs)
-- drop initial backslash and any trailing "{}"
- where clean = dropWhileEnd (`elem` ("{}" :: String)) . drop 1
+ where clean = T.dropWhileEnd (`elem` ("{}" :: String)) . T.drop 1
state :: ParserState
state = def{ stateOptions = def{ readerExtensions =
enableExtension Ext_raw_tex (readerExtensions def) } }
- texMathToPandoc :: String -> Maybe [Inline]
+ texMathToPandoc :: Text -> Maybe [Inline]
texMathToPandoc cs = maybeRight (readTeX cs) >>= writePandoc DisplayInline
maybeRight :: Either a b -> Maybe b
maybeRight = either (const Nothing) Just
-inlineLaTeXCommand :: PandocMonad m => OrgParser m String
+inlineLaTeXCommand :: PandocMonad m => OrgParser m Text
inlineLaTeXCommand = try $ do
rest <- getInput
st <- getState
@@ -823,21 +825,17 @@ inlineLaTeXCommand = try $ do
Right cs -> do
-- drop any trailing whitespace, those are not be part of the command as
-- far as org mode is concerned.
- let cmdNoSpc = dropWhileEnd isSpace cs
- let len = length cmdNoSpc
+ let cmdNoSpc = T.dropWhileEnd isSpace cs
+ let len = T.length cmdNoSpc
count len anyChar
return cmdNoSpc
_ -> mzero
--- Taken from Data.OldList.
-dropWhileEnd :: (a -> Bool) -> [a] -> [a]
-dropWhileEnd p = foldr (\x xs -> if p x && null xs then [] else x : xs) []
-
exportSnippet :: PandocMonad m => OrgParser m (F Inlines)
exportSnippet = try $ do
string "@@"
- format <- many1Till (alphaNum <|> char '-') (char ':')
- snippet <- manyTill anyChar (try $ string "@@")
+ format <- many1TillChar (alphaNum <|> char '-') (char ':')
+ snippet <- manyTillChar anyChar (try $ string "@@")
returnF $ B.rawInline format snippet
macro :: PandocMonad m => OrgParser m (F Inlines)
@@ -845,7 +843,7 @@ macro = try $ do
recursionDepth <- orgStateMacroDepth <$> getState
guard $ recursionDepth < 15
string "{{{"
- name <- many alphaNum
+ name <- manyChar alphaNum
args <- ([] <$ string "}}}")
<|> char '(' *> argument `sepBy` char ',' <* eoa
expander <- lookupMacro name <$> getState
@@ -857,7 +855,7 @@ macro = try $ do
updateState $ \s -> s { orgStateMacroDepth = recursionDepth }
return res
where
- argument = many $ notFollowedBy eoa *> noneOf ","
+ argument = manyChar $ notFollowedBy eoa *> noneOf ","
eoa = string ")}}}"
smart :: PandocMonad m => OrgParser m (F Inlines)
diff --git a/src/Text/Pandoc/Readers/Org/Meta.hs b/src/Text/Pandoc/Readers/Org/Meta.hs
index 0a388403e..811a5b974 100644
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.Meta
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -30,11 +31,12 @@ import Text.Pandoc.Definition
import Text.Pandoc.Shared (safeRead)
import Control.Monad (mzero, void, when)
-import Data.Char (toLower)
import Data.List (intersperse)
import Data.Maybe (fromMaybe)
import qualified Data.Map as M
import qualified Data.Set as Set
+import Data.Text (Text)
+import qualified Data.Text as T
import Network.HTTP (urlEncode)
-- | Returns the current meta, respecting export options.
@@ -47,7 +49,7 @@ metaExport = do
. (if exportWithEmail settings then id else removeMeta "email")
<$> orgStateMeta st
-removeMeta :: String -> Meta -> Meta
+removeMeta :: Text -> Meta -> Meta
removeMeta key meta' =
let metaMap = unMeta meta'
in Meta $ M.delete key metaMap
@@ -60,18 +62,18 @@ metaLine = mempty <$ metaLineStart <* (optionLine <|> declarationLine)
declarationLine :: PandocMonad m => OrgParser m ()
declarationLine = try $ do
- key <- map toLower <$> metaKey
+ key <- T.toLower <$> metaKey
(key', value) <- metaValue key
let addMetaValue st =
st { orgStateMeta = B.setMeta key' <$> value <*> orgStateMeta st }
when (key' /= "results") $ updateState addMetaValue
-metaKey :: Monad m => OrgParser m String
-metaKey = map toLower <$> many1 (noneOf ": \n\r")
- <* char ':'
- <* skipSpaces
+metaKey :: Monad m => OrgParser m Text
+metaKey = T.toLower <$> many1Char (noneOf ": \n\r")
+ <* char ':'
+ <* skipSpaces
-metaValue :: PandocMonad m => String -> OrgParser m (String, F MetaValue)
+metaValue :: PandocMonad m => Text -> OrgParser m (Text, F MetaValue)
metaValue key =
let inclKey = "header-includes"
in case key of
@@ -88,7 +90,7 @@ metaValue key =
-- Org-mode expects class options to contain the surrounding brackets,
-- pandoc does not.
"latex_class_options" -> ("classoption",) <$>
- metaModifiedString (filter (`notElem` "[]"))
+ metaModifiedString (T.filter (`notElem` ("[]" :: String)))
"html_head" -> (inclKey,) <$>
accumulatingList inclKey (metaExportSnippet "html")
_ -> (key,) <$> metaString
@@ -98,25 +100,25 @@ metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
metaInlinesCommaSeparated :: PandocMonad m => OrgParser m (F MetaValue)
metaInlinesCommaSeparated = do
- itemStrs <- many1 (noneOf ",\n") `sepBy1` char ','
+ itemStrs <- many1Char (noneOf ",\n") `sepBy1` char ','
newline
- items <- mapM (parseFromString inlinesTillNewline . (++ "\n")) itemStrs
+ items <- mapM (parseFromString inlinesTillNewline . (<> "\n")) itemStrs
let toMetaInlines = MetaInlines . B.toList
return $ MetaList . map toMetaInlines <$> sequence items
metaString :: Monad m => OrgParser m (F MetaValue)
metaString = metaModifiedString id
-metaModifiedString :: Monad m => (String -> String) -> OrgParser m (F MetaValue)
+metaModifiedString :: Monad m => (Text -> Text) -> OrgParser m (F MetaValue)
metaModifiedString f = return . MetaString . f <$> anyLine
-- | Read an format specific meta definition
-metaExportSnippet :: Monad m => String -> OrgParser m (F MetaValue)
+metaExportSnippet :: Monad m => Text -> OrgParser m (F MetaValue)
metaExportSnippet format =
return . MetaInlines . B.toList . B.rawInline format <$> anyLine
-- | Accumulate the result of the @parser@ in a list under @key@.
-accumulatingList :: Monad m => String
+accumulatingList :: Monad m => Text
-> OrgParser m (F MetaValue)
-> OrgParser m (F MetaValue)
accumulatingList key p = do
@@ -147,33 +149,33 @@ optionLine = try $ do
"pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
_ -> mzero
-addLinkFormat :: Monad m => String
- -> (String -> String)
+addLinkFormat :: Monad m => Text
+ -> (Text -> Text)
-> OrgParser m ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
-parseLinkFormat :: Monad m => OrgParser m (String, String -> String)
+parseLinkFormat :: Monad m => OrgParser m (Text, Text -> Text)
parseLinkFormat = try $ do
- linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
+ linkType <- T.cons <$> letter <*> manyChar (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
-parseFormat :: Monad m => OrgParser m (String -> String)
+parseFormat :: Monad m => OrgParser m (Text -> Text)
parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
- replacePlain = try $ (\x -> concat . flip intersperse x)
+ replacePlain = try $ (\x -> T.concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
- replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
+ replaceUrl = try $ (\x -> T.concat . flip intersperse x . T.pack . urlEncode . T.unpack)
<$> sequence [tillSpecifier 'h', rest]
- justAppend = try $ (++) <$> rest
+ justAppend = try $ (<>) <$> rest
- rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
- tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
+ rest = manyTillChar anyChar (eof <|> () <$ oneOf "\n\r")
+ tillSpecifier c = manyTillChar (noneOf "\n\r") (try $ string ('%':c:""))
tagList :: Monad m => OrgParser m [Tag]
tagList = do
@@ -231,41 +233,41 @@ todoSequence = try $ do
(x:xs) -> return $ keywordsToSequence (reverse xs) [x]
where
- todoKeywords :: Monad m => OrgParser m [String]
+ todoKeywords :: Monad m => OrgParser m [Text]
todoKeywords = try $
- let keyword = many1 nonspaceChar <* skipSpaces
+ let keyword = many1Char nonspaceChar <* skipSpaces
endOfKeywords = todoDoneSep <|> void newline
in manyTill keyword (lookAhead endOfKeywords)
todoDoneSep :: Monad m => OrgParser m ()
todoDoneSep = void . try $ skipSpaces *> char '|' <* skipSpaces1
- keywordsToSequence :: [String] -> [String] -> TodoSequence
+ keywordsToSequence :: [Text] -> [Text] -> TodoSequence
keywordsToSequence todo done =
let todoMarkers = map (TodoMarker Todo) todo
doneMarkers = map (TodoMarker Done) done
in todoMarkers ++ doneMarkers
-macroDefinition :: Monad m => OrgParser m (String, [String] -> String)
+macroDefinition :: Monad m => OrgParser m (Text, [Text] -> Text)
macroDefinition = try $ do
- macroName <- many1 nonspaceChar <* skipSpaces
+ macroName <- many1Char nonspaceChar <* skipSpaces
firstPart <- expansionPart
(elemOrder, parts) <- unzip <$> many ((,) <$> placeholder <*> expansionPart)
let expander = mconcat . alternate (firstPart:parts) . reorder elemOrder
return (macroName, expander)
where
placeholder :: Monad m => OrgParser m Int
- placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1 digit
+ placeholder = try . fmap (fromMaybe 1 . safeRead) $ char '$' *> many1Char digit
- expansionPart :: Monad m => OrgParser m String
- expansionPart = try $ many (notFollowedBy placeholder *> noneOf "\n\r")
+ expansionPart :: Monad m => OrgParser m Text
+ expansionPart = try $ manyChar (notFollowedBy placeholder *> noneOf "\n\r")
alternate :: [a] -> [a] -> [a]
alternate [] ys = ys
alternate xs [] = xs
alternate (x:xs) (y:ys) = x : y : alternate xs ys
- reorder :: [Int] -> [String] -> [String]
+ reorder :: [Int] -> [Text] -> [Text]
reorder perm xs =
let element n = take 1 $ drop (n - 1) xs
in concatMap element perm
diff --git a/src/Text/Pandoc/Readers/Org/ParserState.hs b/src/Text/Pandoc/Readers/Org/ParserState.hs
index d6dde8b22..cf5583b76 100644
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@@ -1,6 +1,7 @@
-{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Org.ParserState
Copyright : Copyright (C) 2014-2019 Albert Krewinkel
@@ -63,16 +64,16 @@ import Text.Pandoc.Readers.LaTeX.Types (Macro)
type F = Future OrgParserState
-- | An inline note / footnote containing the note key and its (inline) value.
-type OrgNoteRecord = (String, F Blocks)
+type OrgNoteRecord = (Text, F Blocks)
-- | Table of footnotes
type OrgNoteTable = [OrgNoteRecord]
-- | Map of functions for link transformations. The map key is refers to the
-- link-type, the corresponding function transforms the given link string.
-type OrgLinkFormatters = M.Map String (String -> String)
+type OrgLinkFormatters = M.Map Text (Text -> Text)
-- | Macro expander function
-type MacroExpander = [String] -> String
+type MacroExpander = [Text] -> Text
-- | Tag
-newtype Tag = Tag { fromTag :: String }
+newtype Tag = Tag { fromTag :: Text }
deriving (Show, Eq, Ord)
-- | The states in which a todo item can be
@@ -82,7 +83,7 @@ data TodoState = Todo | Done
-- | A ToDo keyword like @TODO@ or @DONE@.
data TodoMarker = TodoMarker
{ todoMarkerState :: TodoState
- , todoMarkerName :: String
+ , todoMarkerName :: Text
}
deriving (Show, Eq)
@@ -91,7 +92,7 @@ type TodoSequence = [TodoMarker]
-- | Org-mode parser state
data OrgParserState = OrgParserState
- { orgStateAnchorIds :: [String]
+ { orgStateAnchorIds :: [Text]
, orgStateEmphasisCharStack :: [Char]
, orgStateEmphasisPreChars :: [Char] -- ^ Chars allowed to occur before
-- emphasis; spaces and newlines are
@@ -102,13 +103,13 @@ data OrgParserState = OrgParserState
, orgStateExcludeTags :: Set.Set Tag
, orgStateExcludeTagsChanged :: Bool
, orgStateExportSettings :: ExportSettings
- , orgStateIdentifiers :: Set.Set String
- , orgStateIncludeFiles :: [String]
+ , orgStateIdentifiers :: Set.Set Text
+ , orgStateIncludeFiles :: [Text]
, orgStateLastForbiddenCharPos :: Maybe SourcePos
, orgStateLastPreCharPos :: Maybe SourcePos
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
- , orgStateMacros :: M.Map String MacroExpander
+ , orgStateMacros :: M.Map Text MacroExpander
, orgStateMacroDepth :: Int
, orgStateMeta :: F Meta
, orgStateNotes' :: OrgNoteTable
@@ -212,10 +213,10 @@ activeTodoSequences st =
activeTodoMarkers :: OrgParserState -> TodoSequence
activeTodoMarkers = concat . activeTodoSequences
-lookupMacro :: String -> OrgParserState -> Maybe MacroExpander
+lookupMacro :: Text -> OrgParserState -> Maybe MacroExpander
lookupMacro macroName = M.lookup macroName . orgStateMacros
-registerMacro :: (String, MacroExpander) -> OrgParserState -> OrgParserState
+registerMacro :: (Text, MacroExpander) -> OrgParserState -> OrgParserState
registerMacro (name, expander) st =
let curMacros = orgStateMacros st
in st{ orgStateMacros = M.insert name expander curMacros }
@@ -236,7 +237,7 @@ data ArchivedTreesOption =
-- These settings can be changed via OPTIONS statements.
data ExportSettings = ExportSettings
{ exportArchivedTrees :: ArchivedTreesOption -- ^ How to treat archived trees
- , exportDrawers :: Either [String] [String]
+ , exportDrawers :: Either [Text] [Text]
-- ^ Specify drawer names which should be exported. @Left@ names are
-- explicitly excluded from the resulting output while @Right@ means that
-- only the listed drawer names should be included.
diff --git a/src/Text/Pandoc/Readers/Org/Parsing.hs b/src/Text/Pandoc/Readers/Org/Parsing.hs
index 24aa0779d..718925120 100644
--- a/src/Text/Pandoc/Readers/Org/Parsing.hs
+++ b/src/Text/Pandoc/Readers/Org/Parsing.hs
@@ -32,7 +32,13 @@ module Text.Pandoc.Readers.Org.Parsing
, orgTagWordChar
-- * Re-exports from Text.Pandoc.Parser
, ParserContext (..)
+ , textStr
+ , countChar
+ , manyChar
+ , many1Char
+ , manyTillChar
, many1Till
+ , many1TillChar
, notFollowedBy'
, spaceChar
, nonspaceChar
@@ -98,6 +104,7 @@ module Text.Pandoc.Readers.Org.Parsing
) where
import Prelude
+import Data.Text (Text)
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Parsing hiding (F, anyLine, blanklines, newline,
@@ -108,14 +115,14 @@ import Control.Monad (guard)
import Control.Monad.Reader (ReaderT)
-- | The parser used to read org files.
-type OrgParser m = ParserT [Char] OrgParserState (ReaderT OrgParserLocal m)
+type OrgParser m = ParserT Text OrgParserState (ReaderT OrgParserLocal m)
--
-- Adaptions and specializations of parsing utilities
--
-- | Parse any line of text
-anyLine :: Monad m => OrgParser m String
+anyLine :: Monad m => OrgParser m Text
anyLine =
P.anyLine
<* updateLastPreCharPos
@@ -123,7 +130,7 @@ anyLine =
-- | Like @'Text.Pandoc.Parsing'@, but resets the position of the last character
-- allowed before emphasised text.
-parseFromString :: Monad m => OrgParser m a -> String -> OrgParser m a
+parseFromString :: Monad m => OrgParser m a -> Text -> OrgParser m a
parseFromString parser str' = do
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
@@ -142,7 +149,7 @@ newline =
<* updateLastForbiddenCharPos
-- | Like @Text.Parsec.Char.blanklines@, but causes additional state changes.
-blanklines :: Monad m => OrgParser m [Char]
+blanklines :: Monad m => OrgParser m Text
blanklines =
P.blanklines
<* updateLastPreCharPos
@@ -192,21 +199,21 @@ updateLastPreCharPos = getPosition >>= \p ->
--
-- | Read the key of a plist style key-value list.
-orgArgKey :: Monad m => OrgParser m String
+orgArgKey :: Monad m => OrgParser m Text
orgArgKey = try $
skipSpaces *> char ':'
- *> many1 orgArgWordChar
+ *> many1Char orgArgWordChar
-- | Read the value of a plist style key-value list.
-orgArgWord :: Monad m => OrgParser m String
-orgArgWord = many1 orgArgWordChar
+orgArgWord :: Monad m => OrgParser m Text
+orgArgWord = many1Char orgArgWordChar
-- | Chars treated as part of a word in plists.
orgArgWordChar :: Monad m => OrgParser m Char
orgArgWordChar = alphaNum <|> oneOf "-_"
-orgTagWord :: Monad m => OrgParser m String
-orgTagWord = many1 orgTagWordChar
+orgTagWord :: Monad m => OrgParser m Text
+orgTagWord = many1Char orgTagWordChar
orgTagWordChar :: Monad m => OrgParser m Char
orgTagWordChar = alphaNum <|> oneOf "@%#_"
diff --git a/src/Text/Pandoc/Readers/Org/Shared.hs b/src/Text/Pandoc/Readers/Org/Shared.hs
index 34f958373..be0a2068e 100644
--- a/src/Text/Pandoc/Readers/Org/Shared.hs
+++ b/src/Text/Pandoc/Readers/Org/Shared.hs
@@ -10,7 +10,7 @@
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
- ( cleanLinkString
+ ( cleanLinkText
, isImageFilename
, originalLang
, translateLang
@@ -19,44 +19,44 @@ module Text.Pandoc.Readers.Org.Shared
import Prelude
import Data.Char (isAlphaNum)
-import Data.List (isPrefixOf)
+import Data.Text (Text)
+import qualified Data.Text as T
import System.FilePath (isValid, takeExtension)
-
+import Text.Pandoc.Shared (elemText)
-- | Check whether the given string looks like the path to of URL of an image.
-isImageFilename :: String -> Bool
-isImageFilename fp = hasImageExtension && (isValid fp || isKnownProtocolUri)
+isImageFilename :: Text -> Bool
+isImageFilename fp = hasImageExtension && (isValid (T.unpack fp) || isKnownProtocolUri)
where
- hasImageExtension = takeExtension fp `elem` imageExtensions
- isKnownProtocolUri = any (\x -> (x ++ "://") `isPrefixOf` fp) protocols
+ hasImageExtension = takeExtension (T.unpack fp) `elem` imageExtensions
+ isKnownProtocolUri = any (\x -> (x <> "://") `T.isPrefixOf` fp) protocols
imageExtensions = [ ".jpeg", ".jpg", ".png", ".gif", ".svg" ]
protocols = [ "file", "http", "https" ]
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
-cleanLinkString :: String -> Maybe String
-cleanLinkString s =
- case s of
- '/':_ -> Just $ "file://" ++ s -- absolute path
- '.':'/':_ -> Just s -- relative path
- '.':'.':'/':_ -> Just s -- relative path
- -- Relative path or URL (file schema)
- 'f':'i':'l':'e':':':s' -> Just $ if "//" `isPrefixOf` s' then s else s'
- _ -> if isUrl s then Just s else Nothing
- where
- isUrl :: String -> Bool
- isUrl cs =
- let (scheme, path) = break (== ':') cs
- in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
- && not (null path)
+cleanLinkText :: Text -> Maybe Text
+cleanLinkText s
+ | Just _ <- T.stripPrefix "/" s = Just $ "file://" <> s -- absolute path
+ | Just _ <- T.stripPrefix "./" s = Just s -- relative path
+ | Just _ <- T.stripPrefix "../" s = Just s -- relative path
+ -- Relative path or URL (file schema)
+ | Just s' <- T.stripPrefix "file:" s = Just $ if "//" `T.isPrefixOf` s' then s else s'
+ | otherwise = if isUrl s then Just s else Nothing
+ where
+ isUrl :: Text -> Bool
+ isUrl cs =
+ let (scheme, path) = T.break (== ':') cs
+ in T.all (\c -> isAlphaNum c || c `elemText` ".-") scheme
+ && not (T.null path)
-- | Creates an key-value pair marking the original language name specified for
-- a piece of source code.
-- | Creates an key-value attributes marking the original language name
-- specified for a piece of source code.
-originalLang :: String -> [(String, String)]
+originalLang :: Text -> [(Text, Text)]
originalLang lang =
let transLang = translateLang lang
in if transLang == lang
@@ -66,7 +66,7 @@ originalLang lang =
-- | Translate from Org-mode's programming language identifiers to those used
-- by Pandoc. This is useful to allow for proper syntax highlighting in
-- Pandoc output.
-translateLang :: String -> String
+translateLang :: Text -> Text
translateLang cs =
case cs of
"C" -> "c"
@@ -79,5 +79,5 @@ translateLang cs =
"sqlite" -> "sql"
_ -> cs
-exportsCode :: [(String, String)] -> Bool
+exportsCode :: [(Text, Text)] -> Bool
exportsCode = maybe True (`elem` ["code", "both"]) . lookup "exports"